gcc/config/rs6000/rs6000.c (official-gcc.git, gupc branch; merge of trunk version 221103)
/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "hashtab.h"
#include "function.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "except.h"
#include "output.h"
#include "dbxout.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cfgloop.h"
#include "sched-int.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "ira.h"
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "hash-map.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)	((A) < (B) ? (A) : (B))
#define max(A,B)	((A) > (B) ? (A) : (B))
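
/* Usage sketch (illustrative): as function-like macros, min and max
   evaluate each argument twice, so operands with side effects are
   unsafe:

     int lo  = min (size, 16);   // fine: plain values
     int bad = min (i++, n);     // wrong: i may be incremented twice  */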

/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in save_size */
  int vrsave_size;		/* size to hold VRSAVE if not in save_size */
  int altivec_padding_size;	/* size of altivec alignment padding if
				   not in save_size */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
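
/* Rough layout sketch (illustrative; the authoritative computation is
   in rs6000_stack_info later in this file): total_size is essentially
   fixed_size + vars_size + parm_size + save_size, rounded up to the
   ABI's stack alignment, with save_size covering the GP/FP/AltiVec
   save areas described by the individual *_size fields above.  */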

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;
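
/* Example (illustrative): a function containing
   __builtin_return_address (2) must be able to walk back through its
   callers' frames, so ra_needs_full_frame is set; plain
   __builtin_return_address (0) only needs the current LR value and
   sets just ra_need_lr.  */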

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable; we call to it so we
   can get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)


/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes
   mapping into real registers, and skip the ANY class, which is just an OR
   of the bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX
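
/* Iteration sketch (illustrative): setup code walks only the real
   classes and skips RELOAD_REG_ANY, e.g.

     for (int rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
       init_masks_for ((enum rs6000_reload_reg_type) rc);  // hypothetical helper  */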

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */

/* Masks of valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG];	/* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}
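
/* Usage sketch (illustrative): address legitimization can guard
   auto-increment forms on these helpers, e.g.

     if (mode_supports_pre_incdec_p (mode))
       ... accept or emit a PRE_INC/PRE_DEC address for this mode ...  */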


/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
};

const struct processor_costs *rs6000_cost;

/* Processor costs (relative to an add).  */

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),	    /* mulsi_const */
  COSTS_N_INSNS (6/2),	    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),	    /* divsi */
  COSTS_N_INSNS (70/2),	    /* divdi */
  COSTS_N_INSNS (10/2),	    /* fp */
  COSTS_N_INSNS (10/2),	    /* dmul */
  COSTS_N_INSNS (74/2),	    /* sdiv */
  COSTS_N_INSNS (74/2),	    /* ddiv */
  128,			    /* cache line size */
  32,			    /* l1 cache */
  512,			    /* l2 cache */
  6,			    /* streams */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
};


/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
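
/* Expansion sketch (illustrative): rs6000-builtin.def is an X-macro
   file; with the definitions above, a hypothetical entry such as

     RS6000_BUILTIN_2 (FOO, "__builtin_foo", MASK, ATTR, CODE_FOR_foo)

   expands to the initializer

     { "__builtin_foo", CODE_FOR_foo, MASK, ATTR },

   so rs6000_builtin_info gets one row per builtin, indexed by the
   rs6000_builtins enum value.  */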

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (tree, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_hasher<toc_hash_struct *>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];		/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_hasher<builtin_hash_struct *>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;


/* Default register names.  */
char rs6000_reg_names[][8] =
{
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
     "mq", "lr", "ctr", "ap",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "ca",
      /* AltiVec registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "vrsave", "vscr",
      /* SPE registers.  */
      "spe_acc", "spefscr",
      /* Soft frame pointer.  */
      "sfp",
      /* HTM SPR registers.  */
      "tfhar", "tfiar", "texasr",
      /* SPE High registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",   "lr",  "ctr",   "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
   "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
   "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
   "%rh8",  "%rh9", "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
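
/* Example: %v20, the first call-saved AltiVec register, yields
   ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 20) == 0x80000000 >> 20
   == 0x00000800 in the VRSAVE mask.  */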
1336 /* Initialize the GCC target structure. */
1337 #undef TARGET_ATTRIBUTE_TABLE
1338 #define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
1339 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
1340 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
1341 #undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
1342 #define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p
1344 #undef TARGET_ASM_ALIGNED_DI_OP
1345 #define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP
1347 /* Default unaligned ops are only provided for ELF. Find the ops needed
1348 for non-ELF systems. */
1349 #ifndef OBJECT_FORMAT_ELF
1350 #if TARGET_XCOFF
1351 /* For XCOFF. rs6000_assemble_integer will handle unaligned DIs on
1352 64-bit targets. */
1353 #undef TARGET_ASM_UNALIGNED_HI_OP
1354 #define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
1355 #undef TARGET_ASM_UNALIGNED_SI_OP
1356 #define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
1357 #undef TARGET_ASM_UNALIGNED_DI_OP
1358 #define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
1359 #else
1360 /* For Darwin. */
1361 #undef TARGET_ASM_UNALIGNED_HI_OP
1362 #define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
1363 #undef TARGET_ASM_UNALIGNED_SI_OP
1364 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
1365 #undef TARGET_ASM_UNALIGNED_DI_OP
1366 #define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
1367 #undef TARGET_ASM_ALIGNED_DI_OP
1368 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
1369 #endif
1370 #endif
1372 /* This hook deals with fixups for relocatable code and DI-mode objects
1373 in 64-bit code. */
1374 #undef TARGET_ASM_INTEGER
1375 #define TARGET_ASM_INTEGER rs6000_assemble_integer
1377 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
1378 #undef TARGET_ASM_ASSEMBLE_VISIBILITY
1379 #define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
1380 #endif
1382 #undef TARGET_SET_UP_BY_PROLOGUE
1383 #define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue
1385 #undef TARGET_HAVE_TLS
1386 #define TARGET_HAVE_TLS HAVE_AS_TLS
1388 #undef TARGET_CANNOT_FORCE_CONST_MEM
1389 #define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem
1391 #undef TARGET_DELEGITIMIZE_ADDRESS
1392 #define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address
1394 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
1395 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p
1397 #undef TARGET_ASM_FUNCTION_PROLOGUE
1398 #define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
1399 #undef TARGET_ASM_FUNCTION_EPILOGUE
1400 #define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue
1402 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
1403 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra
1405 #undef TARGET_LEGITIMIZE_ADDRESS
1406 #define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address
1408 #undef TARGET_SCHED_VARIABLE_ISSUE
1409 #define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue
1411 #undef TARGET_SCHED_ISSUE_RATE
1412 #define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
1413 #undef TARGET_SCHED_ADJUST_COST
1414 #define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
1415 #undef TARGET_SCHED_ADJUST_PRIORITY
1416 #define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
1417 #undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
1418 #define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
1419 #undef TARGET_SCHED_INIT
1420 #define TARGET_SCHED_INIT rs6000_sched_init
1421 #undef TARGET_SCHED_FINISH
1422 #define TARGET_SCHED_FINISH rs6000_sched_finish
1423 #undef TARGET_SCHED_REORDER
1424 #define TARGET_SCHED_REORDER rs6000_sched_reorder
1425 #undef TARGET_SCHED_REORDER2
1426 #define TARGET_SCHED_REORDER2 rs6000_sched_reorder2
1428 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1429 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead
1431 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
1432 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard
1434 #undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
1435 #define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
1436 #undef TARGET_SCHED_INIT_SCHED_CONTEXT
1437 #define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
1438 #undef TARGET_SCHED_SET_SCHED_CONTEXT
1439 #define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
1440 #undef TARGET_SCHED_FREE_SCHED_CONTEXT
1441 #define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
1443 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
1444 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
1445 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
1446 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
1447 rs6000_builtin_support_vector_misalignment
1448 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
1449 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
1450 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
1451 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
1452 rs6000_builtin_vectorization_cost
1453 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
1454 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
1455 rs6000_preferred_simd_mode
1456 #undef TARGET_VECTORIZE_INIT_COST
1457 #define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
1458 #undef TARGET_VECTORIZE_ADD_STMT_COST
1459 #define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
1460 #undef TARGET_VECTORIZE_FINISH_COST
1461 #define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
1462 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
1463 #define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
1465 #undef TARGET_INIT_BUILTINS
1466 #define TARGET_INIT_BUILTINS rs6000_init_builtins
1467 #undef TARGET_BUILTIN_DECL
1468 #define TARGET_BUILTIN_DECL rs6000_builtin_decl
1470 #undef TARGET_EXPAND_BUILTIN
1471 #define TARGET_EXPAND_BUILTIN rs6000_expand_builtin
1473 #undef TARGET_MANGLE_TYPE
1474 #define TARGET_MANGLE_TYPE rs6000_mangle_type
1476 #undef TARGET_INIT_LIBFUNCS
1477 #define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs
1479 #if TARGET_MACHO
1480 #undef TARGET_BINDS_LOCAL_P
1481 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1482 #endif
1484 #undef TARGET_MS_BITFIELD_LAYOUT_P
1485 #define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p
1487 #undef TARGET_ASM_OUTPUT_MI_THUNK
1488 #define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk
1490 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1491 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
1493 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1494 #define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall
1496 #undef TARGET_REGISTER_MOVE_COST
1497 #define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
1498 #undef TARGET_MEMORY_MOVE_COST
1499 #define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
1500 #undef TARGET_RTX_COSTS
1501 #define TARGET_RTX_COSTS rs6000_rtx_costs
1502 #undef TARGET_ADDRESS_COST
1503 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
1505 #undef TARGET_DWARF_REGISTER_SPAN
1506 #define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span
1508 #undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
1509 #define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
1511 #undef TARGET_MEMBER_TYPE_FORCES_BLK
1512 #define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk
1514 #undef TARGET_PROMOTE_FUNCTION_MODE
1515 #define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode
1517 #undef TARGET_RETURN_IN_MEMORY
1518 #define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory
1520 #undef TARGET_RETURN_IN_MSB
1521 #define TARGET_RETURN_IN_MSB rs6000_return_in_msb
1523 #undef TARGET_SETUP_INCOMING_VARARGS
1524 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
1526 /* Always strict argument naming on rs6000. */
1527 #undef TARGET_STRICT_ARGUMENT_NAMING
1528 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
1529 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
1530 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
1531 #undef TARGET_SPLIT_COMPLEX_ARG
1532 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
1533 #undef TARGET_MUST_PASS_IN_STACK
1534 #define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
1535 #undef TARGET_PASS_BY_REFERENCE
1536 #define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
1537 #undef TARGET_ARG_PARTIAL_BYTES
1538 #define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
1539 #undef TARGET_FUNCTION_ARG_ADVANCE
1540 #define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
1541 #undef TARGET_FUNCTION_ARG
1542 #define TARGET_FUNCTION_ARG rs6000_function_arg
1543 #undef TARGET_FUNCTION_ARG_BOUNDARY
1544 #define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary
1546 #undef TARGET_BUILD_BUILTIN_VA_LIST
1547 #define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list
1549 #undef TARGET_EXPAND_BUILTIN_VA_START
1550 #define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start
1552 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1553 #define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg
1555 #undef TARGET_EH_RETURN_FILTER_MODE
1556 #define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode
1558 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1559 #define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p
1561 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1562 #define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p
1564 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
1565 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn
1567 #undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
1568 #define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip
1570 #undef TARGET_MD_ASM_CLOBBERS
1571 #define TARGET_MD_ASM_CLOBBERS rs6000_md_asm_clobbers
1573 #undef TARGET_OPTION_OVERRIDE
1574 #define TARGET_OPTION_OVERRIDE rs6000_option_override
1576 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1577 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
1578 rs6000_builtin_vectorized_function
1580 #if !TARGET_MACHO
1581 #undef TARGET_STACK_PROTECT_FAIL
1582 #define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
1583 #endif
1585 /* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
1586 The PowerPC architecture requires only weak consistency among
1587 processors--that is, memory accesses between processors need not be
1588 sequentially consistent and memory accesses among processors can occur
1589 in any order. The ability to order memory accesses weakly provides
1590 opportunities for more efficient use of the system bus. Unless a
1591 dependency exists, the 604e allows read operations to precede store
1592 operations. */
1593 #undef TARGET_RELAXED_ORDERING
1594 #define TARGET_RELAXED_ORDERING true
1596 #ifdef HAVE_AS_TLS
1597 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1598 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1599 #endif
1601 /* Use a 32-bit anchor range. This leads to sequences like:
1603 addis tmp,anchor,high
1604 add dest,tmp,low
1606 where tmp itself acts as an anchor, and can be shared between
1607 accesses to the same 64k page. */
1608 #undef TARGET_MIN_ANCHOR_OFFSET
1609 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
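/* Note on the literal above (an editor's aside, not from the original
   source): the minimum is written as -0x7fffffff - 1 rather than
   -0x80000000 because 0x80000000 does not fit in a 32-bit signed int,
   so the latter would be the negation of an unsigned constant.  */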
1610 #undef TARGET_MAX_ANCHOR_OFFSET
1611 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1612 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1613 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1614 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1615 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1617 #undef TARGET_BUILTIN_RECIPROCAL
1618 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1620 #undef TARGET_EXPAND_TO_RTL_HOOK
1621 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1623 #undef TARGET_INSTANTIATE_DECLS
1624 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1626 #undef TARGET_SECONDARY_RELOAD
1627 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1629 #undef TARGET_LEGITIMATE_ADDRESS_P
1630 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1632 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1633 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1635 #undef TARGET_LRA_P
1636 #define TARGET_LRA_P rs6000_lra_p
1638 #undef TARGET_CAN_ELIMINATE
1639 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1641 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1642 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1644 #undef TARGET_TRAMPOLINE_INIT
1645 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1647 #undef TARGET_FUNCTION_VALUE
1648 #define TARGET_FUNCTION_VALUE rs6000_function_value
1650 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1651 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1653 #undef TARGET_OPTION_SAVE
1654 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1656 #undef TARGET_OPTION_RESTORE
1657 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1659 #undef TARGET_OPTION_PRINT
1660 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1662 #undef TARGET_CAN_INLINE_P
1663 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1665 #undef TARGET_SET_CURRENT_FUNCTION
1666 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1668 #undef TARGET_LEGITIMATE_CONSTANT_P
1669 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1671 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1672 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1674 #undef TARGET_CAN_USE_DOLOOP_P
1675 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1677 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1678 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1680 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1681 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1682 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1683 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1684 #undef TARGET_UNWIND_WORD_MODE
1685 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1688 /* Processor table. */
1689 struct rs6000_ptt
1691 const char *const name; /* Canonical processor name. */
1692 const enum processor_type processor; /* Processor type enum value. */
1693 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1696 static struct rs6000_ptt const processor_target_table[] =
1698 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1699 #include "rs6000-cpus.def"
1700 #undef RS6000_CPU
1703 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1704 name is invalid. */
1706 static int
1707 rs6000_cpu_name_lookup (const char *name)
1709 size_t i;
1711 if (name != NULL)
1713 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1714 if (! strcmp (name, processor_target_table[i].name))
1715 return (int)i;
1718 return -1;
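/* Illustrative use of the lookup (a sketch, not code from this file;
   "power7" is only an example name -- the valid names are whatever
   rs6000-cpus.def defines):

     int idx = rs6000_cpu_name_lookup ("power7");
     if (idx >= 0)
       rs6000_cpu = processor_target_table[idx].processor;
*/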
1722 /* Return number of consecutive hard regs needed starting at reg REGNO
1723 to hold something of mode MODE.
1724 This is ordinarily the length in words of a value of mode MODE
1725 but can be less for certain modes in special long registers.
1727 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1728 scalar instructions. The upper 32 bits are only available to the
1729 SIMD instructions.
1731 POWER and PowerPC GPRs hold 32 bits worth;
1732 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1734 static int
1735 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1737 unsigned HOST_WIDE_INT reg_size;
1739 /* TF/TD modes are special in that they always take 2 registers. */
1740 if (FP_REGNO_P (regno))
1741 reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
1742 ? UNITS_PER_VSX_WORD
1743 : UNITS_PER_FP_WORD);
1745 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1746 reg_size = UNITS_PER_SPE_WORD;
1748 else if (ALTIVEC_REGNO_P (regno))
1749 reg_size = UNITS_PER_ALTIVEC_WORD;
1751 /* The value returned for SCmode in the E500 double case is 2 for
1752 ABI compatibility; storing an SCmode value in a single register
1753 would require function_arg and rs6000_spe_function_arg to handle
1754 SCmode so as to pass the value correctly in a pair of
1755 registers. */
1756 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1757 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1758 reg_size = UNITS_PER_FP_WORD;
1760 else
1761 reg_size = UNITS_PER_WORD;
1763 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
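/* Worked example of the ceiling division above: TFmode is 16 bytes and
   UNITS_PER_FP_WORD is 8, so an FPR value needs (16 + 8 - 1) / 8 = 2
   consecutive registers, matching the note above that TF/TD modes
   always take 2 registers.  */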
1766 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1767 MODE. */
1768 static int
1769 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1771 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1773 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1774 register pairs, and we use PTImode where we need to deal with quad word
1775 memory operations. Don't allow quad words in the argument or frame
1776 pointer registers, just registers 0..31. */
1777 if (mode == PTImode)
1778 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1779 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1780 && ((regno & 1) == 0));
1782 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1783 implementations. Don't allow an item to be split between a FP register
1784 and an Altivec register. Allow TImode in all VSX registers if the user
1785 asked for it. */
1786 if (TARGET_VSX && VSX_REGNO_P (regno)
1787 && (VECTOR_MEM_VSX_P (mode)
1788 || reg_addr[mode].scalar_in_vmx_p
1789 || (TARGET_VSX_TIMODE && mode == TImode)
1790 || (TARGET_VADDUQM && mode == V1TImode)))
1792 if (FP_REGNO_P (regno))
1793 return FP_REGNO_P (last_regno);
1795 if (ALTIVEC_REGNO_P (regno))
1797 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1798 return 0;
1800 return ALTIVEC_REGNO_P (last_regno);
1804 /* The GPRs can hold any mode, but values bigger than one register
1805 cannot go past R31. */
1806 if (INT_REGNO_P (regno))
1807 return INT_REGNO_P (last_regno);
1809 /* The float registers (except for VSX vector modes) can only hold floating
1810 modes and DImode. */
1811 if (FP_REGNO_P (regno))
1813 if (SCALAR_FLOAT_MODE_P (mode)
1814 && (mode != TDmode || (regno % 2) == 0)
1815 && FP_REGNO_P (last_regno))
1816 return 1;
1818 if (GET_MODE_CLASS (mode) == MODE_INT
1819 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1820 return 1;
1822 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1823 && PAIRED_VECTOR_MODE (mode))
1824 return 1;
1826 return 0;
1829 /* The CR register can only hold CC modes. */
1830 if (CR_REGNO_P (regno))
1831 return GET_MODE_CLASS (mode) == MODE_CC;
1833 if (CA_REGNO_P (regno))
1834 return mode == Pmode || mode == SImode;
1836 /* AltiVec modes can go only in AltiVec registers. */
1837 if (ALTIVEC_REGNO_P (regno))
1838 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1839 || mode == V1TImode);
1841 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1842 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1843 return 1;
1845 /* We cannot put non-VSX TImode or PTImode anywhere except general registers,
1846 and the value must fit within the register set. */
1848 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
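/* Example of the PTImode rule at the top of this function: a PTImode
   value may start at an even GPR such as r10 (occupying r10/r11), but
   a start at r11 is rejected by the (regno & 1) == 0 test.  */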
1851 /* Print interesting facts about registers. */
1852 static void
1853 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1855 int r, m;
1857 for (r = first_regno; r <= last_regno; ++r)
1859 const char *comma = "";
1860 int len;
1862 if (first_regno == last_regno)
1863 fprintf (stderr, "%s:\t", reg_name);
1864 else
1865 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1867 len = 8;
1868 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1869 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1871 if (len > 70)
1873 fprintf (stderr, ",\n\t");
1874 len = 8;
1875 comma = "";
1878 if (rs6000_hard_regno_nregs[m][r] > 1)
1879 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1880 rs6000_hard_regno_nregs[m][r]);
1881 else
1882 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1884 comma = ", ";
1887 if (call_used_regs[r])
1889 if (len > 70)
1891 fprintf (stderr, ",\n\t");
1892 len = 8;
1893 comma = "";
1896 len += fprintf (stderr, "%s%s", comma, "call-used");
1897 comma = ", ";
1900 if (fixed_regs[r])
1902 if (len > 70)
1904 fprintf (stderr, ",\n\t");
1905 len = 8;
1906 comma = "";
1909 len += fprintf (stderr, "%s%s", comma, "fixed");
1910 comma = ", ";
1913 if (len > 70)
1915 fprintf (stderr, ",\n\t");
1916 comma = "";
1919 len += fprintf (stderr, "%sreg-class = %s", comma,
1920 reg_class_names[(int)rs6000_regno_regclass[r]]);
1921 comma = ", ";
1923 if (len > 70)
1925 fprintf (stderr, ",\n\t");
1926 comma = "";
1929 fprintf (stderr, "%sregno = %d\n", comma, r);
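/* A line of the resulting dump looks roughly like the following
   (illustrative only; the exact mode list depends on the configuration
   and the target flags):

     gr0:	SI, DI/2, call-used, reg-class = GENERAL_REGS, regno = 0
*/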
1933 static const char *
1934 rs6000_debug_vector_unit (enum rs6000_vector v)
1936 const char *ret;
1938 switch (v)
1940 case VECTOR_NONE: ret = "none"; break;
1941 case VECTOR_ALTIVEC: ret = "altivec"; break;
1942 case VECTOR_VSX: ret = "vsx"; break;
1943 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1944 case VECTOR_PAIRED: ret = "paired"; break;
1945 case VECTOR_SPE: ret = "spe"; break;
1946 case VECTOR_OTHER: ret = "other"; break;
1947 default: ret = "unknown"; break;
1950 return ret;
1953 /* Inner function printing just the address mask for a particular reload
1954 register class. */
1955 DEBUG_FUNCTION char *
1956 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
1958 static char ret[8];
1959 char *p = ret;
1961 if ((mask & RELOAD_REG_VALID) != 0)
1962 *p++ = 'v';
1963 else if (keep_spaces)
1964 *p++ = ' ';
1966 if ((mask & RELOAD_REG_MULTIPLE) != 0)
1967 *p++ = 'm';
1968 else if (keep_spaces)
1969 *p++ = ' ';
1971 if ((mask & RELOAD_REG_INDEXED) != 0)
1972 *p++ = 'i';
1973 else if (keep_spaces)
1974 *p++ = ' ';
1976 if ((mask & RELOAD_REG_OFFSET) != 0)
1977 *p++ = 'o';
1978 else if (keep_spaces)
1979 *p++ = ' ';
1981 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
1982 *p++ = '+';
1983 else if (keep_spaces)
1984 *p++ = ' ';
1986 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
1987 *p++ = '+';
1988 else if (keep_spaces)
1989 *p++ = ' ';
1991 if ((mask & RELOAD_REG_AND_M16) != 0)
1992 *p++ = '&';
1993 else if (keep_spaces)
1994 *p++ = ' ';
1996 *p = '\0';
1998 return ret;
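/* For example, a mask with just RELOAD_REG_VALID, RELOAD_REG_INDEXED and
   RELOAD_REG_OFFSET set prints as "vio" without keep_spaces, or as
   "v io   " (unset flags padded with blanks) with it.  */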
2001 /* Print the address masks in a human readable fashion. */
2002 DEBUG_FUNCTION void
2003 rs6000_debug_print_mode (ssize_t m)
2005 ssize_t rc;
2007 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2008 for (rc = 0; rc < N_RELOAD_REG; rc++)
2009 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2010 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2012 if (rs6000_vector_unit[m] != VECTOR_NONE
2013 || rs6000_vector_mem[m] != VECTOR_NONE
2014 || (reg_addr[m].reload_store != CODE_FOR_nothing)
2015 || (reg_addr[m].reload_load != CODE_FOR_nothing)
2016 || reg_addr[m].scalar_in_vmx_p)
2018 fprintf (stderr,
2019 " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c Upper=%c",
2020 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2021 rs6000_debug_vector_unit (rs6000_vector_mem[m]),
2022 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2023 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*',
2024 (reg_addr[m].scalar_in_vmx_p) ? 'y' : 'n');
2027 fputs ("\n", stderr);
2030 #define DEBUG_FMT_ID "%-32s= "
2031 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2032 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2033 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
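/* By string concatenation DEBUG_FMT_S expands to "%-32s= %s\n", so
   fprintf (stderr, DEBUG_FMT_S, "cmodel", "small") prints the key name
   left-justified in a 32-character field followed by "= small".  */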
2035 /* Print various interesting information with -mdebug=reg. */
2036 static void
2037 rs6000_debug_reg_global (void)
2039 static const char *const tf[2] = { "false", "true" };
2040 const char *nl = (const char *)0;
2041 int m;
2042 size_t m1, m2, v;
2043 char costly_num[20];
2044 char nop_num[20];
2045 char flags_buffer[40];
2046 const char *costly_str;
2047 const char *nop_str;
2048 const char *trace_str;
2049 const char *abi_str;
2050 const char *cmodel_str;
2051 struct cl_target_option cl_opts;
2053 /* Modes we want tieable information on. */
2054 static const machine_mode print_tieable_modes[] = {
2055 QImode,
2056 HImode,
2057 SImode,
2058 DImode,
2059 TImode,
2060 PTImode,
2061 SFmode,
2062 DFmode,
2063 TFmode,
2064 SDmode,
2065 DDmode,
2066 TDmode,
2067 V8QImode,
2068 V4HImode,
2069 V2SImode,
2070 V16QImode,
2071 V8HImode,
2072 V4SImode,
2073 V2DImode,
2074 V1TImode,
2075 V32QImode,
2076 V16HImode,
2077 V8SImode,
2078 V4DImode,
2079 V2TImode,
2080 V2SFmode,
2081 V4SFmode,
2082 V2DFmode,
2083 V8SFmode,
2084 V4DFmode,
2085 CCmode,
2086 CCUNSmode,
2087 CCEQmode,
2090 /* Virtual regs we are interested in. */
2091 static const struct {
2092 int regno; /* register number. */
2093 const char *name; /* register name. */
2094 } virtual_regs[] = {
2095 { STACK_POINTER_REGNUM, "stack pointer:" },
2096 { TOC_REGNUM, "toc: " },
2097 { STATIC_CHAIN_REGNUM, "static chain: " },
2098 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2099 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2100 { ARG_POINTER_REGNUM, "arg pointer: " },
2101 { FRAME_POINTER_REGNUM, "frame pointer:" },
2102 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2103 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2104 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2105 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2106 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2107 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2108 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2109 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2110 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2113 fputs ("\nHard register information:\n", stderr);
2114 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2115 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2116 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2117 LAST_ALTIVEC_REGNO,
2118 "vs");
2119 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2120 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2121 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2122 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2123 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2124 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2125 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2126 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2128 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2129 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2130 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2132 fprintf (stderr,
2133 "\n"
2134 "d reg_class = %s\n"
2135 "f reg_class = %s\n"
2136 "v reg_class = %s\n"
2137 "wa reg_class = %s\n"
2138 "wd reg_class = %s\n"
2139 "wf reg_class = %s\n"
2140 "wg reg_class = %s\n"
2141 "wh reg_class = %s\n"
2142 "wi reg_class = %s\n"
2143 "wj reg_class = %s\n"
2144 "wk reg_class = %s\n"
2145 "wl reg_class = %s\n"
2146 "wm reg_class = %s\n"
2147 "wr reg_class = %s\n"
2148 "ws reg_class = %s\n"
2149 "wt reg_class = %s\n"
2150 "wu reg_class = %s\n"
2151 "wv reg_class = %s\n"
2152 "ww reg_class = %s\n"
2153 "wx reg_class = %s\n"
2154 "wy reg_class = %s\n"
2155 "wz reg_class = %s\n"
2156 "\n",
2157 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2158 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2159 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2160 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2161 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2162 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2163 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2164 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2165 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2166 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2167 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2168 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2169 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2170 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2171 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2172 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2173 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2174 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2175 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2176 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2177 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2178 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2180 nl = "\n";
2181 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2182 rs6000_debug_print_mode (m);
2184 fputs ("\n", stderr);
2186 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2188 machine_mode mode1 = print_tieable_modes[m1];
2189 bool first_time = true;
2191 nl = (const char *)0;
2192 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2194 machine_mode mode2 = print_tieable_modes[m2];
2195 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2197 if (first_time)
2199 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2200 nl = "\n";
2201 first_time = false;
2204 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2208 if (!first_time)
2209 fputs ("\n", stderr);
2212 if (nl)
2213 fputs (nl, stderr);
2215 if (rs6000_recip_control)
2217 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2219 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2220 if (rs6000_recip_bits[m])
2222 fprintf (stderr,
2223 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2224 GET_MODE_NAME (m),
2225 (RS6000_RECIP_AUTO_RE_P (m)
2226 ? "auto"
2227 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2228 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2229 ? "auto"
2230 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2233 fputs ("\n", stderr);
2236 if (rs6000_cpu_index >= 0)
2238 const char *name = processor_target_table[rs6000_cpu_index].name;
2239 HOST_WIDE_INT flags
2240 = processor_target_table[rs6000_cpu_index].target_enable;
2242 sprintf (flags_buffer, "-mcpu=%s flags", name);
2243 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2245 else
2246 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2248 if (rs6000_tune_index >= 0)
2250 const char *name = processor_target_table[rs6000_tune_index].name;
2251 HOST_WIDE_INT flags
2252 = processor_target_table[rs6000_tune_index].target_enable;
2254 sprintf (flags_buffer, "-mtune=%s flags", name);
2255 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2257 else
2258 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2260 cl_target_option_save (&cl_opts, &global_options);
2261 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2262 rs6000_isa_flags);
2264 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2265 rs6000_isa_flags_explicit);
2267 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2268 rs6000_builtin_mask);
2270 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2272 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2273 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2275 switch (rs6000_sched_costly_dep)
2277 case max_dep_latency:
2278 costly_str = "max_dep_latency";
2279 break;
2281 case no_dep_costly:
2282 costly_str = "no_dep_costly";
2283 break;
2285 case all_deps_costly:
2286 costly_str = "all_deps_costly";
2287 break;
2289 case true_store_to_load_dep_costly:
2290 costly_str = "true_store_to_load_dep_costly";
2291 break;
2293 case store_to_load_dep_costly:
2294 costly_str = "store_to_load_dep_costly";
2295 break;
2297 default:
2298 costly_str = costly_num;
2299 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2300 break;
2303 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2305 switch (rs6000_sched_insert_nops)
2307 case sched_finish_regroup_exact:
2308 nop_str = "sched_finish_regroup_exact";
2309 break;
2311 case sched_finish_pad_groups:
2312 nop_str = "sched_finish_pad_groups";
2313 break;
2315 case sched_finish_none:
2316 nop_str = "sched_finish_none";
2317 break;
2319 default:
2320 nop_str = nop_num;
2321 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2322 break;
2325 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2327 switch (rs6000_sdata)
2329 default:
2330 case SDATA_NONE:
2331 break;
2333 case SDATA_DATA:
2334 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2335 break;
2337 case SDATA_SYSV:
2338 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2339 break;
2341 case SDATA_EABI:
2342 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2343 break;
2347 switch (rs6000_traceback)
2349 case traceback_default: trace_str = "default"; break;
2350 case traceback_none: trace_str = "none"; break;
2351 case traceback_part: trace_str = "part"; break;
2352 case traceback_full: trace_str = "full"; break;
2353 default: trace_str = "unknown"; break;
2356 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2358 switch (rs6000_current_cmodel)
2360 case CMODEL_SMALL: cmodel_str = "small"; break;
2361 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2362 case CMODEL_LARGE: cmodel_str = "large"; break;
2363 default: cmodel_str = "unknown"; break;
2366 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2368 switch (rs6000_current_abi)
2370 case ABI_NONE: abi_str = "none"; break;
2371 case ABI_AIX: abi_str = "aix"; break;
2372 case ABI_ELFv2: abi_str = "ELFv2"; break;
2373 case ABI_V4: abi_str = "V4"; break;
2374 case ABI_DARWIN: abi_str = "darwin"; break;
2375 default: abi_str = "unknown"; break;
2378 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2380 if (rs6000_altivec_abi)
2381 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2383 if (rs6000_spe_abi)
2384 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2386 if (rs6000_darwin64_abi)
2387 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2389 if (rs6000_float_gprs)
2390 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2392 fprintf (stderr, DEBUG_FMT_S, "fprs",
2393 (TARGET_FPRS ? "true" : "false"));
2395 fprintf (stderr, DEBUG_FMT_S, "single_float",
2396 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2398 fprintf (stderr, DEBUG_FMT_S, "double_float",
2399 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2401 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2402 (TARGET_SOFT_FLOAT ? "true" : "false"));
2404 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2405 (TARGET_E500_SINGLE ? "true" : "false"));
2407 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2408 (TARGET_E500_DOUBLE ? "true" : "false"));
2410 if (TARGET_LINK_STACK)
2411 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2413 if (targetm.lra_p ())
2414 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2416 if (TARGET_P8_FUSION)
2417 fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
2418 (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
2420 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2421 TARGET_SECURE_PLT ? "secure" : "bss");
2422 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2423 aix_struct_return ? "aix" : "sysv");
2424 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2425 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2426 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2427 tf[!!rs6000_align_branch_targets]);
2428 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2429 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2430 rs6000_long_double_type_size);
2431 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2432 (int)rs6000_sched_restricted_insns_priority);
2433 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2434 (int)END_BUILTINS);
2435 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2436 (int)RS6000_BUILTIN_COUNT);
2438 if (TARGET_VSX)
2439 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2440 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2444 /* Update the addr mask bits in reg_addr to help secondary reload and the
2445 legitimate address support figure out the appropriate addressing to
2446 use. */
2448 static void
2449 rs6000_setup_reg_addr_masks (void)
2451 ssize_t rc, reg, m, nregs;
2452 addr_mask_type any_addr_mask, addr_mask;
2454 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2456 machine_mode m2 = (machine_mode)m;
2458 /* SDmode is special in that we want to access it only via REG+REG
2459 addressing on power7 and above, since we want to use the LFIWZX and
2460 STFIWX instructions to load and store it. */
2461 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2463 any_addr_mask = 0;
2464 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2466 addr_mask = 0;
2467 reg = reload_reg_map[rc].reg;
2469 /* Can mode values go in the GPR/FPR/Altivec registers? */
2470 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2472 nregs = rs6000_hard_regno_nregs[m][reg];
2473 addr_mask |= RELOAD_REG_VALID;
2475 /* Indicate if the mode takes more than 1 physical register. If
2476 it takes a single register, indicate it can do REG+REG
2477 addressing. */
2478 if (nregs > 1 || m == BLKmode)
2479 addr_mask |= RELOAD_REG_MULTIPLE;
2480 else
2481 addr_mask |= RELOAD_REG_INDEXED;
2483 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2484 addressing. Restrict addressing on SPE for 64-bit types
2485 because of the SUBREG hackery used to address 64-bit floats in
2486 '32-bit' GPRs. */
2488 if (TARGET_UPDATE
2489 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2490 && GET_MODE_SIZE (m2) <= 8
2491 && !VECTOR_MODE_P (m2)
2492 && !COMPLEX_MODE_P (m2)
2493 && !indexed_only_p
2494 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8))
2496 addr_mask |= RELOAD_REG_PRE_INCDEC;
2498 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2499 we don't allow PRE_MODIFY for some multi-register
2500 operations. */
2501 switch (m)
2503 default:
2504 addr_mask |= RELOAD_REG_PRE_MODIFY;
2505 break;
2507 case DImode:
2508 if (TARGET_POWERPC64)
2509 addr_mask |= RELOAD_REG_PRE_MODIFY;
2510 break;
2512 case DFmode:
2513 case DDmode:
2514 if (TARGET_DF_INSN)
2515 addr_mask |= RELOAD_REG_PRE_MODIFY;
2516 break;
2521 /* GPR and FPR registers can do REG+OFFSET addressing, except
2522 possibly for SDmode. */
2523 if ((addr_mask != 0) && !indexed_only_p
2524 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
2525 addr_mask |= RELOAD_REG_OFFSET;
2527 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2528 addressing on 128-bit types. */
2529 if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16
2530 && (addr_mask & RELOAD_REG_VALID) != 0)
2531 addr_mask |= RELOAD_REG_AND_M16;
2533 reg_addr[m].addr_mask[rc] = addr_mask;
2534 any_addr_mask |= addr_mask;
2537 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
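/* Sketch of how these masks are typically consumed (a hypothetical
   helper for illustration, not part of this file):

     static bool
     mode_supports_pre_incdec_p (machine_mode mode)
     {
       return (reg_addr[mode].addr_mask[RELOAD_REG_ANY]
               & RELOAD_REG_PRE_INCDEC) != 0;
     }
*/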
2542 /* Initialize the various global tables that are based on register size. */
2543 static void
2544 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2546 ssize_t r, m, c;
2547 int align64;
2548 int align32;
2550 /* Precalculate REGNO_REG_CLASS. */
2551 rs6000_regno_regclass[0] = GENERAL_REGS;
2552 for (r = 1; r < 32; ++r)
2553 rs6000_regno_regclass[r] = BASE_REGS;
2555 for (r = 32; r < 64; ++r)
2556 rs6000_regno_regclass[r] = FLOAT_REGS;
2558 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2559 rs6000_regno_regclass[r] = NO_REGS;
2561 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2562 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2564 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2565 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2566 rs6000_regno_regclass[r] = CR_REGS;
2568 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2569 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2570 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2571 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2572 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2573 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2574 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2575 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2576 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2577 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2578 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2579 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2581 /* Precalculate the mapping from register class to the simpler reload
2582 register class. We don't need the classes that are combinations of
2583 other classes, just the simple ones that have constraint letters. */
2584 for (c = 0; c < N_REG_CLASSES; c++)
2585 reg_class_to_reg_type[c] = NO_REG_TYPE;
2587 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2588 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2589 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2590 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2591 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2592 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2593 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2594 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2595 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2596 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2597 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2598 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2600 if (TARGET_VSX)
2602 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2603 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2605 else
2607 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2608 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2611 /* Precalculate the valid memory formats as well as the vector information;
2612 this must be set up before the rs6000_hard_regno_nregs_internal calls
2613 below. */
2614 gcc_assert ((int)VECTOR_NONE == 0);
2615 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2616 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2618 gcc_assert ((int)CODE_FOR_nothing == 0);
2619 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2621 gcc_assert ((int)NO_REGS == 0);
2622 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2624 /* The VSX hardware allows native alignment for vectors, but we control whether
2625 the compiler believes it can use native alignment or still uses 128-bit alignment. */
2626 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2628 align64 = 64;
2629 align32 = 32;
2631 else
2633 align64 = 128;
2634 align32 = 128;
2637 /* V2DF mode, VSX only. */
2638 if (TARGET_VSX)
2640 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2641 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2642 rs6000_vector_align[V2DFmode] = align64;
2645 /* V4SF mode, either VSX or Altivec. */
2646 if (TARGET_VSX)
2648 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2649 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2650 rs6000_vector_align[V4SFmode] = align32;
2652 else if (TARGET_ALTIVEC)
2654 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2655 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2656 rs6000_vector_align[V4SFmode] = align32;
2659 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2660 and stores. */
2661 if (TARGET_ALTIVEC)
2663 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2664 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2665 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2666 rs6000_vector_align[V4SImode] = align32;
2667 rs6000_vector_align[V8HImode] = align32;
2668 rs6000_vector_align[V16QImode] = align32;
2670 if (TARGET_VSX)
2672 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2673 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2674 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2676 else
2678 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2679 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2680 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2684 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2685 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2686 if (TARGET_VSX)
2688 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2689 rs6000_vector_unit[V2DImode]
2690 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2691 rs6000_vector_align[V2DImode] = align64;
2693 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2694 rs6000_vector_unit[V1TImode]
2695 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2696 rs6000_vector_align[V1TImode] = 128;
2699 /* DFmode, see if we want to use the VSX unit. Memory is handled
2700 differently, so don't set rs6000_vector_mem. */
2701 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2703 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2704 rs6000_vector_align[DFmode] = 64;
2707 /* SFmode, see if we want to use the VSX unit. */
2708 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
2710 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2711 rs6000_vector_align[SFmode] = 32;
2714 /* Allow TImode in VSX registers and set the VSX memory macros. */
2715 if (TARGET_VSX && TARGET_VSX_TIMODE)
2717 rs6000_vector_mem[TImode] = VECTOR_VSX;
2718 rs6000_vector_align[TImode] = align64;
2721 /* TODO add SPE and paired floating point vector support. */
2723 /* Register class constraints for the constraints that depend on compile
2724 switches. When the VSX code was added, different constraints were added
2725 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2726 of the VSX registers are used. The register classes for scalar floating
2727 point types are set based on whether we allow that type into the upper
2728 (Altivec) registers. GCC has register classes to target the Altivec
2729 registers for load/store operations, to select using a VSX memory
2730 operation instead of the traditional floating point operation. The
2731 constraints are:
2733 d - Register class to use with traditional DFmode instructions.
2734 f - Register class to use with traditional SFmode instructions.
2735 v - Altivec register.
2736 wa - Any VSX register.
2737 wc - Reserved to represent individual CR bits (used in LLVM).
2738 wd - Preferred register class for V2DFmode.
2739 wf - Preferred register class for V4SFmode.
2740 wg - Float register for power6x move insns.
2741 wh - FP register for direct move instructions.
2742 wi - FP or VSX register to hold 64-bit integers for VSX insns.
2743 wj - FP or VSX register to hold 64-bit integers for direct moves.
2744 wk - FP or VSX register to hold 64-bit doubles for direct moves.
2745 wl - Float register if we can do 32-bit signed int loads.
2746 wm - VSX register for ISA 2.07 direct move operations.
2747 wn - always NO_REGS.
2748 wr - GPR if 64-bit mode is permitted.
2749 ws - Register class to do ISA 2.06 DF operations.
2750 wt - VSX register for TImode in VSX registers.
2751 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2752 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2753 ww - Register class to do SF conversions in with VSX operations.
2754 wx - Float register if we can do 32-bit int stores.
2755 wy - Register class to do ISA 2.07 SF operations.
2756 wz - Float register if we can do 32-bit unsigned int loads. */
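/* Once the table below is filled in, a constraint letter resolves to a
   register class with a lookup such as

     enum reg_class cl = rs6000_constraints[RS6000_CONSTRAINT_ws];

   which yields VSX_REGS when DFmode is allowed in the upper registers
   and FLOAT_REGS otherwise (see the TARGET_UPPER_REGS_DF test below).  */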
2758 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2759 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2761 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2762 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2764 if (TARGET_VSX)
2766 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2767 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
2768 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
2769 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
2771 if (TARGET_VSX_TIMODE)
2772 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
2774 if (TARGET_UPPER_REGS_DF) /* DFmode */
2776 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2777 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2779 else
2780 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2783 /* Add conditional constraints based on various options, to allow us to
2784 collapse multiple insn patterns. */
2785 if (TARGET_ALTIVEC)
2786 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2788 if (TARGET_MFPGPR) /* DFmode */
2789 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2791 if (TARGET_LFIWAX)
2792 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
2794 if (TARGET_DIRECT_MOVE)
2796 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
2797 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
2798 = rs6000_constraints[RS6000_CONSTRAINT_wi];
2799 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
2800 = rs6000_constraints[RS6000_CONSTRAINT_ws];
2801 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2804 if (TARGET_POWERPC64)
2805 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2807 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
2809 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2810 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2811 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2813 else if (TARGET_P8_VECTOR)
2815 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
2816 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2818 else if (TARGET_VSX)
2819 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2821 if (TARGET_STFIWX)
2822 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2824 if (TARGET_LFIWZX)
2825 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
2827 /* Set up the reload helper and direct move functions. */
2828 if (TARGET_VSX || TARGET_ALTIVEC)
2830 if (TARGET_64BIT)
2832 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2833 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2834 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2835 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2836 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2837 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2838 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2839 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2840 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2841 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2842 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2843 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2844 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2845 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2846 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2847 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2848 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2849 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2850 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2851 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2853 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2854 available. */
2855 if (TARGET_NO_SDMODE_STACK)
2857 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2858 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2861 if (TARGET_VSX_TIMODE)
2863 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2864 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2867 if (TARGET_DIRECT_MOVE)
2869 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2870 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2871 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2872 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2873 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2874 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2875 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2876 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2877 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2879 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2880 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2881 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2882 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2883 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2884 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2885 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2886 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2887 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2890 else
2892 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
2893 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
2894 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
2895 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
2896 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
2897 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
2898 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
2899 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
2900 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
2901 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
2902 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
2903 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
2904 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
2905 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
2906 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
2907 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
2908 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
2909 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
2910 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
2911 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
2913 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2914 available. */
2915 if (TARGET_NO_SDMODE_STACK)
2917 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
2918 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
2921 if (TARGET_VSX_TIMODE)
2923 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
2924 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
2927 if (TARGET_DIRECT_MOVE)
2929 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
2930 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
2931 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
2935 if (TARGET_UPPER_REGS_DF)
2936 reg_addr[DFmode].scalar_in_vmx_p = true;
2938 if (TARGET_UPPER_REGS_SF)
2939 reg_addr[SFmode].scalar_in_vmx_p = true;
2942 /* Precalculate HARD_REGNO_NREGS. */
2943 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2944 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2945 rs6000_hard_regno_nregs[m][r]
2946 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
2948 /* Precalculate HARD_REGNO_MODE_OK. */
2949 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2950 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2951 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
2952 rs6000_hard_regno_mode_ok_p[m][r] = true;
2954 /* Precalculate CLASS_MAX_NREGS sizes. */
2955 for (c = 0; c < LIM_REG_CLASSES; ++c)
2957 int reg_size;
2959 if (TARGET_VSX && VSX_REG_CLASS_P (c))
2960 reg_size = UNITS_PER_VSX_WORD;
2962 else if (c == ALTIVEC_REGS)
2963 reg_size = UNITS_PER_ALTIVEC_WORD;
2965 else if (c == FLOAT_REGS)
2966 reg_size = UNITS_PER_FP_WORD;
2968 else
2969 reg_size = UNITS_PER_WORD;
2971 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2973 machine_mode m2 = (machine_mode)m;
2974 int reg_size2 = reg_size;
2976 /* TFmode/TDmode always take 2 registers, even in VSX. */
2977 if (TARGET_VSX && VSX_REG_CLASS_P (c)
2978 && (m == TDmode || m == TFmode))
2979 reg_size2 = UNITS_PER_FP_WORD;
2981 rs6000_class_max_nregs[m][c]
2982 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
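/* Worked example of the computation above: V4SFmode is 16 bytes, so a
   class with reg_size 16 (VSX or Altivec) holds it in one register,
   while a class with reg_size 8 needs (16 + 8 - 1) / 8 = 2.  */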
2986 if (TARGET_E500_DOUBLE)
2987 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
2989 /* Calculate for which modes to automatically generate code using the
2990 reciprocal divide and square root instructions. In the future, possibly
2991 automatically generate the instructions even if the user did not specify
2992 -mrecip. The older machines' double precision reciprocal sqrt estimate is
2993 not accurate enough. */
2994 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
2995 if (TARGET_FRES)
2996 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2997 if (TARGET_FRE)
2998 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2999 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3000 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3001 if (VECTOR_UNIT_VSX_P (V2DFmode))
3002 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3004 if (TARGET_FRSQRTES)
3005 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3006 if (TARGET_FRSQRTE)
3007 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3008 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3009 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3010 if (VECTOR_UNIT_VSX_P (V2DFmode))
3011 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3013 if (rs6000_recip_control)
3015 if (!flag_finite_math_only)
3016 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3017 if (flag_trapping_math)
3018 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3019 if (!flag_reciprocal_math)
3020 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3021 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3023 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3024 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3025 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3027 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3028 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3029 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3031 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3032 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3033 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3035 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3036 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3037 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3039 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3040 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3041 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3043 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3044 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3045 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3047 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3048 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3049 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3051 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3052 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3053 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3057 /* Update the addr mask bits in reg_addr to help secondary reload and the
3058 legitimate address support figure out the appropriate addressing to
3059 use. */
3060 rs6000_setup_reg_addr_masks ();
3062 if (global_init_p || TARGET_DEBUG_TARGET)
3064 if (TARGET_DEBUG_REG)
3065 rs6000_debug_reg_global ();
3067 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3068 fprintf (stderr,
3069 "SImode variable mult cost = %d\n"
3070 "SImode constant mult cost = %d\n"
3071 "SImode short constant mult cost = %d\n"
3072 "DImode multiplication cost = %d\n"
3073 "SImode division cost = %d\n"
3074 "DImode division cost = %d\n"
3075 "Simple fp operation cost = %d\n"
3076 "DFmode multiplication cost = %d\n"
3077 "SFmode division cost = %d\n"
3078 "DFmode division cost = %d\n"
3079 "cache line size = %d\n"
3080 "l1 cache size = %d\n"
3081 "l2 cache size = %d\n"
3082 "simultaneous prefetches = %d\n"
3083 "\n",
3084 rs6000_cost->mulsi,
3085 rs6000_cost->mulsi_const,
3086 rs6000_cost->mulsi_const9,
3087 rs6000_cost->muldi,
3088 rs6000_cost->divsi,
3089 rs6000_cost->divdi,
3090 rs6000_cost->fp,
3091 rs6000_cost->dmul,
3092 rs6000_cost->sdiv,
3093 rs6000_cost->ddiv,
3094 rs6000_cost->cache_line_size,
3095 rs6000_cost->l1_cache_size,
3096 rs6000_cost->l2_cache_size,
3097 rs6000_cost->simultaneous_prefetches);
3101 #if TARGET_MACHO
3102 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3104 static void
3105 darwin_rs6000_override_options (void)
3107 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3108 off. */
3109 rs6000_altivec_abi = 1;
3110 TARGET_ALTIVEC_VRSAVE = 1;
3111 rs6000_current_abi = ABI_DARWIN;
3113 if (DEFAULT_ABI == ABI_DARWIN
3114 && TARGET_64BIT)
3115 darwin_one_byte_bool = 1;
3117 if (TARGET_64BIT && ! TARGET_POWERPC64)
3119 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3120 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3122 if (flag_mkernel)
3124 rs6000_default_long_calls = 1;
3125 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3128 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3129 Altivec. */
3130 if (!flag_mkernel && !flag_apple_kext
3131 && TARGET_64BIT
3132 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3133 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3135 /* Unless the user (not the configurer) has explicitly overridden
3136 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3137 G4 unless targeting the kernel. */
3138 if (!flag_mkernel
3139 && !flag_apple_kext
3140 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3141 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3142 && ! global_options_set.x_rs6000_cpu_index)
3144 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3147 #endif
3149 /* If not otherwise specified by a target, make 'long double' equivalent to
3150 'double'. */
3152 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3153 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3154 #endif
3156 /* Return the builtin mask for the various options that affect which
3157 builtins are available. In the past we used target_flags, but we've run
3158 out of bits, and some options like SPE and PAIRED are no longer in
3159 target_flags. */
3161 HOST_WIDE_INT
3162 rs6000_builtin_mask_calculate (void)
3164 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3165 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3166 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3167 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3168 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3169 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3170 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3171 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3172 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3173 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3174 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3175 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3176 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3177 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3178 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3179 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
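/* Illustrative check against the returned mask (a sketch, not code from
   this file):

     HOST_WIDE_INT btm = rs6000_builtin_mask_calculate ();
     bool have_vsx_builtins = (btm & RS6000_BTM_VSX) != 0;
*/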
3182 /* Implement TARGET_MD_ASM_CLOBBERS. All asm statements are considered
3183 to clobber the XER[CA] bit because clobbering that bit without telling
3184 the compiler worked just fine with versions of GCC before GCC 5, and
3185 breaking a lot of older code in ways that are hard to track down is
3186 not such a great idea. */
3188 static tree
3189 rs6000_md_asm_clobbers (tree, tree, tree clobbers)
3191 tree s = build_string (strlen (reg_names[CA_REGNO]), reg_names[CA_REGNO]);
3192 return tree_cons (NULL_TREE, s, clobbers);
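/* The effect is that an asm statement such as

     asm volatile ("addic %0,%1,1" : "=r" (x) : "r" (y));

   (a sketch; addic sets the carry bit) is treated as if the user had
   also written a clobber of XER[CA] explicitly.  */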
3195 /* Override command line options. Mostly we process the processor type and
3196 sometimes adjust other TARGET_ options. */
3198 static bool
3199 rs6000_option_override_internal (bool global_init_p)
3201 bool ret = true;
3202 bool have_cpu = false;
3204 /* The default cpu requested at configure time, if any. */
3205 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3207 HOST_WIDE_INT set_masks;
3208 int cpu_index;
3209 int tune_index;
3210 struct cl_target_option *main_target_opt
3211 = ((global_init_p || target_option_default_node == NULL)
3212 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3214 /* Remember the explicit arguments. */
3215 if (global_init_p)
3216 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3218 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3219 library functions, so warn about it. The flag may be useful for
3220 performance studies from time to time though, so don't disable it
3221 entirely. */
3222 if (global_options_set.x_rs6000_alignment_flags
3223 && rs6000_alignment_flags == MASK_ALIGN_POWER
3224 && DEFAULT_ABI == ABI_DARWIN
3225 && TARGET_64BIT)
3226 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3227 " it is incompatible with the installed C and C++ libraries");
3229 /* Numerous experiments show that IRA-based loop pressure
3230 calculation works better for RTL loop invariant motion on targets
3231 with enough (>= 32) registers. It is an expensive optimization,
3232 so it is enabled only when tuning for peak performance. */
3233 if (optimize >= 3 && global_init_p
3234 && !global_options_set.x_flag_ira_loop_pressure)
3235 flag_ira_loop_pressure = 1;
3237 /* Set the pointer size. */
3238 if (TARGET_64BIT)
3240 rs6000_pmode = (int)DImode;
3241 rs6000_pointer_size = 64;
3243 else
3245 rs6000_pmode = (int)SImode;
3246 rs6000_pointer_size = 32;
3249 /* Some OSs don't support saving the high part of 64-bit registers on context
3250 switch. Other OSs don't support saving Altivec registers. On those OSs,
3251 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3252 if the user wants either, the user must explicitly specify them and we
3253 won't interfere with the user's specification. */
3255 set_masks = POWERPC_MASKS;
3256 #ifdef OS_MISSING_POWERPC64
3257 if (OS_MISSING_POWERPC64)
3258 set_masks &= ~OPTION_MASK_POWERPC64;
3259 #endif
3260 #ifdef OS_MISSING_ALTIVEC
3261 if (OS_MISSING_ALTIVEC)
3262 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3263 #endif
3265 /* Don't let the processor default override options given explicitly. */
3266 set_masks &= ~rs6000_isa_flags_explicit;
3268 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3269 the cpu in a target attribute or pragma, but did not specify a tuning
3270 option, use the cpu for the tuning option rather than the option specified
3271 with -mtune on the command line. Process a '--with-cpu' configuration
3272 request as an implicit --cpu. */
3273 if (rs6000_cpu_index >= 0)
3275 cpu_index = rs6000_cpu_index;
3276 have_cpu = true;
3278 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3280 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3281 have_cpu = true;
3283 else if (implicit_cpu)
3285 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3286 have_cpu = true;
3288 else
3290 const char *default_cpu = (TARGET_POWERPC64 ? "powerpc64" : "powerpc");
3291 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3292 have_cpu = false;
3295 gcc_assert (cpu_index >= 0);
3297 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3298 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3299 with those from the cpu, except for options that were explicitly set. If
3300 we don't have a cpu, do not override the target bits set in
3301 TARGET_DEFAULT. */
3302 if (have_cpu)
3304 rs6000_isa_flags &= ~set_masks;
3305 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3306 & set_masks);
3308 else
3309 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3310 & ~rs6000_isa_flags_explicit);
3312 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3313 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3314 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3315 to using rs6000_isa_flags, we need to do the initialization here. */
3316 if (!have_cpu)
3317 rs6000_isa_flags |= (TARGET_DEFAULT & ~rs6000_isa_flags_explicit);
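/* Worked example (illustrative, not from the original source): given
   -mcpu=power8 -mno-vsx, OPTION_MASK_VSX is recorded in
   rs6000_isa_flags_explicit, so it was removed from set_masks above and
   the power8 entry in processor_target_table cannot switch VSX back on. */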
3319 if (rs6000_tune_index >= 0)
3320 tune_index = rs6000_tune_index;
3321 else if (have_cpu)
3322 rs6000_tune_index = tune_index = cpu_index;
3323 else
3324 {
3325 size_t i;
3326 enum processor_type tune_proc
3327 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3329 tune_index = -1;
3330 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3331 if (processor_target_table[i].processor == tune_proc)
3332 {
3333 rs6000_tune_index = tune_index = i;
3334 break;
3335 }
3336 }
3338 gcc_assert (tune_index >= 0);
3339 rs6000_cpu = processor_target_table[tune_index].processor;
3341 /* Pick defaults for SPE related control flags. Do this early to make sure
3342 that the TARGET_ macros are representative ASAP. */
3344 int spe_capable_cpu =
3345 (rs6000_cpu == PROCESSOR_PPC8540
3346 || rs6000_cpu == PROCESSOR_PPC8548);
3348 if (!global_options_set.x_rs6000_spe_abi)
3349 rs6000_spe_abi = spe_capable_cpu;
3351 if (!global_options_set.x_rs6000_spe)
3352 rs6000_spe = spe_capable_cpu;
3354 if (!global_options_set.x_rs6000_float_gprs)
3355 rs6000_float_gprs =
3356 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3357 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3358 : 0);
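/* E.g. (illustrative): with -mcpu=8540 and no explicit -mfloat-gprs=, this
   defaults to 1 (single-precision floats in GPRs); the 8548 (e500v2)
   defaults to 2 (single and double precision in GPRs). */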
3361 if (global_options_set.x_rs6000_spe_abi
3362 && rs6000_spe_abi
3363 && !TARGET_SPE_ABI)
3364 error ("not configured for SPE ABI");
3366 if (global_options_set.x_rs6000_spe
3367 && rs6000_spe
3368 && !TARGET_SPE)
3369 error ("not configured for SPE instruction set");
3371 if (main_target_opt != NULL
3372 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3373 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3374 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3375 error ("target attribute or pragma changes SPE ABI");
3377 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3378 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3379 || rs6000_cpu == PROCESSOR_PPCE5500)
3380 {
3381 if (TARGET_ALTIVEC)
3382 error ("AltiVec not supported in this target");
3383 if (TARGET_SPE)
3384 error ("SPE not supported in this target");
3385 }
3386 if (rs6000_cpu == PROCESSOR_PPCE6500)
3387 {
3388 if (TARGET_SPE)
3389 error ("SPE not supported in this target");
3390 }
3392 /* Disable Cell microcode if we are optimizing for the Cell
3393 and not optimizing for size. */
3394 if (rs6000_gen_cell_microcode == -1)
3395 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3396 && !optimize_size);
3398 /* If we are optimizing big endian systems for space and it's OK to
3399 use instructions that would be microcoded on the Cell, use the
3400 load/store multiple and string instructions. */
3401 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3402 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3403 | OPTION_MASK_STRING);
3405 /* Don't allow -mmultiple or -mstring on little endian systems
3406 unless the cpu is a 750, because the hardware doesn't support the
3407 instructions used in little endian mode, and they cause an alignment
3408 trap. The 750 does not cause an alignment trap (except when the
3409 target is unaligned). */
3411 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3412 {
3413 if (TARGET_MULTIPLE)
3414 {
3415 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3416 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3417 warning (0, "-mmultiple is not supported on little endian systems");
3418 }
3420 if (TARGET_STRING)
3421 {
3422 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3423 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3424 warning (0, "-mstring is not supported on little endian systems");
3425 }
3426 }
3428 /* If little-endian, default to -mstrict-align on older processors.
3429 Testing for htm matches power8 and later. */
3430 if (!BYTES_BIG_ENDIAN
3431 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3432 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3434 /* -maltivec={le,be} implies -maltivec. */
3435 if (rs6000_altivec_element_order != 0)
3436 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3438 /* Disallow -maltivec=le in big endian mode for now. This is not
3439 known to be useful for anyone. */
3440 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3441 {
3442 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3443 rs6000_altivec_element_order = 0;
3444 }
3446 /* Add some warnings for VSX. */
3447 if (TARGET_VSX)
3448 {
3449 const char *msg = NULL;
3450 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3451 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3452 {
3453 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3454 msg = N_("-mvsx requires hardware floating point");
3455 else
3456 {
3457 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3458 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3459 }
3460 }
3461 else if (TARGET_PAIRED_FLOAT)
3462 msg = N_("-mvsx and -mpaired are incompatible");
3463 else if (TARGET_AVOID_XFORM > 0)
3464 msg = N_("-mvsx needs indexed addressing");
3465 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3466 & OPTION_MASK_ALTIVEC))
3467 {
3468 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3469 msg = N_("-mvsx and -mno-altivec are incompatible");
3470 else
3471 msg = N_("-mno-altivec disables vsx");
3472 }
3474 if (msg)
3475 {
3476 warning (0, msg);
3477 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3478 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3479 }
3480 }
3482 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3483 the -mcpu setting to enable options that conflict. */
3484 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3485 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3486 | OPTION_MASK_ALTIVEC
3487 | OPTION_MASK_VSX)) != 0)
3488 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3489 | OPTION_MASK_DIRECT_MOVE)
3490 & ~rs6000_isa_flags_explicit);
3492 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3493 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3495 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3496 unless the user explicitly used the -mno-<option> to disable the code. */
3497 if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3498 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3499 else if (TARGET_VSX)
3500 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3501 else if (TARGET_POPCNTD)
3502 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3503 else if (TARGET_DFP)
3504 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3505 else if (TARGET_CMPB)
3506 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3507 else if (TARGET_FPRND)
3508 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3509 else if (TARGET_POPCNTB)
3510 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3511 else if (TARGET_ALTIVEC)
3512 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
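/* E.g. (illustrative): a bare -mvsx lands in the TARGET_VSX arm above and
   pulls in the ISA 2.06 server masks, but any mask the user explicitly
   negated on the command line stays off because it is filtered out by
   ~rs6000_isa_flags_explicit. */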
3514 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3515 {
3516 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3517 error ("-mcrypto requires -maltivec");
3518 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3519 }
3521 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3522 {
3523 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3524 error ("-mdirect-move requires -mvsx");
3525 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3526 }
3528 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3529 {
3530 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3531 error ("-mpower8-vector requires -maltivec");
3532 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3533 }
3535 if (TARGET_P8_VECTOR && !TARGET_VSX)
3536 {
3537 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3538 error ("-mpower8-vector requires -mvsx");
3539 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3540 }
3542 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3543 {
3544 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3545 error ("-mvsx-timode requires -mvsx");
3546 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3547 }
3549 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3550 {
3551 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3552 error ("-mhard-dfp requires -mhard-float");
3553 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3554 }
3556 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
3557 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
3558 the individual option. */
3559 if (TARGET_UPPER_REGS > 0)
3560 {
3561 if (TARGET_VSX
3562 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3563 {
3564 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
3565 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3566 }
3567 if (TARGET_P8_VECTOR
3568 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3569 {
3570 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
3571 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3572 }
3573 }
3574 else if (TARGET_UPPER_REGS == 0)
3575 {
3576 if (TARGET_VSX
3577 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3578 {
3579 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3580 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3581 }
3582 if (TARGET_P8_VECTOR
3583 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3584 {
3585 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3586 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3587 }
3588 }
3590 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
3591 {
3592 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
3593 error ("-mupper-regs-df requires -mvsx");
3594 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3595 }
3597 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
3598 {
3599 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
3600 error ("-mupper-regs-sf requires -mpower8-vector");
3601 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3602 }
3604 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3605 silently turn off quad memory mode. */
3606 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3607 {
3608 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3609 warning (0, N_("-mquad-memory requires 64-bit mode"));
3611 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3612 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3614 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3615 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3616 }
3618 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3619 the words are reversed, but atomic operations can still be done by
3620 swapping the words. */
3621 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3622 {
3623 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3624 warning (0, N_("-mquad-memory is not available in little endian mode"));
3626 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3627 }
3629 /* Assume if the user asked for normal quad memory instructions, they want
3630 the atomic versions as well, unless they explicitly told us not to use quad
3631 word atomic instructions. */
3632 if (TARGET_QUAD_MEMORY
3633 && !TARGET_QUAD_MEMORY_ATOMIC
3634 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3635 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3637 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3638 generating power8 instructions. */
3639 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3640 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3641 & OPTION_MASK_P8_FUSION);
3643 /* Power8 does not fuse sign extended loads with the addis. If we are
3644 optimizing at high levels for speed, convert a sign extended load into a
3645 zero extending load, and an explicit sign extension. */
3646 if (TARGET_P8_FUSION
3647 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3648 && optimize_function_for_speed_p (cfun)
3649 && optimize >= 3)
3650 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
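/* Illustrative effect (not from the original source): instead of an
   addis/lha pair that power8 cannot fuse, the compiler emits a fusable
   addis/lhz pair followed by a separate sign-extend instruction. */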
3652 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3653 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
3655 /* E500mc does "better" if we inline more aggressively. Respect the
3656 user's opinion, though. */
3657 if (rs6000_block_move_inline_limit == 0
3658 && (rs6000_cpu == PROCESSOR_PPCE500MC
3659 || rs6000_cpu == PROCESSOR_PPCE500MC64
3660 || rs6000_cpu == PROCESSOR_PPCE5500
3661 || rs6000_cpu == PROCESSOR_PPCE6500))
3662 rs6000_block_move_inline_limit = 128;
3664 /* store_one_arg depends on expand_block_move to handle at least the
3665 size of reg_parm_stack_space. */
3666 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
3667 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
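/* E.g., on a 64-bit target this floor guarantees that block moves of up
   to 64 bytes (the reg_parm_stack_space worst case mentioned above) are
   always expanded inline by expand_block_move. */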
3669 if (global_init_p)
3670 {
3671 /* If the appropriate debug option is enabled, replace the target hooks
3672 with debug versions that call the real version and then print
3673 debugging information. */
3674 if (TARGET_DEBUG_COST)
3675 {
3676 targetm.rtx_costs = rs6000_debug_rtx_costs;
3677 targetm.address_cost = rs6000_debug_address_cost;
3678 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
3679 }
3681 if (TARGET_DEBUG_ADDR)
3682 {
3683 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
3684 targetm.legitimize_address = rs6000_debug_legitimize_address;
3685 rs6000_secondary_reload_class_ptr
3686 = rs6000_debug_secondary_reload_class;
3687 rs6000_secondary_memory_needed_ptr
3688 = rs6000_debug_secondary_memory_needed;
3689 rs6000_cannot_change_mode_class_ptr
3690 = rs6000_debug_cannot_change_mode_class;
3691 rs6000_preferred_reload_class_ptr
3692 = rs6000_debug_preferred_reload_class;
3693 rs6000_legitimize_reload_address_ptr
3694 = rs6000_debug_legitimize_reload_address;
3695 rs6000_mode_dependent_address_ptr
3696 = rs6000_debug_mode_dependent_address;
3697 }
3699 if (rs6000_veclibabi_name)
3700 {
3701 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
3702 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
3703 else
3704 {
3705 error ("unknown vectorization library ABI type (%s) for "
3706 "-mveclibabi= switch", rs6000_veclibabi_name);
3707 ret = false;
3708 }
3709 }
3710 }
3712 if (!global_options_set.x_rs6000_long_double_type_size)
3714 if (main_target_opt != NULL
3715 && (main_target_opt->x_rs6000_long_double_type_size
3716 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
3717 error ("target attribute or pragma changes long double size");
3718 else
3719 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
3722 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
3723 if (!global_options_set.x_rs6000_ieeequad)
3724 rs6000_ieeequad = 1;
3725 #endif
3727 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3728 target attribute or pragma which automatically enables both options,
3729 unless the altivec ABI was set. This is set by default for 64-bit, but
3730 not for 32-bit. */
3731 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3732 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3733 & ~rs6000_isa_flags_explicit);
3735 /* Enable Altivec ABI for AIX -maltivec. */
3736 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
3737 {
3738 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3739 error ("target attribute or pragma changes AltiVec ABI");
3740 else
3741 rs6000_altivec_abi = 1;
3742 }
3744 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
3745 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
3746 be explicitly overridden in either case. */
3747 if (TARGET_ELF)
3748 {
3749 if (!global_options_set.x_rs6000_altivec_abi
3750 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
3751 {
3752 if (main_target_opt != NULL &&
3753 !main_target_opt->x_rs6000_altivec_abi)
3754 error ("target attribute or pragma changes AltiVec ABI");
3755 else
3756 rs6000_altivec_abi = 1;
3757 }
3758 }
3760 /* Set the Darwin64 ABI as default for 64-bit Darwin.
3761 So far, the only darwin64 targets are also MACH-O. */
3762 if (TARGET_MACHO
3763 && DEFAULT_ABI == ABI_DARWIN
3764 && TARGET_64BIT)
3765 {
3766 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
3767 error ("target attribute or pragma changes darwin64 ABI");
3768 else
3769 {
3770 rs6000_darwin64_abi = 1;
3771 /* Default to natural alignment, for better performance. */
3772 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3773 }
3774 }
3776 /* Place FP constants in the constant pool instead of TOC
3777 if section anchors enabled. */
3778 if (flag_section_anchors
3779 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
3780 TARGET_NO_FP_IN_TOC = 1;
3782 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3783 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
3785 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3786 SUBTARGET_OVERRIDE_OPTIONS;
3787 #endif
3788 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3789 SUBSUBTARGET_OVERRIDE_OPTIONS;
3790 #endif
3791 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
3792 SUB3TARGET_OVERRIDE_OPTIONS;
3793 #endif
3795 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3796 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
3798 /* For the E500 family of cores, reset the single/double FP flags to let us
3799 check that they remain constant across attributes or pragmas. Also,
3800 clear a possible request for string instructions, which are not supported
3801 and which we might have silently enabled above for -Os.
3803 For other families, clear ISEL in case it was set implicitly. */
3806 switch (rs6000_cpu)
3807 {
3808 case PROCESSOR_PPC8540:
3809 case PROCESSOR_PPC8548:
3810 case PROCESSOR_PPCE500MC:
3811 case PROCESSOR_PPCE500MC64:
3812 case PROCESSOR_PPCE5500:
3813 case PROCESSOR_PPCE6500:
3815 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
3816 rs6000_double_float = TARGET_E500_DOUBLE;
3818 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3820 break;
3822 default:
3824 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
3825 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
3827 break;
3828 }
3830 if (main_target_opt)
3831 {
3832 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
3833 error ("target attribute or pragma changes single precision floating "
3834 "point");
3835 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
3836 error ("target attribute or pragma changes double precision floating "
3837 "point");
3838 }
3840 /* Detect invalid option combinations with E500. */
3841 CHECK_E500_OPTIONS;
3843 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
3844 && rs6000_cpu != PROCESSOR_POWER5
3845 && rs6000_cpu != PROCESSOR_POWER6
3846 && rs6000_cpu != PROCESSOR_POWER7
3847 && rs6000_cpu != PROCESSOR_POWER8
3848 && rs6000_cpu != PROCESSOR_PPCA2
3849 && rs6000_cpu != PROCESSOR_CELL
3850 && rs6000_cpu != PROCESSOR_PPC476);
3851 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
3852 || rs6000_cpu == PROCESSOR_POWER5
3853 || rs6000_cpu == PROCESSOR_POWER7
3854 || rs6000_cpu == PROCESSOR_POWER8);
3855 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
3856 || rs6000_cpu == PROCESSOR_POWER5
3857 || rs6000_cpu == PROCESSOR_POWER6
3858 || rs6000_cpu == PROCESSOR_POWER7
3859 || rs6000_cpu == PROCESSOR_POWER8
3860 || rs6000_cpu == PROCESSOR_PPCE500MC
3861 || rs6000_cpu == PROCESSOR_PPCE500MC64
3862 || rs6000_cpu == PROCESSOR_PPCE5500
3863 || rs6000_cpu == PROCESSOR_PPCE6500);
3865 /* Allow debug switches to override the above settings. These are set to -1
3866 in rs6000.opt to indicate the user hasn't directly set the switch. */
3867 if (TARGET_ALWAYS_HINT >= 0)
3868 rs6000_always_hint = TARGET_ALWAYS_HINT;
3870 if (TARGET_SCHED_GROUPS >= 0)
3871 rs6000_sched_groups = TARGET_SCHED_GROUPS;
3873 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
3874 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
3876 rs6000_sched_restricted_insns_priority
3877 = (rs6000_sched_groups ? 1 : 0);
3879 /* Handle -msched-costly-dep option. */
3880 rs6000_sched_costly_dep
3881 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
3883 if (rs6000_sched_costly_dep_str)
3885 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
3886 rs6000_sched_costly_dep = no_dep_costly;
3887 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
3888 rs6000_sched_costly_dep = all_deps_costly;
3889 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
3890 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
3891 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
3892 rs6000_sched_costly_dep = store_to_load_dep_costly;
3893 else
3894 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
3895 atoi (rs6000_sched_costly_dep_str));
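/* Usage sketch (semantics per the GCC documentation for -msched-costly-dep,
   an assumption here): a numeric argument such as -msched-costly-dep=20
   falls through to the atoi case above and treats any dependence whose
   cost is at least 20 as costly. */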
3898 /* Handle -minsert-sched-nops option. */
3899 rs6000_sched_insert_nops
3900 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
3902 if (rs6000_sched_insert_nops_str)
3904 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
3905 rs6000_sched_insert_nops = sched_finish_none;
3906 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
3907 rs6000_sched_insert_nops = sched_finish_pad_groups;
3908 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
3909 rs6000_sched_insert_nops = sched_finish_regroup_exact;
3910 else
3911 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
3912 atoi (rs6000_sched_insert_nops_str));
3915 if (global_init_p)
3916 {
3917 #ifdef TARGET_REGNAMES
3918 /* If the user desires alternate register names, copy in the
3919 alternate names now. */
3920 if (TARGET_REGNAMES)
3921 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
3922 #endif
3924 /* Set aix_struct_return last, after the ABI is determined.
3925 If -maix-struct-return or -msvr4-struct-return was explicitly
3926 used, don't override with the ABI default. */
3927 if (!global_options_set.x_aix_struct_return)
3928 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
3930 #if 0
3931 /* IBM XL compiler defaults to unsigned bitfields. */
3932 if (TARGET_XL_COMPAT)
3933 flag_signed_bitfields = 0;
3934 #endif
3936 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3937 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
3939 if (TARGET_TOC)
3940 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
3942 /* We can only guarantee the availability of DI pseudo-ops when
3943 assembling for 64-bit targets. */
3944 if (!TARGET_64BIT)
3945 {
3946 targetm.asm_out.aligned_op.di = NULL;
3947 targetm.asm_out.unaligned_op.di = NULL;
3948 }
3951 /* Set branch target alignment, if not optimizing for size. */
3952 if (!optimize_size)
3953 {
3954 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
3955 8-byte aligned to avoid misprediction by the branch predictor. */
3956 if (rs6000_cpu == PROCESSOR_TITAN
3957 || rs6000_cpu == PROCESSOR_CELL)
3958 {
3959 if (align_functions <= 0)
3960 align_functions = 8;
3961 if (align_jumps <= 0)
3962 align_jumps = 8;
3963 if (align_loops <= 0)
3964 align_loops = 8;
3965 }
3966 if (rs6000_align_branch_targets)
3967 {
3968 if (align_functions <= 0)
3969 align_functions = 16;
3970 if (align_jumps <= 0)
3971 align_jumps = 16;
3972 if (align_loops <= 0)
3973 {
3974 can_override_loop_align = 1;
3975 align_loops = 16;
3976 }
3977 }
3978 if (align_jumps_max_skip <= 0)
3979 align_jumps_max_skip = 15;
3980 if (align_loops_max_skip <= 0)
3981 align_loops_max_skip = 15;
3982 }
3984 /* Arrange to save and restore machine status around nested functions. */
3985 init_machine_status = rs6000_init_machine_status;
3987 /* We should always be splitting complex arguments, but we can't break
3988 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
3989 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
3990 targetm.calls.split_complex_arg = NULL;
3991 }
3993 /* Initialize rs6000_cost with the appropriate target costs. */
3994 if (optimize_size)
3995 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
3996 else
3997 switch (rs6000_cpu)
3998 {
3999 case PROCESSOR_RS64A:
4000 rs6000_cost = &rs64a_cost;
4001 break;
4003 case PROCESSOR_MPCCORE:
4004 rs6000_cost = &mpccore_cost;
4005 break;
4007 case PROCESSOR_PPC403:
4008 rs6000_cost = &ppc403_cost;
4009 break;
4011 case PROCESSOR_PPC405:
4012 rs6000_cost = &ppc405_cost;
4013 break;
4015 case PROCESSOR_PPC440:
4016 rs6000_cost = &ppc440_cost;
4017 break;
4019 case PROCESSOR_PPC476:
4020 rs6000_cost = &ppc476_cost;
4021 break;
4023 case PROCESSOR_PPC601:
4024 rs6000_cost = &ppc601_cost;
4025 break;
4027 case PROCESSOR_PPC603:
4028 rs6000_cost = &ppc603_cost;
4029 break;
4031 case PROCESSOR_PPC604:
4032 rs6000_cost = &ppc604_cost;
4033 break;
4035 case PROCESSOR_PPC604e:
4036 rs6000_cost = &ppc604e_cost;
4037 break;
4039 case PROCESSOR_PPC620:
4040 rs6000_cost = &ppc620_cost;
4041 break;
4043 case PROCESSOR_PPC630:
4044 rs6000_cost = &ppc630_cost;
4045 break;
4047 case PROCESSOR_CELL:
4048 rs6000_cost = &ppccell_cost;
4049 break;
4051 case PROCESSOR_PPC750:
4052 case PROCESSOR_PPC7400:
4053 rs6000_cost = &ppc750_cost;
4054 break;
4056 case PROCESSOR_PPC7450:
4057 rs6000_cost = &ppc7450_cost;
4058 break;
4060 case PROCESSOR_PPC8540:
4061 case PROCESSOR_PPC8548:
4062 rs6000_cost = &ppc8540_cost;
4063 break;
4065 case PROCESSOR_PPCE300C2:
4066 case PROCESSOR_PPCE300C3:
4067 rs6000_cost = &ppce300c2c3_cost;
4068 break;
4070 case PROCESSOR_PPCE500MC:
4071 rs6000_cost = &ppce500mc_cost;
4072 break;
4074 case PROCESSOR_PPCE500MC64:
4075 rs6000_cost = &ppce500mc64_cost;
4076 break;
4078 case PROCESSOR_PPCE5500:
4079 rs6000_cost = &ppce5500_cost;
4080 break;
4082 case PROCESSOR_PPCE6500:
4083 rs6000_cost = &ppce6500_cost;
4084 break;
4086 case PROCESSOR_TITAN:
4087 rs6000_cost = &titan_cost;
4088 break;
4090 case PROCESSOR_POWER4:
4091 case PROCESSOR_POWER5:
4092 rs6000_cost = &power4_cost;
4093 break;
4095 case PROCESSOR_POWER6:
4096 rs6000_cost = &power6_cost;
4097 break;
4099 case PROCESSOR_POWER7:
4100 rs6000_cost = &power7_cost;
4101 break;
4103 case PROCESSOR_POWER8:
4104 rs6000_cost = &power8_cost;
4105 break;
4107 case PROCESSOR_PPCA2:
4108 rs6000_cost = &ppca2_cost;
4109 break;
4111 default:
4112 gcc_unreachable ();
4113 }
4115 if (global_init_p)
4116 {
4117 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4118 rs6000_cost->simultaneous_prefetches,
4119 global_options.x_param_values,
4120 global_options_set.x_param_values);
4121 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4122 global_options.x_param_values,
4123 global_options_set.x_param_values);
4124 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4125 rs6000_cost->cache_line_size,
4126 global_options.x_param_values,
4127 global_options_set.x_param_values);
4128 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4129 global_options.x_param_values,
4130 global_options_set.x_param_values);
4132 /* Increase loop peeling limits based on performance analysis. */
4133 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4134 global_options.x_param_values,
4135 global_options_set.x_param_values);
4136 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4137 global_options.x_param_values,
4138 global_options_set.x_param_values);
4140 /* If using typedef char *va_list, signal that
4141 __builtin_va_start (&ap, 0) can be optimized to
4142 ap = __builtin_next_arg (0). */
4143 if (DEFAULT_ABI != ABI_V4)
4144 targetm.expand_builtin_va_start = NULL;
4145 }
4147 /* Set up single/double float flags.
4148 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4149 then set both flags. */
4150 if (TARGET_HARD_FLOAT && TARGET_FPRS
4151 && rs6000_single_float == 0 && rs6000_double_float == 0)
4152 rs6000_single_float = rs6000_double_float = 1;
4154 /* If not explicitly specified via option, decide whether to generate indexed
4155 load/store instructions. */
4156 if (TARGET_AVOID_XFORM == -1)
4157 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4158 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4159 need indexed accesses and the type used is the scalar type of the element
4160 being loaded or stored. */
4161 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4162 && !TARGET_ALTIVEC);
4164 /* Set the -mrecip options. */
4165 if (rs6000_recip_name)
4166 {
4167 char *p = ASTRDUP (rs6000_recip_name);
4168 char *q;
4169 unsigned int mask, i;
4170 bool invert;
4172 while ((q = strtok (p, ",")) != NULL)
4173 {
4174 p = NULL;
4175 if (*q == '!')
4176 {
4177 invert = true;
4178 q++;
4179 }
4180 else
4181 invert = false;
4183 if (!strcmp (q, "default"))
4184 mask = ((TARGET_RECIP_PRECISION)
4185 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4186 else
4187 {
4188 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4189 if (!strcmp (q, recip_options[i].string))
4190 {
4191 mask = recip_options[i].mask;
4192 break;
4193 }
4195 if (i == ARRAY_SIZE (recip_options))
4196 {
4197 error ("unknown option for -mrecip=%s", q);
4198 invert = false;
4199 mask = 0;
4200 ret = false;
4201 }
4202 }
4204 if (invert)
4205 rs6000_recip_control &= ~mask;
4206 else
4207 rs6000_recip_control |= mask;
4208 }
4209 }
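/* Usage sketch (option names assumed from the recip_options table defined
   earlier in this file): -mrecip=rsqrtf,!divd turns on the single-precision
   reciprocal square root estimate, while the leading '!' turns off the
   double-precision divide estimate. */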
4211 /* Set the builtin mask of the various options used that could affect which
4212 builtins were used. In the past we used target_flags, but we've run out
4213 of bits, and some options like SPE and PAIRED are no longer in
4214 target_flags. */
4215 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4216 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4217 {
4218 fprintf (stderr,
4219 "new builtin mask = " HOST_WIDE_INT_PRINT_HEX ", ",
4220 rs6000_builtin_mask);
4221 rs6000_print_builtin_options (stderr, 0, NULL, rs6000_builtin_mask);
4222 }
4224 /* Initialize all of the registers. */
4225 rs6000_init_hard_regno_mode_ok (global_init_p);
4227 /* Save the initial options in case the user uses function-specific options. */
4228 if (global_init_p)
4229 target_option_default_node = target_option_current_node
4230 = build_target_option_node (&global_options);
4232 /* If not explicitly specified via option, decide whether to generate the
4233 extra blr's required to preserve the link stack on some cpus (eg, 476). */
4234 if (TARGET_LINK_STACK == -1)
4235 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4237 return ret;
4238 }
4240 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4241 define the target cpu type. */
4243 static void
4244 rs6000_option_override (void)
4245 {
4246 (void) rs6000_option_override_internal (true);
4248 /* Register machine-specific passes. This needs to be done at start-up.
4249 It's convenient to do it here (like i386 does). */
4250 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4252 struct register_pass_info analyze_swaps_info
4253 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4255 register_pass (&analyze_swaps_info);
4256 }
4259 /* Implement targetm.vectorize.builtin_mask_for_load. */
4260 static tree
4261 rs6000_builtin_mask_for_load (void)
4262 {
4263 if (TARGET_ALTIVEC || TARGET_VSX)
4264 return altivec_builtin_mask_for_load;
4265 else
4266 return 0;
4267 }
4269 /* Implement LOOP_ALIGN. */
4270 int
4271 rs6000_loop_align (rtx label)
4272 {
4273 basic_block bb;
4274 int ninsns;
4276 /* Don't override loop alignment if -falign-loops was specified. */
4277 if (!can_override_loop_align)
4278 return align_loops_log;
4280 bb = BLOCK_FOR_INSN (label);
4281 ninsns = num_loop_insns (bb->loop_father);
4283 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4284 if (ninsns > 4 && ninsns <= 8
4285 && (rs6000_cpu == PROCESSOR_POWER4
4286 || rs6000_cpu == PROCESSOR_POWER5
4287 || rs6000_cpu == PROCESSOR_POWER6
4288 || rs6000_cpu == PROCESSOR_POWER7
4289 || rs6000_cpu == PROCESSOR_POWER8))
4290 return 5;
4291 else
4292 return align_loops_log;
4293 }
4295 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4296 static int
4297 rs6000_loop_align_max_skip (rtx_insn *label)
4298 {
4299 return (1 << rs6000_loop_align (label)) - 1;
4300 }
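/* E.g., when rs6000_loop_align returns 5 (32-byte alignment), up to
   (1 << 5) - 1 = 31 bytes of padding may be skipped. */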
4302 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4303 after applying N iterations. This routine does not determine
4304 how many iterations are required to reach desired alignment. */
4306 static bool
4307 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4308 {
4309 if (is_packed)
4310 return false;
4312 if (TARGET_32BIT)
4313 {
4314 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4315 return true;
4317 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4318 return true;
4320 return false;
4321 }
4322 else
4323 {
4324 if (TARGET_MACHO)
4325 return false;
4327 /* Assume that all other types are naturally aligned. CHECKME! */
4328 return true;
4329 }
4330 }
4332 /* Return true if the vector misalignment factor is supported by the
4333 target. */
4334 static bool
4335 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4336 const_tree type,
4337 int misalignment,
4338 bool is_packed)
4339 {
4340 if (TARGET_VSX)
4341 {
4342 /* Return if movmisalign pattern is not supported for this mode. */
4343 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4344 return false;
4346 if (misalignment == -1)
4347 {
4348 /* Misalignment factor is unknown at compile time but we know
4349 it's word aligned. */
4350 if (rs6000_vector_alignment_reachable (type, is_packed))
4351 {
4352 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4354 if (element_size == 64 || element_size == 32)
4355 return true;
4356 }
4358 return false;
4359 }
4361 /* VSX supports word-aligned vectors. */
4362 if (misalignment % 4 == 0)
4363 return true;
4364 }
4365 return false;
4366 }
4368 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4369 static int
4370 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4371 tree vectype, int misalign)
4372 {
4373 unsigned elements;
4374 tree elem_type;
4376 switch (type_of_cost)
4377 {
4378 case scalar_stmt:
4379 case scalar_load:
4380 case scalar_store:
4381 case vector_stmt:
4382 case vector_load:
4383 case vector_store:
4384 case vec_to_scalar:
4385 case scalar_to_vec:
4386 case cond_branch_not_taken:
4387 return 1;
4389 case vec_perm:
4390 if (TARGET_VSX)
4391 return 3;
4392 else
4393 return 1;
4395 case vec_promote_demote:
4396 if (TARGET_VSX)
4397 return 4;
4398 else
4399 return 1;
4401 case cond_branch_taken:
4402 return 3;
4404 case unaligned_load:
4405 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4406 {
4407 elements = TYPE_VECTOR_SUBPARTS (vectype);
4408 if (elements == 2)
4409 /* Double word aligned. */
4410 return 2;
4412 if (elements == 4)
4413 {
4414 switch (misalign)
4415 {
4416 case 8:
4417 /* Double word aligned. */
4418 return 2;
4420 case -1:
4421 /* Unknown misalignment. */
4422 case 4:
4423 case 12:
4424 /* Word aligned. */
4425 return 22;
4427 default:
4428 gcc_unreachable ();
4429 }
4430 }
4431 }
4433 if (TARGET_ALTIVEC)
4434 /* Misaligned loads are not supported. */
4435 gcc_unreachable ();
4437 return 2;
4439 case unaligned_store:
4440 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4441 {
4442 elements = TYPE_VECTOR_SUBPARTS (vectype);
4443 if (elements == 2)
4444 /* Double word aligned. */
4445 return 2;
4447 if (elements == 4)
4448 {
4449 switch (misalign)
4450 {
4451 case 8:
4452 /* Double word aligned. */
4453 return 2;
4455 case -1:
4456 /* Unknown misalignment. */
4457 case 4:
4458 case 12:
4459 /* Word aligned. */
4460 return 23;
4462 default:
4463 gcc_unreachable ();
4464 }
4465 }
4466 }
4468 if (TARGET_ALTIVEC)
4469 /* Misaligned stores are not supported. */
4470 gcc_unreachable ();
4472 return 2;
4474 case vec_construct:
4475 elements = TYPE_VECTOR_SUBPARTS (vectype);
4476 elem_type = TREE_TYPE (vectype);
4477 /* 32-bit vectors loaded into registers are stored as double
4478 precision, so we need n/2 converts in addition to the usual
4479 n/2 merges to construct a vector of short floats from them. */
4480 if (SCALAR_FLOAT_TYPE_P (elem_type)
4481 && TYPE_PRECISION (elem_type) == 32)
4482 return elements + 1;
4483 else
4484 return elements / 2 + 1;
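/* E.g., constructing a V4SF vector costs 4 + 1 = 5 (n/2 converts plus
   n/2 merges, plus one), while a V4SI vector costs 4 / 2 + 1 = 3. */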
4486 default:
4487 gcc_unreachable ();
4488 }
4489 }
4491 /* Implement targetm.vectorize.preferred_simd_mode. */
4493 static machine_mode
4494 rs6000_preferred_simd_mode (machine_mode mode)
4495 {
4496 if (TARGET_VSX)
4497 switch (mode)
4498 {
4499 case DFmode:
4500 return V2DFmode;
4501 default:;
4502 }
4503 if (TARGET_ALTIVEC || TARGET_VSX)
4504 switch (mode)
4505 {
4506 case SFmode:
4507 return V4SFmode;
4508 case TImode:
4509 return V1TImode;
4510 case DImode:
4511 return V2DImode;
4512 case SImode:
4513 return V4SImode;
4514 case HImode:
4515 return V8HImode;
4516 case QImode:
4517 return V16QImode;
4518 default:;
4519 }
4520 if (TARGET_SPE)
4521 switch (mode)
4522 {
4523 case SFmode:
4524 return V2SFmode;
4525 case SImode:
4526 return V2SImode;
4527 default:;
4528 }
4529 if (TARGET_PAIRED_FLOAT
4530 && mode == SFmode)
4531 return V2SFmode;
4532 return word_mode;
4533 }
4535 typedef struct _rs6000_cost_data
4536 {
4537 struct loop *loop_info;
4538 unsigned cost[3];
4539 } rs6000_cost_data;
4541 /* Test for likely overcommitment of vector hardware resources. If a
4542 loop iteration is relatively large, and too large a percentage of
4543 instructions in the loop are vectorized, the cost model may not
4544 adequately reflect delays from unavailable vector resources.
4545 Penalize the loop body cost for this case. */
4547 static void
4548 rs6000_density_test (rs6000_cost_data *data)
4549 {
4550 const int DENSITY_PCT_THRESHOLD = 85;
4551 const int DENSITY_SIZE_THRESHOLD = 70;
4552 const int DENSITY_PENALTY = 10;
4553 struct loop *loop = data->loop_info;
4554 basic_block *bbs = get_loop_body (loop);
4555 int nbbs = loop->num_nodes;
4556 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4557 int i, density_pct;
4559 for (i = 0; i < nbbs; i++)
4560 {
4561 basic_block bb = bbs[i];
4562 gimple_stmt_iterator gsi;
4564 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4565 {
4566 gimple stmt = gsi_stmt (gsi);
4567 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4569 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4570 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4571 not_vec_cost++;
4572 }
4573 }
4575 free (bbs);
4576 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4578 if (density_pct > DENSITY_PCT_THRESHOLD
4579 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4580 {
4581 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4582 if (dump_enabled_p ())
4583 dump_printf_loc (MSG_NOTE, vect_location,
4584 "density %d%%, cost %d exceeds threshold, penalizing "
4585 "loop body cost by %d%%", density_pct,
4586 vec_cost + not_vec_cost, DENSITY_PENALTY);
4587 }
4588 }
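/* Worked example: with vec_cost = 90 and not_vec_cost = 10, the density is
   90% (> 85) and the size is 100 (> 70), so the vectorized body cost is
   raised from 90 to 90 * 110 / 100 = 99. */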
4590 /* Implement targetm.vectorize.init_cost. */
4592 static void *
4593 rs6000_init_cost (struct loop *loop_info)
4594 {
4595 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4596 data->loop_info = loop_info;
4597 data->cost[vect_prologue] = 0;
4598 data->cost[vect_body] = 0;
4599 data->cost[vect_epilogue] = 0;
4600 return data;
4601 }
4603 /* Implement targetm.vectorize.add_stmt_cost. */
4605 static unsigned
4606 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4607 struct _stmt_vec_info *stmt_info, int misalign,
4608 enum vect_cost_model_location where)
4609 {
4610 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4611 unsigned retval = 0;
4613 if (flag_vect_cost_model)
4614 {
4615 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4616 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
4617 misalign);
4618 /* Statements in an inner loop relative to the loop being
4619 vectorized are weighted more heavily. The value here is
4620 arbitrary and could potentially be improved with analysis. */
4621 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4622 count *= 50; /* FIXME. */
4624 retval = (unsigned) (count * stmt_cost);
4625 cost_data->cost[where] += retval;
4626 }
4628 return retval;
4629 }
4631 /* Implement targetm.vectorize.finish_cost. */
4633 static void
4634 rs6000_finish_cost (void *data, unsigned *prologue_cost,
4635 unsigned *body_cost, unsigned *epilogue_cost)
4636 {
4637 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4639 if (cost_data->loop_info)
4640 rs6000_density_test (cost_data);
4642 *prologue_cost = cost_data->cost[vect_prologue];
4643 *body_cost = cost_data->cost[vect_body];
4644 *epilogue_cost = cost_data->cost[vect_epilogue];
4645 }
4647 /* Implement targetm.vectorize.destroy_cost_data. */
4649 static void
4650 rs6000_destroy_cost_data (void *data)
4651 {
4652 free (data);
4653 }
4655 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
4656 library with vectorized intrinsics. */
4658 static tree
4659 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
4660 {
4661 char name[32];
4662 const char *suffix = NULL;
4663 tree fntype, new_fndecl, bdecl = NULL_TREE;
4664 int n_args = 1;
4665 const char *bname;
4666 machine_mode el_mode, in_mode;
4667 int n, in_n;
4669 /* Libmass is suitable for unsafe math only as it does not correctly support
4670 parts of IEEE with the required precision such as denormals. Only support
4671 it if we have VSX to use the simd d2 or f4 functions.
4672 XXX: Add variable length support. */
4673 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
4674 return NULL_TREE;
4676 el_mode = TYPE_MODE (TREE_TYPE (type_out));
4677 n = TYPE_VECTOR_SUBPARTS (type_out);
4678 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4679 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4680 if (el_mode != in_mode
4681 || n != in_n)
4682 return NULL_TREE;
4684 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4685 {
4686 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4687 switch (fn)
4688 {
4689 case BUILT_IN_ATAN2:
4690 case BUILT_IN_HYPOT:
4691 case BUILT_IN_POW:
4692 n_args = 2;
4693 /* fall through */
4695 case BUILT_IN_ACOS:
4696 case BUILT_IN_ACOSH:
4697 case BUILT_IN_ASIN:
4698 case BUILT_IN_ASINH:
4699 case BUILT_IN_ATAN:
4700 case BUILT_IN_ATANH:
4701 case BUILT_IN_CBRT:
4702 case BUILT_IN_COS:
4703 case BUILT_IN_COSH:
4704 case BUILT_IN_ERF:
4705 case BUILT_IN_ERFC:
4706 case BUILT_IN_EXP2:
4707 case BUILT_IN_EXP:
4708 case BUILT_IN_EXPM1:
4709 case BUILT_IN_LGAMMA:
4710 case BUILT_IN_LOG10:
4711 case BUILT_IN_LOG1P:
4712 case BUILT_IN_LOG2:
4713 case BUILT_IN_LOG:
4714 case BUILT_IN_SIN:
4715 case BUILT_IN_SINH:
4716 case BUILT_IN_SQRT:
4717 case BUILT_IN_TAN:
4718 case BUILT_IN_TANH:
4719 bdecl = builtin_decl_implicit (fn);
4720 suffix = "d2"; /* pow -> powd2 */
4721 if (el_mode != DFmode
4722 || n != 2
4723 || !bdecl)
4724 return NULL_TREE;
4725 break;
4727 case BUILT_IN_ATAN2F:
4728 case BUILT_IN_HYPOTF:
4729 case BUILT_IN_POWF:
4730 n_args = 2;
4731 /* fall through */
4733 case BUILT_IN_ACOSF:
4734 case BUILT_IN_ACOSHF:
4735 case BUILT_IN_ASINF:
4736 case BUILT_IN_ASINHF:
4737 case BUILT_IN_ATANF:
4738 case BUILT_IN_ATANHF:
4739 case BUILT_IN_CBRTF:
4740 case BUILT_IN_COSF:
4741 case BUILT_IN_COSHF:
4742 case BUILT_IN_ERFF:
4743 case BUILT_IN_ERFCF:
4744 case BUILT_IN_EXP2F:
4745 case BUILT_IN_EXPF:
4746 case BUILT_IN_EXPM1F:
4747 case BUILT_IN_LGAMMAF:
4748 case BUILT_IN_LOG10F:
4749 case BUILT_IN_LOG1PF:
4750 case BUILT_IN_LOG2F:
4751 case BUILT_IN_LOGF:
4752 case BUILT_IN_SINF:
4753 case BUILT_IN_SINHF:
4754 case BUILT_IN_SQRTF:
4755 case BUILT_IN_TANF:
4756 case BUILT_IN_TANHF:
4757 bdecl = builtin_decl_implicit (fn);
4758 suffix = "4"; /* powf -> powf4 */
4759 if (el_mode != SFmode
4760 || n != 4
4761 || !bdecl)
4762 return NULL_TREE;
4763 break;
4765 default:
4766 return NULL_TREE;
4767 }
4768 }
4769 else
4770 return NULL_TREE;
4772 gcc_assert (suffix != NULL);
4773 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
4774 if (!bname)
4775 return NULL_TREE;
4777 strcpy (name, bname + sizeof ("__builtin_") - 1);
4778 strcat (name, suffix);
4780 if (n_args == 1)
4781 fntype = build_function_type_list (type_out, type_in, NULL);
4782 else if (n_args == 2)
4783 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
4784 else
4785 gcc_unreachable ();
4787 /* Build a function declaration for the vectorized function. */
4788 new_fndecl = build_decl (BUILTINS_LOCATION,
4789 FUNCTION_DECL, get_identifier (name), fntype);
4790 TREE_PUBLIC (new_fndecl) = 1;
4791 DECL_EXTERNAL (new_fndecl) = 1;
4792 DECL_IS_NOVOPS (new_fndecl) = 1;
4793 TREE_READONLY (new_fndecl) = 1;
4795 return new_fndecl;
4796 }
4798 /* Returns a function decl for a vectorized version of the builtin function
4799 with builtin function code FN and the result vector type TYPE, or NULL_TREE
4800 if it is not available. */
4802 static tree
4803 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
4804 tree type_in)
4805 {
4806 machine_mode in_mode, out_mode;
4807 int in_n, out_n;
4809 if (TARGET_DEBUG_BUILTIN)
4810 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
4811 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
4812 GET_MODE_NAME (TYPE_MODE (type_out)),
4813 GET_MODE_NAME (TYPE_MODE (type_in)));
4815 if (TREE_CODE (type_out) != VECTOR_TYPE
4816 || TREE_CODE (type_in) != VECTOR_TYPE
4817 || !TARGET_VECTORIZE_BUILTINS)
4818 return NULL_TREE;
4820 out_mode = TYPE_MODE (TREE_TYPE (type_out));
4821 out_n = TYPE_VECTOR_SUBPARTS (type_out);
4822 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4823 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4825 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4826 {
4827 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4828 switch (fn)
4829 {
4830 case BUILT_IN_CLZIMAX:
4831 case BUILT_IN_CLZLL:
4832 case BUILT_IN_CLZL:
4833 case BUILT_IN_CLZ:
4834 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4835 {
4836 if (out_mode == QImode && out_n == 16)
4837 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
4838 else if (out_mode == HImode && out_n == 8)
4839 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
4840 else if (out_mode == SImode && out_n == 4)
4841 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
4842 else if (out_mode == DImode && out_n == 2)
4843 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
4844 }
4845 break;
4846 case BUILT_IN_COPYSIGN:
4847 if (VECTOR_UNIT_VSX_P (V2DFmode)
4848 && out_mode == DFmode && out_n == 2
4849 && in_mode == DFmode && in_n == 2)
4850 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
4851 break;
4852 case BUILT_IN_COPYSIGNF:
4853 if (out_mode != SFmode || out_n != 4
4854 || in_mode != SFmode || in_n != 4)
4855 break;
4856 if (VECTOR_UNIT_VSX_P (V4SFmode))
4857 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
4858 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4859 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
4860 break;
4861 case BUILT_IN_POPCOUNTIMAX:
4862 case BUILT_IN_POPCOUNTLL:
4863 case BUILT_IN_POPCOUNTL:
4864 case BUILT_IN_POPCOUNT:
4865 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4866 {
4867 if (out_mode == QImode && out_n == 16)
4868 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
4869 else if (out_mode == HImode && out_n == 8)
4870 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
4871 else if (out_mode == SImode && out_n == 4)
4872 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
4873 else if (out_mode == DImode && out_n == 2)
4874 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
4875 }
4876 break;
4877 case BUILT_IN_SQRT:
4878 if (VECTOR_UNIT_VSX_P (V2DFmode)
4879 && out_mode == DFmode && out_n == 2
4880 && in_mode == DFmode && in_n == 2)
4881 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
4882 break;
4883 case BUILT_IN_SQRTF:
4884 if (VECTOR_UNIT_VSX_P (V4SFmode)
4885 && out_mode == SFmode && out_n == 4
4886 && in_mode == SFmode && in_n == 4)
4887 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
4888 break;
4889 case BUILT_IN_CEIL:
4890 if (VECTOR_UNIT_VSX_P (V2DFmode)
4891 && out_mode == DFmode && out_n == 2
4892 && in_mode == DFmode && in_n == 2)
4893 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
4894 break;
4895 case BUILT_IN_CEILF:
4896 if (out_mode != SFmode || out_n != 4
4897 || in_mode != SFmode || in_n != 4)
4898 break;
4899 if (VECTOR_UNIT_VSX_P (V4SFmode))
4900 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
4901 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4902 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
4903 break;
4904 case BUILT_IN_FLOOR:
4905 if (VECTOR_UNIT_VSX_P (V2DFmode)
4906 && out_mode == DFmode && out_n == 2
4907 && in_mode == DFmode && in_n == 2)
4908 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
4909 break;
4910 case BUILT_IN_FLOORF:
4911 if (out_mode != SFmode || out_n != 4
4912 || in_mode != SFmode || in_n != 4)
4913 break;
4914 if (VECTOR_UNIT_VSX_P (V4SFmode))
4915 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
4916 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4917 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
4918 break;
4919 case BUILT_IN_FMA:
4920 if (VECTOR_UNIT_VSX_P (V2DFmode)
4921 && out_mode == DFmode && out_n == 2
4922 && in_mode == DFmode && in_n == 2)
4923 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
4924 break;
4925 case BUILT_IN_FMAF:
4926 if (VECTOR_UNIT_VSX_P (V4SFmode)
4927 && out_mode == SFmode && out_n == 4
4928 && in_mode == SFmode && in_n == 4)
4929 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
4930 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
4931 && out_mode == SFmode && out_n == 4
4932 && in_mode == SFmode && in_n == 4)
4933 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
4934 break;
4935 case BUILT_IN_TRUNC:
4936 if (VECTOR_UNIT_VSX_P (V2DFmode)
4937 && out_mode == DFmode && out_n == 2
4938 && in_mode == DFmode && in_n == 2)
4939 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
4940 break;
4941 case BUILT_IN_TRUNCF:
4942 if (out_mode != SFmode || out_n != 4
4943 || in_mode != SFmode || in_n != 4)
4944 break;
4945 if (VECTOR_UNIT_VSX_P (V4SFmode))
4946 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
4947 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4948 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
4949 break;
4950 case BUILT_IN_NEARBYINT:
4951 if (VECTOR_UNIT_VSX_P (V2DFmode)
4952 && flag_unsafe_math_optimizations
4953 && out_mode == DFmode && out_n == 2
4954 && in_mode == DFmode && in_n == 2)
4955 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
4956 break;
4957 case BUILT_IN_NEARBYINTF:
4958 if (VECTOR_UNIT_VSX_P (V4SFmode)
4959 && flag_unsafe_math_optimizations
4960 && out_mode == SFmode && out_n == 4
4961 && in_mode == SFmode && in_n == 4)
4962 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
4963 break;
4964 case BUILT_IN_RINT:
4965 if (VECTOR_UNIT_VSX_P (V2DFmode)
4966 && !flag_trapping_math
4967 && out_mode == DFmode && out_n == 2
4968 && in_mode == DFmode && in_n == 2)
4969 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
4970 break;
4971 case BUILT_IN_RINTF:
4972 if (VECTOR_UNIT_VSX_P (V4SFmode)
4973 && !flag_trapping_math
4974 && out_mode == SFmode && out_n == 4
4975 && in_mode == SFmode && in_n == 4)
4976 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
4977 break;
4978 default:
4979 break;
4980 }
4981 }
4983 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
4984 {
4985 enum rs6000_builtins fn
4986 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
4987 switch (fn)
4988 {
4989 case RS6000_BUILTIN_RSQRTF:
4990 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4991 && out_mode == SFmode && out_n == 4
4992 && in_mode == SFmode && in_n == 4)
4993 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
4994 break;
4995 case RS6000_BUILTIN_RSQRT:
4996 if (VECTOR_UNIT_VSX_P (V2DFmode)
4997 && out_mode == DFmode && out_n == 2
4998 && in_mode == DFmode && in_n == 2)
4999 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5000 break;
5001 case RS6000_BUILTIN_RECIPF:
5002 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5003 && out_mode == SFmode && out_n == 4
5004 && in_mode == SFmode && in_n == 4)
5005 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5006 break;
5007 case RS6000_BUILTIN_RECIP:
5008 if (VECTOR_UNIT_VSX_P (V2DFmode)
5009 && out_mode == DFmode && out_n == 2
5010 && in_mode == DFmode && in_n == 2)
5011 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5012 break;
5013 default:
5014 break;
5015 }
5016 }
5018 /* Generate calls to libmass if appropriate. */
5019 if (rs6000_veclib_handler)
5020 return rs6000_veclib_handler (fndecl, type_out, type_in);
5022 return NULL_TREE;
5023 }
5025 /* Default CPU string for rs6000*_file_start functions. */
5026 static const char *rs6000_default_cpu;
5028 /* Do anything needed at the start of the asm file. */
5030 static void
5031 rs6000_file_start (void)
5032 {
5033 char buffer[80];
5034 const char *start = buffer;
5035 FILE *file = asm_out_file;
5037 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5039 default_file_start ();
5041 if (flag_verbose_asm)
5042 {
5043 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5045 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5046 {
5047 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5048 start = "";
5049 }
5051 if (global_options_set.x_rs6000_cpu_index)
5052 {
5053 fprintf (file, "%s -mcpu=%s", start,
5054 processor_target_table[rs6000_cpu_index].name);
5055 start = "";
5056 }
5058 if (global_options_set.x_rs6000_tune_index)
5059 {
5060 fprintf (file, "%s -mtune=%s", start,
5061 processor_target_table[rs6000_tune_index].name);
5062 start = "";
5063 }
5065 if (PPC405_ERRATUM77)
5066 {
5067 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5068 start = "";
5069 }
5071 #ifdef USING_ELFOS_H
5072 switch (rs6000_sdata)
5073 {
5074 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5075 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5076 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5077 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5078 }
5080 if (rs6000_sdata && g_switch_value)
5081 {
5082 fprintf (file, "%s -G %d", start,
5083 g_switch_value);
5084 start = "";
5085 }
5086 #endif
5088 if (*start == '\0')
5089 putc ('\n', file);
5090 }
5092 #ifdef USING_ELFOS_H
5093 if (rs6000_default_cpu == 0 || rs6000_default_cpu[0] == '\0'
5094 || !global_options_set.x_rs6000_cpu_index)
5095 {
5096 fputs ("\t.machine ", asm_out_file);
5097 if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5098 fputs ("power8\n", asm_out_file);
5099 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5100 fputs ("power7\n", asm_out_file);
5101 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5102 fputs ("power6\n", asm_out_file);
5103 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5104 fputs ("power5\n", asm_out_file);
5105 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5106 fputs ("power4\n", asm_out_file);
5107 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5108 fputs ("ppc64\n", asm_out_file);
5109 else
5110 fputs ("ppc\n", asm_out_file);
5111 }
5112 #endif
5114 if (DEFAULT_ABI == ABI_ELFv2)
5115 fprintf (file, "\t.abiversion 2\n");
5117 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
5118 || (TARGET_ELF && flag_pic == 2))
5119 {
5120 switch_to_section (toc_section);
5121 switch_to_section (text_section);
5122 }
5123 }
5126 /* Return nonzero if this function is known to have a null epilogue. */
5128 int
5129 direct_return (void)
5130 {
5131 if (reload_completed)
5132 {
5133 rs6000_stack_t *info = rs6000_stack_info ();
5135 if (info->first_gp_reg_save == 32
5136 && info->first_fp_reg_save == 64
5137 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5138 && ! info->lr_save_p
5139 && ! info->cr_save_p
5140 && info->vrsave_mask == 0
5141 && ! info->push_p)
5142 return 1;
5143 }
5145 return 0;
5146 }
5148 /* Return the number of instructions it takes to form a constant in an
5149 integer register. */
5151 int
5152 num_insns_constant_wide (HOST_WIDE_INT value)
5153 {
5154 /* signed constant loadable with addi */
5155 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5156 return 1;
5158 /* constant loadable with addis */
5159 else if ((value & 0xffff) == 0
5160 && (value >> 31 == -1 || value >> 31 == 0))
5161 return 1;
5163 else if (TARGET_POWERPC64)
5164 {
5165 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5166 HOST_WIDE_INT high = value >> 31;
5168 if (high == 0 || high == -1)
5169 return 2;
5171 high >>= 1;
5173 if (low == 0)
5174 return num_insns_constant_wide (high) + 1;
5175 else if (high == 0)
5176 return num_insns_constant_wide (low) + 1;
5177 else
5178 return (num_insns_constant_wide (high)
5179 + num_insns_constant_wide (low) + 1);
5180 }
5182 else
5183 return 2;
5184 }
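/* Worked example: value = 0x12345678 on a 64-bit target. It is neither
   addi- nor addis-loadable; low = 0x12345678 and high = value >> 31 = 0,
   so the function returns 2 (e.g. lis + ori). */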
5186 int
5187 num_insns_constant (rtx op, machine_mode mode)
5188 {
5189 HOST_WIDE_INT low, high;
5191 switch (GET_CODE (op))
5192 {
5193 case CONST_INT:
5194 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5195 && mask64_operand (op, mode))
5196 return 2;
5197 else
5198 return num_insns_constant_wide (INTVAL (op));
5200 case CONST_WIDE_INT:
5201 {
5202 int i;
5203 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5204 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5205 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5206 return ins;
5207 }
5209 case CONST_DOUBLE:
5210 if (mode == SFmode || mode == SDmode)
5211 {
5212 long l;
5213 REAL_VALUE_TYPE rv;
5215 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5216 if (DECIMAL_FLOAT_MODE_P (mode))
5217 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
5218 else
5219 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
5220 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5221 }
5222 {
5223 long l[2];
5224 REAL_VALUE_TYPE rv;
5226 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5227 if (DECIMAL_FLOAT_MODE_P (mode))
5228 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
5229 else
5230 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
5231 high = l[WORDS_BIG_ENDIAN == 0];
5232 low = l[WORDS_BIG_ENDIAN != 0];
5234 if (TARGET_32BIT)
5235 return (num_insns_constant_wide (low)
5236 + num_insns_constant_wide (high));
5237 else
5238 {
5239 if ((high == 0 && low >= 0)
5240 || (high == -1 && low < 0))
5241 return num_insns_constant_wide (low);
5243 else if (mask64_operand (op, mode))
5244 return 2;
5246 else if (low == 0)
5247 return num_insns_constant_wide (high) + 1;
5249 else
5250 return (num_insns_constant_wide (high)
5251 + num_insns_constant_wide (low) + 1);
5252 }
5253 }
5254 default:
5255 gcc_unreachable ();
5259 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5260 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5261 corresponding element of the vector, but for V4SFmode and V2SFmode,
5262 the corresponding "float" is interpreted as an SImode integer. */
5264 HOST_WIDE_INT
5265 const_vector_elt_as_int (rtx op, unsigned int elt)
5267 rtx tmp;
5269 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5270 gcc_assert (GET_MODE (op) != V2DImode
5271 && GET_MODE (op) != V2DFmode);
5273 tmp = CONST_VECTOR_ELT (op, elt);
5274 if (GET_MODE (op) == V4SFmode
5275 || GET_MODE (op) == V2SFmode)
5276 tmp = gen_lowpart (SImode, tmp);
5277 return INTVAL (tmp);
5280 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5281 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5282 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5283 all items are set to the same value and contain COPIES replicas of the
5284 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5285 operand and the others are set to the value of the operand's msb. */
5287 static bool
5288 vspltis_constant (rtx op, unsigned step, unsigned copies)
5290 machine_mode mode = GET_MODE (op);
5291 machine_mode inner = GET_MODE_INNER (mode);
5293 unsigned i;
5294 unsigned nunits;
5295 unsigned bitsize;
5296 unsigned mask;
5298 HOST_WIDE_INT val;
5299 HOST_WIDE_INT splat_val;
5300 HOST_WIDE_INT msb_val;
5302 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5303 return false;
5305 nunits = GET_MODE_NUNITS (mode);
5306 bitsize = GET_MODE_BITSIZE (inner);
5307 mask = GET_MODE_MASK (inner);
5309 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5310 splat_val = val;
5311 msb_val = val >= 0 ? 0 : -1;
5313 /* Construct the value to be splatted, if possible. If not, return 0. */
5314 for (i = 2; i <= copies; i *= 2)
5316 HOST_WIDE_INT small_val;
5317 bitsize /= 2;
5318 small_val = splat_val >> bitsize;
5319 mask >>= bitsize;
5320 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5321 return false;
5322 splat_val = small_val;
5325 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5326 if (EASY_VECTOR_15 (splat_val))
5329 /* Also check if we can splat, and then add the result to itself. Do so if
5330 the value is positive, or if the splat instruction is using OP's mode;
5331 for splat_val < 0, the splat and the add should use the same mode. */
5332 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5333 && (splat_val >= 0 || (step == 1 && copies == 1)))
5336 /* Also check if we are loading up the most significant bit, which can
5337 be done by loading up -1 and shifting the value left by -1. */
5338 else if (EASY_VECTOR_MSB (splat_val, inner))
5341 else
5342 return false;
5344 /* Check if VAL is present in every STEP-th element, and the
5345 other elements are filled with its most significant bit. */
5346 for (i = 1; i < nunits; ++i)
5348 HOST_WIDE_INT desired_val;
5349 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5350 if ((i & (step - 1)) == 0)
5351 desired_val = val;
5352 else
5353 desired_val = msb_val;
5355 if (desired_val != const_vector_elt_as_int (op, elt))
5356 return false;
5359 return true;
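/* Worked example (illustrative): the V4SImode constant with all four
   elements equal to 0x00030003 is not directly a vspltisw (the value
   does not fit in a signed 5-bit field), but with STEP == 1 and
   COPIES == 2 each 32-bit element folds to the 16-bit value 3, so it
   can be loaded with vspltish 3 and the register reread as V4SI.  */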
5363 /* Return true if OP is of the given MODE and can be synthesized
5364 with a vspltisb, vspltish or vspltisw. */
5366 bool
5367 easy_altivec_constant (rtx op, machine_mode mode)
5369 unsigned step, copies;
5371 if (mode == VOIDmode)
5372 mode = GET_MODE (op);
5373 else if (mode != GET_MODE (op))
5374 return false;
5376 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5377 constants. */
5378 if (mode == V2DFmode)
5379 return zero_constant (op, mode);
5381 else if (mode == V2DImode)
5383 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5384 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5385 return false;
5387 if (zero_constant (op, mode))
5388 return true;
5390 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5391 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5392 return true;
5394 return false;
5397 /* V1TImode is a special container for TImode. Ignore for now. */
5398 else if (mode == V1TImode)
5399 return false;
5401 /* Start with a vspltisw. */
5402 step = GET_MODE_NUNITS (mode) / 4;
5403 copies = 1;
5405 if (vspltis_constant (op, step, copies))
5406 return true;
5408 /* Then try with a vspltish. */
5409 if (step == 1)
5410 copies <<= 1;
5411 else
5412 step >>= 1;
5414 if (vspltis_constant (op, step, copies))
5415 return true;
5417 /* And finally a vspltisb. */
5418 if (step == 1)
5419 copies <<= 1;
5420 else
5421 step >>= 1;
5423 if (vspltis_constant (op, step, copies))
5424 return true;
5426 return false;
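/* Illustrative search order: for V16QImode the (STEP, COPIES) pairs
   tried above are (4, 1), (2, 1), (1, 1) -- i.e. vspltisw, vspltish,
   vspltisb; for V4SImode they are (1, 1), (1, 2), (1, 4), i.e. a
   narrower splat whose replicas rebuild each 32-bit element.  */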
5429 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5430 result is OP. Abort if it is not possible. */
5432 rtx
5433 gen_easy_altivec_constant (rtx op)
5435 machine_mode mode = GET_MODE (op);
5436 int nunits = GET_MODE_NUNITS (mode);
5437 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5438 unsigned step = nunits / 4;
5439 unsigned copies = 1;
5441 /* Start with a vspltisw. */
5442 if (vspltis_constant (op, step, copies))
5443 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5445 /* Then try with a vspltish. */
5446 if (step == 1)
5447 copies <<= 1;
5448 else
5449 step >>= 1;
5451 if (vspltis_constant (op, step, copies))
5452 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5454 /* And finally a vspltisb. */
5455 if (step == 1)
5456 copies <<= 1;
5457 else
5458 step >>= 1;
5460 if (vspltis_constant (op, step, copies))
5461 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
5463 gcc_unreachable ();
5466 const char *
5467 output_vec_const_move (rtx *operands)
5469 int cst, cst2;
5470 machine_mode mode;
5471 rtx dest, vec;
5473 dest = operands[0];
5474 vec = operands[1];
5475 mode = GET_MODE (dest);
5477 if (TARGET_VSX)
5479 if (zero_constant (vec, mode))
5480 return "xxlxor %x0,%x0,%x0";
5482 if ((mode == V2DImode || mode == V1TImode)
5483 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
5484 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
5485 return "vspltisw %0,-1";
5488 if (TARGET_ALTIVEC)
5490 rtx splat_vec;
5491 if (zero_constant (vec, mode))
5492 return "vxor %0,%0,%0";
5494 splat_vec = gen_easy_altivec_constant (vec);
5495 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
5496 operands[1] = XEXP (splat_vec, 0);
5497 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
5498 return "#";
5500 switch (GET_MODE (splat_vec))
5502 case V4SImode:
5503 return "vspltisw %0,%1";
5505 case V8HImode:
5506 return "vspltish %0,%1";
5508 case V16QImode:
5509 return "vspltisb %0,%1";
5511 default:
5512 gcc_unreachable ();
5516 gcc_assert (TARGET_SPE);
5518 /* Vector constant 0 is handled as a splitter of V2SI, and in the
5519 pattern of V1DI, V4HI, and V2SF.
5521 FIXME: We should probably return # and add post reload
5522 splitters for these, but this way is so easy ;-). */
5523 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
5524 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
5525 operands[1] = CONST_VECTOR_ELT (vec, 0);
5526 operands[2] = CONST_VECTOR_ELT (vec, 1);
5527 if (cst == cst2)
5528 return "li %0,%1\n\tevmergelo %0,%0,%0";
5529 else if (WORDS_BIG_ENDIAN)
5530 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
5531 else
5532 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
5535 /* Initialize paired-float vector TARGET to VALS. */
5537 void
5538 paired_expand_vector_init (rtx target, rtx vals)
5540 machine_mode mode = GET_MODE (target);
5541 int n_elts = GET_MODE_NUNITS (mode);
5542 int n_var = 0;
5543 rtx x, new_rtx, tmp, constant_op, op1, op2;
5544 int i;
5546 for (i = 0; i < n_elts; ++i)
5548 x = XVECEXP (vals, 0, i);
5549 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5550 ++n_var;
5552 if (n_var == 0)
5554 /* Load from constant pool. */
5555 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5556 return;
5559 if (n_var == 2)
5561 /* The vector is initialized only with non-constants. */
5562 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
5563 XVECEXP (vals, 0, 1));
5565 emit_move_insn (target, new_rtx);
5566 return;
5569 /* One field is non-constant and the other one is a constant. Load the
5570 constant from the constant pool and use the ps_merge instruction to
5571 construct the whole vector. */
5572 op1 = XVECEXP (vals, 0, 0);
5573 op2 = XVECEXP (vals, 0, 1);
5575 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
5577 tmp = gen_reg_rtx (GET_MODE (constant_op));
5578 emit_move_insn (tmp, constant_op);
5580 if (CONSTANT_P (op1))
5581 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
5582 else
5583 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
5585 emit_move_insn (target, new_rtx);
5588 void
5589 paired_expand_vector_move (rtx operands[])
5591 rtx op0 = operands[0], op1 = operands[1];
5593 emit_move_insn (op0, op1);
5596 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
5597 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
5598 operands for the relation operation COND. This is a recursive
5599 function. */
5601 static void
5602 paired_emit_vector_compare (enum rtx_code rcode,
5603 rtx dest, rtx op0, rtx op1,
5604 rtx cc_op0, rtx cc_op1)
5606 rtx tmp = gen_reg_rtx (V2SFmode);
5607 rtx tmp1, max, min;
5609 gcc_assert (TARGET_PAIRED_FLOAT);
5610 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5612 switch (rcode)
5614 case LT:
5615 case LTU:
5616 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5617 return;
5618 case GE:
5619 case GEU:
5620 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5621 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
5622 return;
5623 case LE:
5624 case LEU:
5625 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
5626 return;
5627 case GT:
5628 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5629 return;
5630 case EQ:
5631 tmp1 = gen_reg_rtx (V2SFmode);
5632 max = gen_reg_rtx (V2SFmode);
5633 min = gen_reg_rtx (V2SFmode);
5636 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5637 emit_insn (gen_selv2sf4
5638 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5639 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
5640 emit_insn (gen_selv2sf4
5641 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5642 emit_insn (gen_subv2sf3 (tmp1, min, max));
5643 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
5644 return;
5645 case NE:
5646 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
5647 return;
5648 case UNLE:
5649 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5650 return;
5651 case UNLT:
5652 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
5653 return;
5654 case UNGE:
5655 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5656 return;
5657 case UNGT:
5658 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
5659 return;
5660 default:
5661 gcc_unreachable ();
5664 return;
5667 /* Emit vector conditional expression.
5668 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5669 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5671 int
5672 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5673 rtx cond, rtx cc_op0, rtx cc_op1)
5675 enum rtx_code rcode = GET_CODE (cond);
5677 if (!TARGET_PAIRED_FLOAT)
5678 return 0;
5680 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
5682 return 1;
5685 /* Initialize vector TARGET to VALS. */
5687 void
5688 rs6000_expand_vector_init (rtx target, rtx vals)
5690 machine_mode mode = GET_MODE (target);
5691 machine_mode inner_mode = GET_MODE_INNER (mode);
5692 int n_elts = GET_MODE_NUNITS (mode);
5693 int n_var = 0, one_var = -1;
5694 bool all_same = true, all_const_zero = true;
5695 rtx x, mem;
5696 int i;
5698 for (i = 0; i < n_elts; ++i)
5700 x = XVECEXP (vals, 0, i);
5701 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5702 ++n_var, one_var = i;
5703 else if (x != CONST0_RTX (inner_mode))
5704 all_const_zero = false;
5706 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
5707 all_same = false;
5710 if (n_var == 0)
5712 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
5713 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
5714 if ((int_vector_p || TARGET_VSX) && all_const_zero)
5716 /* Zero register. */
5717 emit_insn (gen_rtx_SET (VOIDmode, target,
5718 gen_rtx_XOR (mode, target, target)));
5719 return;
5721 else if (int_vector_p && easy_vector_constant (const_vec, mode))
5723 /* Splat immediate. */
5724 emit_insn (gen_rtx_SET (VOIDmode, target, const_vec));
5725 return;
5727 else
5729 /* Load from constant pool. */
5730 emit_move_insn (target, const_vec);
5731 return;
5735 /* Double word values on VSX can use xxpermdi or lxvdsx. */
5736 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5738 rtx op0 = XVECEXP (vals, 0, 0);
5739 rtx op1 = XVECEXP (vals, 0, 1);
5740 if (all_same)
5742 if (!MEM_P (op0) && !REG_P (op0))
5743 op0 = force_reg (inner_mode, op0);
5744 if (mode == V2DFmode)
5745 emit_insn (gen_vsx_splat_v2df (target, op0));
5746 else
5747 emit_insn (gen_vsx_splat_v2di (target, op0));
5749 else
5751 op0 = force_reg (inner_mode, op0);
5752 op1 = force_reg (inner_mode, op1);
5753 if (mode == V2DFmode)
5754 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
5755 else
5756 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
5758 return;
5761 /* With single-precision floating point on VSX, note that internally it
5762 is actually represented as a double. Either build two V2DF vectors
5763 and convert those to single precision, or do one conversion and
5764 splat the result to the other elements. */
5765 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
5767 if (all_same)
5769 rtx freg = gen_reg_rtx (V4SFmode);
5770 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
5771 rtx cvt = ((TARGET_XSCVDPSPN)
5772 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
5773 : gen_vsx_xscvdpsp_scalar (freg, sreg));
5775 emit_insn (cvt);
5776 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
5778 else
5780 rtx dbl_even = gen_reg_rtx (V2DFmode);
5781 rtx dbl_odd = gen_reg_rtx (V2DFmode);
5782 rtx flt_even = gen_reg_rtx (V4SFmode);
5783 rtx flt_odd = gen_reg_rtx (V4SFmode);
5784 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
5785 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
5786 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
5787 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
5789 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
5790 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
5791 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
5792 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
5793 rs6000_expand_extract_even (target, flt_even, flt_odd);
5795 return;
5798 /* Store value to stack temp. Load vector element. Splat. However, splat
5799 of 64-bit items is not supported on Altivec. */
5800 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
5802 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5803 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
5804 XVECEXP (vals, 0, 0));
5805 x = gen_rtx_UNSPEC (VOIDmode,
5806 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5807 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5808 gen_rtvec (2,
5809 gen_rtx_SET (VOIDmode,
5810 target, mem),
5811 x)));
5812 x = gen_rtx_VEC_SELECT (inner_mode, target,
5813 gen_rtx_PARALLEL (VOIDmode,
5814 gen_rtvec (1, const0_rtx)));
5815 emit_insn (gen_rtx_SET (VOIDmode, target,
5816 gen_rtx_VEC_DUPLICATE (mode, x)));
5817 return;
5820 /* One field is non-constant. Load constant then overwrite
5821 varying field. */
5822 if (n_var == 1)
5824 rtx copy = copy_rtx (vals);
5826 /* Load constant part of vector, substitute neighboring value for
5827 varying element. */
5828 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
5829 rs6000_expand_vector_init (target, copy);
5831 /* Insert variable. */
5832 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
5833 return;
5836 /* Construct the vector in memory one field at a time
5837 and load the whole vector. */
5838 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5839 for (i = 0; i < n_elts; i++)
5840 emit_move_insn (adjust_address_nv (mem, inner_mode,
5841 i * GET_MODE_SIZE (inner_mode)),
5842 XVECEXP (vals, 0, i));
5843 emit_move_insn (target, mem);
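/* Illustrative dispatch (assuming a VSX target): a V4SFmode
   { x, x, x, x } takes the all_same path (one conversion plus
   xxspltw); an all-constant { 1, 2, 3, 4 } V4SImode vector is
   loaded from the constant pool; { x, 1, 2, 3 } loads the constant
   vector { 1, 1, 2, 3 } and then inserts x via
   rs6000_expand_vector_set.  */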
5846 /* Set field ELT of TARGET to VAL. */
5848 void
5849 rs6000_expand_vector_set (rtx target, rtx val, int elt)
5851 machine_mode mode = GET_MODE (target);
5852 machine_mode inner_mode = GET_MODE_INNER (mode);
5853 rtx reg = gen_reg_rtx (mode);
5854 rtx mask, mem, x;
5855 int width = GET_MODE_SIZE (inner_mode);
5856 int i;
5858 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5860 rtx (*set_func) (rtx, rtx, rtx, rtx)
5861 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
5862 emit_insn (set_func (target, target, val, GEN_INT (elt)));
5863 return;
5866 /* Simplify setting single element vectors like V1TImode. */
5867 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
5869 emit_move_insn (target, gen_lowpart (mode, val));
5870 return;
5873 /* Load single variable value. */
5874 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5875 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
5876 x = gen_rtx_UNSPEC (VOIDmode,
5877 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5878 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5879 gen_rtvec (2,
5880 gen_rtx_SET (VOIDmode,
5881 reg, mem),
5882 x)));
5884 /* Linear sequence. */
5885 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
5886 for (i = 0; i < 16; ++i)
5887 XVECEXP (mask, 0, i) = GEN_INT (i);
5889 /* Set permute mask to insert element into target. */
5890 for (i = 0; i < width; ++i)
5891 XVECEXP (mask, 0, elt*width + i)
5892 = GEN_INT (i + 0x10);
5893 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
5895 if (BYTES_BIG_ENDIAN)
5896 x = gen_rtx_UNSPEC (mode,
5897 gen_rtvec (3, target, reg,
5898 force_reg (V16QImode, x)),
5899 UNSPEC_VPERM);
5900 else
5902 /* Invert selector. We prefer to generate VNAND on P8 so
5903 that future fusion opportunities can kick in, but must
5904 generate VNOR elsewhere. */
5905 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
5906 rtx iorx = (TARGET_P8_VECTOR
5907 ? gen_rtx_IOR (V16QImode, notx, notx)
5908 : gen_rtx_AND (V16QImode, notx, notx));
5909 rtx tmp = gen_reg_rtx (V16QImode);
5910 emit_insn (gen_rtx_SET (VOIDmode, tmp, iorx));
5912 /* Permute with operands reversed and adjusted selector. */
5913 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
5914 UNSPEC_VPERM);
5917 emit_insn (gen_rtx_SET (VOIDmode, target, x));
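/* Selector example (illustrative, big-endian): for V4SImode and
   ELT == 1, WIDTH == 4, the mask becomes { 0, 1, 2, 3, 16, 17, 18,
   19, 8, 9, ..., 15 }, so the vperm copies bytes 4-7 of the result
   from REG (the freshly loaded value) and all other bytes from
   TARGET.  */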
5920 /* Extract field ELT from VEC into TARGET. */
5922 void
5923 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
5925 machine_mode mode = GET_MODE (vec);
5926 machine_mode inner_mode = GET_MODE_INNER (mode);
5927 rtx mem;
5929 if (VECTOR_MEM_VSX_P (mode))
5931 switch (mode)
5933 default:
5934 break;
5935 case V1TImode:
5936 gcc_assert (elt == 0 && inner_mode == TImode);
5937 emit_move_insn (target, gen_lowpart (TImode, vec));
5938 break;
5939 case V2DFmode:
5940 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
5941 return;
5942 case V2DImode:
5943 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
5944 return;
5945 case V4SFmode:
5946 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
5947 return;
5951 /* Allocate mode-sized buffer. */
5952 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5954 emit_move_insn (mem, vec);
5956 /* Add offset to field within buffer matching vector element. */
5957 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
5959 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
5962 /* Generates shifts and masks for a pair of rldicl or rldicr insns to
5963 implement ANDing by the mask IN. */
5964 void
5965 build_mask64_2_operands (rtx in, rtx *out)
5967 unsigned HOST_WIDE_INT c, lsb, m1, m2;
5968 int shift;
5970 gcc_assert (GET_CODE (in) == CONST_INT);
5972 c = INTVAL (in);
5973 if (c & 1)
5975 /* Assume c initially something like 0x00fff000000fffff. The idea
5976 is to rotate the word so that the middle ^^^^^^ group of zeros
5977 is at the MS end and can be cleared with an rldicl mask. We then
5978 rotate back and clear off the MS ^^ group of zeros with a
5979 second rldicl. */
5980 c = ~c; /* c == 0xff000ffffff00000 */
5981 lsb = c & -c; /* lsb == 0x0000000000100000 */
5982 m1 = -lsb; /* m1 == 0xfffffffffff00000 */
5983 c = ~c; /* c == 0x00fff000000fffff */
5984 c &= -lsb; /* c == 0x00fff00000000000 */
5985 lsb = c & -c; /* lsb == 0x0000100000000000 */
5986 c = ~c; /* c == 0xff000fffffffffff */
5987 c &= -lsb; /* c == 0xff00000000000000 */
5988 shift = 0;
5989 while ((lsb >>= 1) != 0)
5990 shift++; /* shift == 44 on exit from loop */
5991 m1 <<= 64 - shift; /* m1 == 0xffffff0000000000 */
5992 m1 = ~m1; /* m1 == 0x000000ffffffffff */
5993 m2 = ~c; /* m2 == 0x00ffffffffffffff */
5995 else
5997 /* Assume c initially something like 0xff000f0000000000. The idea
5998 is to rotate the word so that the ^^^ middle group of zeros
5999 is at the LS end and can be cleared with an rldicr mask. We then
6000 rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
6001 a second rldicr. */
6002 lsb = c & -c; /* lsb == 0x0000010000000000 */
6003 m2 = -lsb; /* m2 == 0xffffff0000000000 */
6004 c = ~c; /* c == 0x00fff0ffffffffff */
6005 c &= -lsb; /* c == 0x00fff00000000000 */
6006 lsb = c & -c; /* lsb == 0x0000100000000000 */
6007 c = ~c; /* c == 0xff000fffffffffff */
6008 c &= -lsb; /* c == 0xff00000000000000 */
6009 shift = 0;
6010 while ((lsb >>= 1) != 0)
6011 shift++; /* shift == 44 on exit from loop */
6012 m1 = ~c; /* m1 == 0x00ffffffffffffff */
6013 m1 >>= shift; /* m1 == 0x0000000000000fff */
6014 m1 = ~m1; /* m1 == 0xfffffffffffff000 */
6017 /* Note that when we only have two 0->1 and 1->0 transitions, one of the
6018 masks will be all 1's. We are guaranteed more than one transition. */
6019 out[0] = GEN_INT (64 - shift);
6020 out[1] = GEN_INT (m1);
6021 out[2] = GEN_INT (shift);
6022 out[3] = GEN_INT (m2);
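/* Worked example (illustrative): for IN == 0x00fff000000fffff the
   code above yields OUT[0] == 20, OUT[1] == 0x000000ffffffffff,
   OUT[2] == 44 and OUT[3] == 0x00ffffffffffffff: rotate left by 20
   and clear the top 24 bits, then rotate back (left by 44) and
   clear the top 8 bits, which together AND with IN.  */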
6025 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
6027 bool
6028 invalid_e500_subreg (rtx op, machine_mode mode)
6030 if (TARGET_E500_DOUBLE)
6032 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
6033 subreg:TI and reg:TF. Decimal float modes are like integer
6034 modes (only low part of each register used) for this
6035 purpose. */
6036 if (GET_CODE (op) == SUBREG
6037 && (mode == SImode || mode == DImode || mode == TImode
6038 || mode == DDmode || mode == TDmode || mode == PTImode)
6039 && REG_P (SUBREG_REG (op))
6040 && (GET_MODE (SUBREG_REG (op)) == DFmode
6041 || GET_MODE (SUBREG_REG (op)) == TFmode))
6042 return true;
6044 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
6045 reg:TI. */
6046 if (GET_CODE (op) == SUBREG
6047 && (mode == DFmode || mode == TFmode)
6048 && REG_P (SUBREG_REG (op))
6049 && (GET_MODE (SUBREG_REG (op)) == DImode
6050 || GET_MODE (SUBREG_REG (op)) == TImode
6051 || GET_MODE (SUBREG_REG (op)) == PTImode
6052 || GET_MODE (SUBREG_REG (op)) == DDmode
6053 || GET_MODE (SUBREG_REG (op)) == TDmode))
6054 return true;
6057 if (TARGET_SPE
6058 && GET_CODE (op) == SUBREG
6059 && mode == SImode
6060 && REG_P (SUBREG_REG (op))
6061 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
6062 return true;
6064 return false;
6067 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
6068 selects whether the alignment is ABI-mandated, optional, or
6069 both. */
6071 unsigned int
6072 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
6074 if (how != align_opt)
6076 if (TREE_CODE (type) == VECTOR_TYPE)
6078 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
6079 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
6081 if (align < 64)
6082 align = 64;
6084 else if (align < 128)
6085 align = 128;
6087 else if (TARGET_E500_DOUBLE
6088 && TREE_CODE (type) == REAL_TYPE
6089 && TYPE_MODE (type) == DFmode)
6091 if (align < 64)
6092 align = 64;
6096 if (how != align_abi)
6098 if (TREE_CODE (type) == ARRAY_TYPE
6099 && TYPE_MODE (TREE_TYPE (type)) == QImode)
6101 if (align < BITS_PER_WORD)
6102 align = BITS_PER_WORD;
6106 return align;
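/* Example (illustrative): under align_opt a char array such as
   "static char buf[100];" has its alignment raised to BITS_PER_WORD,
   while under align_abi an AltiVec vector type is raised to 128 bits
   regardless of the requested alignment.  */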
6109 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
6111 bool
6112 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
6114 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6116 if (computed != 128)
6118 static bool warned;
6119 if (!warned && warn_psabi)
6121 warned = true;
6122 inform (input_location,
6123 "the layout of aggregates containing vectors with"
6124 " %d-byte alignment has changed in GCC 5",
6125 computed / BITS_PER_UNIT);
6128 /* In current GCC there is no special case. */
6129 return false;
6132 return false;
6135 /* AIX increases natural record alignment to doubleword if the first
6136 field is an FP double while the FP fields remain word aligned. */
6138 unsigned int
6139 rs6000_special_round_type_align (tree type, unsigned int computed,
6140 unsigned int specified)
6142 unsigned int align = MAX (computed, specified);
6143 tree field = TYPE_FIELDS (type);
6145 /* Skip all non-field DECLs. */
6146 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6147 field = DECL_CHAIN (field);
6149 if (field != NULL && field != type)
6151 type = TREE_TYPE (field);
6152 while (TREE_CODE (type) == ARRAY_TYPE)
6153 type = TREE_TYPE (type);
6155 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
6156 align = MAX (align, 64);
6159 return align;
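/* Example (illustrative): on AIX "struct { double d; int i; }" is
   raised to doubleword alignment because its first field has DFmode,
   whereas "struct { int i; double d; }" keeps its computed word
   alignment.  */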
6162 /* Darwin increases record alignment to the natural alignment of
6163 the first field. */
6165 unsigned int
6166 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6167 unsigned int specified)
6169 unsigned int align = MAX (computed, specified);
6171 if (TYPE_PACKED (type))
6172 return align;
6174 /* Find the first field, looking down into aggregates. */
6175 do {
6176 tree field = TYPE_FIELDS (type);
6177 /* Skip all non-field DECLs. */
6178 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6179 field = DECL_CHAIN (field);
6180 if (! field)
6181 break;
6182 /* A packed field does not contribute any extra alignment. */
6183 if (DECL_PACKED (field))
6184 return align;
6185 type = TREE_TYPE (field);
6186 while (TREE_CODE (type) == ARRAY_TYPE)
6187 type = TREE_TYPE (type);
6188 } while (AGGREGATE_TYPE_P (type));
6190 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6191 align = MAX (align, TYPE_ALIGN (type));
6193 return align;
6196 /* Return 1 for an operand in small memory on V.4/eabi. */
6198 int
6199 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6200 machine_mode mode ATTRIBUTE_UNUSED)
6202 #if TARGET_ELF
6203 rtx sym_ref;
6205 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6206 return 0;
6208 if (DEFAULT_ABI != ABI_V4)
6209 return 0;
6211 /* Vector and float memory instructions have a limited offset on the
6212 SPE, so using a vector or float variable directly as an operand is
6213 not useful. */
6214 if (TARGET_SPE
6215 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6216 return 0;
6218 if (GET_CODE (op) == SYMBOL_REF)
6219 sym_ref = op;
6221 else if (GET_CODE (op) != CONST
6222 || GET_CODE (XEXP (op, 0)) != PLUS
6223 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6224 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6225 return 0;
6227 else
6229 rtx sum = XEXP (op, 0);
6230 HOST_WIDE_INT summand;
6232 /* We have to be careful here, because it is the referenced address
6233 that must be 32k from _SDA_BASE_, not just the symbol. */
6234 summand = INTVAL (XEXP (sum, 1));
6235 if (summand < 0 || summand > g_switch_value)
6236 return 0;
6238 sym_ref = XEXP (sum, 0);
6241 return SYMBOL_REF_SMALL_P (sym_ref);
6242 #else
6243 return 0;
6244 #endif
6247 /* Return true if either operand is a general purpose register. */
6249 bool
6250 gpr_or_gpr_p (rtx op0, rtx op1)
6252 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6253 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6256 /* Return true if this is a move direct operation between GPR registers and
6257 floating point/VSX registers. */
6259 bool
6260 direct_move_p (rtx op0, rtx op1)
6262 int regno0, regno1;
6264 if (!REG_P (op0) || !REG_P (op1))
6265 return false;
6267 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6268 return false;
6270 regno0 = REGNO (op0);
6271 regno1 = REGNO (op1);
6272 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6273 return false;
6275 if (INT_REGNO_P (regno0))
6276 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6278 else if (INT_REGNO_P (regno1))
6280 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6281 return true;
6283 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6284 return true;
6287 return false;
6290 /* Return true if this is a load or store quad operation. This function does
6291 not handle the atomic quad memory instructions. */
6293 bool
6294 quad_load_store_p (rtx op0, rtx op1)
6296 bool ret;
6298 if (!TARGET_QUAD_MEMORY)
6299 ret = false;
6301 else if (REG_P (op0) && MEM_P (op1))
6302 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6303 && quad_memory_operand (op1, GET_MODE (op1))
6304 && !reg_overlap_mentioned_p (op0, op1));
6306 else if (MEM_P (op0) && REG_P (op1))
6307 ret = (quad_memory_operand (op0, GET_MODE (op0))
6308 && quad_int_reg_operand (op1, GET_MODE (op1)));
6310 else
6311 ret = false;
6313 if (TARGET_DEBUG_ADDR)
6315 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6316 ret ? "true" : "false");
6317 debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
6320 return ret;
6323 /* Given an address, return a constant offset term if one exists. */
6325 static rtx
6326 address_offset (rtx op)
6328 if (GET_CODE (op) == PRE_INC
6329 || GET_CODE (op) == PRE_DEC)
6330 op = XEXP (op, 0);
6331 else if (GET_CODE (op) == PRE_MODIFY
6332 || GET_CODE (op) == LO_SUM)
6333 op = XEXP (op, 1);
6335 if (GET_CODE (op) == CONST)
6336 op = XEXP (op, 0);
6338 if (GET_CODE (op) == PLUS)
6339 op = XEXP (op, 1);
6341 if (CONST_INT_P (op))
6342 return op;
6344 return NULL_RTX;
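/* Examples (illustrative): (plus (reg) (const_int 16)) yields
   (const_int 16); (pre_modify (reg) (plus (reg) (const_int 8)))
   yields (const_int 8); a bare (reg) yields NULL_RTX.  */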
6347 /* Return true if the MEM operand is a memory operand suitable for use
6348 with a (full width, possibly multiple) gpr load/store. On
6349 powerpc64 this means the offset must be divisible by 4.
6350 Implements 'Y' constraint.
6352 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6353 a constraint function we know the operand has satisfied a suitable
6354 memory predicate. Also accept some odd rtl generated by reload
6355 (see rs6000_legitimize_reload_address for various forms). It is
6356 important that reload rtl be accepted by appropriate constraints
6357 but not by the operand predicate.
6359 Offsetting a lo_sum should not be allowed, except where we know by
6360 alignment that a 32k boundary is not crossed, but see the ???
6361 comment in rs6000_legitimize_reload_address. Note that by
6362 "offsetting" here we mean a further offset to access parts of the
6363 MEM. It's fine to have a lo_sum where the inner address is offset
6364 from a sym, since the same sym+offset will appear in the high part
6365 of the address calculation. */
6367 bool
6368 mem_operand_gpr (rtx op, machine_mode mode)
6370 unsigned HOST_WIDE_INT offset;
6371 int extra;
6372 rtx addr = XEXP (op, 0);
6374 op = address_offset (addr);
6375 if (op == NULL_RTX)
6376 return true;
6378 offset = INTVAL (op);
6379 if (TARGET_POWERPC64 && (offset & 3) != 0)
6380 return false;
6382 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
6383 if (extra < 0)
6384 extra = 0;
6386 if (GET_CODE (addr) == LO_SUM)
6387 /* For lo_sum addresses, we must allow any offset except one that
6388 causes a wrap, so test only the low 16 bits. */
6389 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
6391 return offset + 0x8000 < 0x10000u - extra;
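/* Examples (illustrative): a DImode access at (plus (reg)
   (const_int 32760)) is accepted on powerpc64 (the offset is a
   multiple of 4 and 32760 + 0x8000 < 0x10000); the same access at
   offset 32762 is rejected because the offset is not word-aligned.  */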
6394 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
6396 static bool
6397 reg_offset_addressing_ok_p (machine_mode mode)
6399 switch (mode)
6401 case V16QImode:
6402 case V8HImode:
6403 case V4SFmode:
6404 case V4SImode:
6405 case V2DFmode:
6406 case V2DImode:
6407 case V1TImode:
6408 case TImode:
6409 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6410 TImode is not a vector mode, if we want to use the VSX registers to
6411 move it around, we need to restrict ourselves to reg+reg
6412 addressing. */
6413 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6414 return false;
6415 break;
6417 case V4HImode:
6418 case V2SImode:
6419 case V1DImode:
6420 case V2SFmode:
6421 /* Paired vector modes. Only reg+reg addressing is valid. */
6422 if (TARGET_PAIRED_FLOAT)
6423 return false;
6424 break;
6426 case SDmode:
6427 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6428 addressing for the LFIWZX and STFIWX instructions. */
6429 if (TARGET_NO_SDMODE_STACK)
6430 return false;
6431 break;
6433 default:
6434 break;
6437 return true;
6440 static bool
6441 virtual_stack_registers_memory_p (rtx op)
6443 int regnum;
6445 if (GET_CODE (op) == REG)
6446 regnum = REGNO (op);
6448 else if (GET_CODE (op) == PLUS
6449 && GET_CODE (XEXP (op, 0)) == REG
6450 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6451 regnum = REGNO (XEXP (op, 0));
6453 else
6454 return false;
6456 return (regnum >= FIRST_VIRTUAL_REGISTER
6457 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
6460 /* Return true if a MODE-sized memory access to OP plus OFFSET
6461 is known to not straddle a 32k boundary. */
6463 static bool
6464 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
6465 machine_mode mode)
6467 tree decl, type;
6468 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
6470 if (GET_CODE (op) != SYMBOL_REF)
6471 return false;
6473 dsize = GET_MODE_SIZE (mode);
6474 decl = SYMBOL_REF_DECL (op);
6475 if (!decl)
6477 if (dsize == 0)
6478 return false;
6480 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
6481 replacing memory addresses with an anchor plus offset. We
6482 could find the decl by rummaging around in the block->objects
6483 VEC for the given offset but that seems like too much work. */
6484 dalign = BITS_PER_UNIT;
6485 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
6486 && SYMBOL_REF_ANCHOR_P (op)
6487 && SYMBOL_REF_BLOCK (op) != NULL)
6489 struct object_block *block = SYMBOL_REF_BLOCK (op);
6491 dalign = block->alignment;
6492 offset += SYMBOL_REF_BLOCK_OFFSET (op);
6494 else if (CONSTANT_POOL_ADDRESS_P (op))
6496 /* It would be nice to have get_pool_align().. */
6497 machine_mode cmode = get_pool_mode (op);
6499 dalign = GET_MODE_ALIGNMENT (cmode);
6502 else if (DECL_P (decl))
6504 dalign = DECL_ALIGN (decl);
6506 if (dsize == 0)
6508 /* Allow BLKmode when the entire object is known to not
6509 cross a 32k boundary. */
6510 if (!DECL_SIZE_UNIT (decl))
6511 return false;
6513 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
6514 return false;
6516 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
6517 if (dsize > 32768)
6518 return false;
6520 return dalign / BITS_PER_UNIT >= dsize;
6523 else
6525 type = TREE_TYPE (decl);
6527 dalign = TYPE_ALIGN (type);
6528 if (CONSTANT_CLASS_P (decl))
6529 dalign = CONSTANT_ALIGNMENT (decl, dalign);
6530 else
6531 dalign = DATA_ALIGNMENT (decl, dalign);
6533 if (dsize == 0)
6535 /* BLKmode, check the entire object. */
6536 if (TREE_CODE (decl) == STRING_CST)
6537 dsize = TREE_STRING_LENGTH (decl);
6538 else if (TYPE_SIZE_UNIT (type)
6539 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
6540 dsize = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6541 else
6542 return false;
6543 if (dsize > 32768)
6544 return false;
6546 return dalign / BITS_PER_UNIT >= dsize;
6550 /* Find how many bits of the alignment we know for this access. */
6551 mask = dalign / BITS_PER_UNIT - 1;
6552 lsb = offset & -offset;
6553 mask &= lsb - 1;
6554 dalign = mask + 1;
6556 return dalign >= dsize;
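/* Example of the final test (illustrative): with a decl aligned to
   16 bytes and OFFSET == 4, lsb == 4 and the derived alignment is
   min (16, 4) == 4, so only accesses of at most 4 bytes are known
   not to cross a 32k boundary.  */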
6559 static bool
6560 constant_pool_expr_p (rtx op)
6562 rtx base, offset;
6564 split_const (op, &base, &offset);
6565 return (GET_CODE (base) == SYMBOL_REF
6566 && CONSTANT_POOL_ADDRESS_P (base)
6567 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
6570 static const_rtx tocrel_base, tocrel_offset;
6572 /* Return true if OP is a toc pointer relative address (the output
6573 of create_TOC_reference). If STRICT, do not match high part or
6574 non-split -mcmodel=large/medium toc pointer relative addresses. */
6576 bool
6577 toc_relative_expr_p (const_rtx op, bool strict)
6579 if (!TARGET_TOC)
6580 return false;
6582 if (TARGET_CMODEL != CMODEL_SMALL)
6584 /* Only match the low part. */
6585 if (GET_CODE (op) == LO_SUM
6586 && REG_P (XEXP (op, 0))
6587 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
6588 op = XEXP (op, 1);
6589 else if (strict)
6590 return false;
6593 tocrel_base = op;
6594 tocrel_offset = const0_rtx;
6595 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
6597 tocrel_base = XEXP (op, 0);
6598 tocrel_offset = XEXP (op, 1);
6601 return (GET_CODE (tocrel_base) == UNSPEC
6602 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
6605 /* Return true if X is a constant pool address, and also for cmodel=medium
6606 if X is a toc-relative address known to be offsettable within MODE. */
6608 bool
6609 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
6610 bool strict)
6612 return (toc_relative_expr_p (x, strict)
6613 && (TARGET_CMODEL != CMODEL_MEDIUM
6614 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
6615 || mode == QImode
6616 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
6617 INTVAL (tocrel_offset), mode)));
6620 static bool
6621 legitimate_small_data_p (machine_mode mode, rtx x)
6623 return (DEFAULT_ABI == ABI_V4
6624 && !flag_pic && !TARGET_TOC
6625 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
6626 && small_data_operand (x, mode));
6629 /* SPE offset addressing is limited to 5-bits worth of double words. */
6630 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
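/* Examples (illustrative): offsets 0, 8, ..., 248 satisfy
   SPE_CONST_OFFSET_OK; 4 fails (not a multiple of 8) and 256 fails
   (it needs a sixth bit).  */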
6632 bool
6633 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
6634 bool strict, bool worst_case)
6636 unsigned HOST_WIDE_INT offset;
6637 unsigned int extra;
6639 if (GET_CODE (x) != PLUS)
6640 return false;
6641 if (!REG_P (XEXP (x, 0)))
6642 return false;
6643 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6644 return false;
6645 if (!reg_offset_addressing_ok_p (mode))
6646 return virtual_stack_registers_memory_p (x);
6647 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
6648 return true;
6649 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6650 return false;
6652 offset = INTVAL (XEXP (x, 1));
6653 extra = 0;
6654 switch (mode)
6656 case V4HImode:
6657 case V2SImode:
6658 case V1DImode:
6659 case V2SFmode:
6660 /* SPE vector modes. */
6661 return SPE_CONST_OFFSET_OK (offset);
6663 case DFmode:
6664 case DDmode:
6665 case DImode:
6666 /* On e500v2, we may have:
6668 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
6670 Which gets addressed with evldd instructions. */
6671 if (TARGET_E500_DOUBLE)
6672 return SPE_CONST_OFFSET_OK (offset);
6674 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
6675 addressing. */
6676 if (VECTOR_MEM_VSX_P (mode))
6677 return false;
6679 if (!worst_case)
6680 break;
6681 if (!TARGET_POWERPC64)
6682 extra = 4;
6683 else if (offset & 3)
6684 return false;
6685 break;
6687 case TFmode:
6688 if (TARGET_E500_DOUBLE)
6689 return (SPE_CONST_OFFSET_OK (offset)
6690 && SPE_CONST_OFFSET_OK (offset + 8));
6691 /* fall through */
6693 case TDmode:
6694 case TImode:
6695 case PTImode:
6696 extra = 8;
6697 if (!worst_case)
6698 break;
6699 if (!TARGET_POWERPC64)
6700 extra = 12;
6701 else if (offset & 3)
6702 return false;
6703 break;
6705 default:
6706 break;
6709 offset += 0x8000;
6710 return offset < 0x10000 - extra;
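/* Examples (illustrative): for DImode on powerpc64 with WORST_CASE,
   offset 32760 passes (word-aligned and within the 16-bit range)
   while 32762 fails the (offset & 3) test; for TImode on 32-bit with
   WORST_CASE, extra == 12, so the largest accepted positive offset
   is 32767 - 12 == 32755.  */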
6713 bool
6714 legitimate_indexed_address_p (rtx x, int strict)
6716 rtx op0, op1;
6718 if (GET_CODE (x) != PLUS)
6719 return false;
6721 op0 = XEXP (x, 0);
6722 op1 = XEXP (x, 1);
6724 /* Recognize the rtl generated by reload which we know will later be
6725 replaced with proper base and index regs. */
6726 if (!strict
6727 && reload_in_progress
6728 && (REG_P (op0) || GET_CODE (op0) == PLUS)
6729 && REG_P (op1))
6730 return true;
6732 return (REG_P (op0) && REG_P (op1)
6733 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
6734 && INT_REG_OK_FOR_INDEX_P (op1, strict))
6735 || (INT_REG_OK_FOR_BASE_P (op1, strict)
6736 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
6739 bool
6740 avoiding_indexed_address_p (machine_mode mode)
6742 /* Avoid indexed addressing for modes that have non-indexed
6743 load/store instruction forms. */
6744 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
6747 bool
6748 legitimate_indirect_address_p (rtx x, int strict)
6750 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
6753 bool
6754 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
6756 if (!TARGET_MACHO || !flag_pic
6757 || mode != SImode || GET_CODE (x) != MEM)
6758 return false;
6759 x = XEXP (x, 0);
6761 if (GET_CODE (x) != LO_SUM)
6762 return false;
6763 if (GET_CODE (XEXP (x, 0)) != REG)
6764 return false;
6765 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
6766 return false;
6767 x = XEXP (x, 1);
6769 return CONSTANT_P (x);
6772 static bool
6773 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
6775 if (GET_CODE (x) != LO_SUM)
6776 return false;
6777 if (GET_CODE (XEXP (x, 0)) != REG)
6778 return false;
6779 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6780 return false;
6781 /* Restrict addressing for DI because of our SUBREG hackery. */
6782 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
6783 return false;
6784 x = XEXP (x, 1);
6786 if (TARGET_ELF || TARGET_MACHO)
6788 bool large_toc_ok;
6790 if (DEFAULT_ABI == ABI_V4 && flag_pic)
6791 return false;
6792 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as that usually calls
6793 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
6794 recognizes some LO_SUM addresses as valid although this
6795 function says the opposite. In most cases LRA can generate
6796 correct code for address reloads via its own transformations;
6797 it cannot manage only some LO_SUM cases. So we need code here,
6798 analogous to that in rs6000_legitimize_reload_address for
6799 LO_SUM, saying that some addresses are still valid. */
6800 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
6801 && small_toc_ref (x, VOIDmode));
6802 if (TARGET_TOC && ! large_toc_ok)
6803 return false;
6804 if (GET_MODE_NUNITS (mode) != 1)
6805 return false;
6806 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6807 && !(/* ??? Assume floating point reg based on mode? */
6808 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
6809 && (mode == DFmode || mode == DDmode)))
6810 return false;
6812 return CONSTANT_P (x) || large_toc_ok;
6815 return false;
6819 /* Try machine-dependent ways of modifying an illegitimate address
6820 to be legitimate. If we find one, return the new, valid address.
6821 This is used from only one place: `memory_address' in explow.c.
6823 OLDX is the address as it was before break_out_memory_refs was
6824 called. In some cases it is useful to look at this to decide what
6825 needs to be done.
6827 It is always safe for this function to do nothing. It exists to
6828 recognize opportunities to optimize the output.
6830 On RS/6000, first check for the sum of a register with a constant
6831 integer that is out of range. If so, generate code to add the
6832 constant with the low-order 16 bits masked to the register and force
6833 this result into another register (this can be done with `cau').
6834 Then generate an address of REG+(CONST&0xffff), allowing for the
6835 possibility of bit 16 being a one.
6837 Then check for the sum of a register and something not constant, try to
6838 load the other things into a register and return the sum. */
6840 static rtx
6841 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
6842 machine_mode mode)
6844 unsigned int extra;
6846 if (!reg_offset_addressing_ok_p (mode))
6848 if (virtual_stack_registers_memory_p (x))
6849 return x;
6851 /* In theory we should not be seeing addresses of the form reg+0,
6852 but just in case it is generated, optimize it away. */
6853 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
6854 return force_reg (Pmode, XEXP (x, 0));
6856 /* For TImode with load/store quad, restrict addresses to just a single
6857 pointer, so it works with both GPRs and VSX registers. */
6858 /* Make sure both operands are registers. */
6859 else if (GET_CODE (x) == PLUS
6860 && (mode != TImode || !TARGET_QUAD_MEMORY))
6861 return gen_rtx_PLUS (Pmode,
6862 force_reg (Pmode, XEXP (x, 0)),
6863 force_reg (Pmode, XEXP (x, 1)));
6864 else
6865 return force_reg (Pmode, x);
6867 if (GET_CODE (x) == SYMBOL_REF)
6869 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
6870 if (model != 0)
6871 return rs6000_legitimize_tls_address (x, model);
6874 extra = 0;
6875 switch (mode)
6877 case TFmode:
6878 case TDmode:
6879 case TImode:
6880 case PTImode:
6881 /* As in legitimate_offset_address_p we do not assume
6882 worst-case. The mode here is just a hint as to the registers
6883 used. A TImode is usually in gprs, but may actually be in
6884 fprs. Leave worst-case scenario for reload to handle via
6885 insn constraints. PTImode is only GPRs. */
6886 extra = 8;
6887 break;
6888 default:
6889 break;
6892 if (GET_CODE (x) == PLUS
6893 && GET_CODE (XEXP (x, 0)) == REG
6894 && GET_CODE (XEXP (x, 1)) == CONST_INT
6895 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
6896 >= 0x10000 - extra)
6897 && !(SPE_VECTOR_MODE (mode)
6898 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
6900 HOST_WIDE_INT high_int, low_int;
6901 rtx sum;
6902 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
6903 if (low_int >= 0x8000 - extra)
6904 low_int = 0;
6905 high_int = INTVAL (XEXP (x, 1)) - low_int;
6906 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
6907 GEN_INT (high_int)), 0);
6908 return plus_constant (Pmode, sum, low_int);
6910 else if (GET_CODE (x) == PLUS
6911 && GET_CODE (XEXP (x, 0)) == REG
6912 && GET_CODE (XEXP (x, 1)) != CONST_INT
6913 && GET_MODE_NUNITS (mode) == 1
6914 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6915 || (/* ??? Assume floating point reg based on mode? */
6916 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6917 && (mode == DFmode || mode == DDmode)))
6918 && !avoiding_indexed_address_p (mode))
6920 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
6921 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
6923 else if (SPE_VECTOR_MODE (mode)
6924 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
6926 if (mode == DImode)
6927 return x;
6928 /* We accept [reg + reg] and [reg + OFFSET]. */
6930 if (GET_CODE (x) == PLUS)
6932 rtx op1 = XEXP (x, 0);
6933 rtx op2 = XEXP (x, 1);
6934 rtx y;
6936 op1 = force_reg (Pmode, op1);
6938 if (GET_CODE (op2) != REG
6939 && (GET_CODE (op2) != CONST_INT
6940 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
6941 || (GET_MODE_SIZE (mode) > 8
6942 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
6943 op2 = force_reg (Pmode, op2);
6945 /* We can't always do [reg + reg] for these, because [reg +
6946 reg + offset] is not a legitimate addressing mode. */
6947 y = gen_rtx_PLUS (Pmode, op1, op2);
6949 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
6950 return force_reg (Pmode, y);
6951 else
6952 return y;
6955 return force_reg (Pmode, x);
6957 else if ((TARGET_ELF
6958 #if TARGET_MACHO
6959 || !MACHO_DYNAMIC_NO_PIC_P
6960 #endif
6962 && TARGET_32BIT
6963 && TARGET_NO_TOC
6964 && ! flag_pic
6965 && GET_CODE (x) != CONST_INT
6966 && GET_CODE (x) != CONST_WIDE_INT
6967 && GET_CODE (x) != CONST_DOUBLE
6968 && CONSTANT_P (x)
6969 && GET_MODE_NUNITS (mode) == 1
6970 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6971 || (/* ??? Assume floating point reg based on mode? */
6972 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6973 && (mode == DFmode || mode == DDmode))))
6975 rtx reg = gen_reg_rtx (Pmode);
6976 if (TARGET_ELF)
6977 emit_insn (gen_elf_high (reg, x));
6978 else
6979 emit_insn (gen_macho_high (reg, x));
6980 return gen_rtx_LO_SUM (Pmode, reg, x);
6982 else if (TARGET_TOC
6983 && GET_CODE (x) == SYMBOL_REF
6984 && constant_pool_expr_p (x)
6985 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
6986 return create_TOC_reference (x, NULL_RTX);
6987 else
6988 return x;
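/* Worked example (illustrative): legitimizing (plus (reg)
   (const_int 0x12340)) for SImode splits the constant into
   high_int == 0x10000 and low_int == 0x2340, emits an addis-style
   add of the high part, and returns (plus (reg') (const_int 0x2340)),
   which is a valid D-form address.  */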
6991 /* Debug version of rs6000_legitimize_address. */
6992 static rtx
6993 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
6995 rtx ret;
6996 rtx_insn *insns;
6998 start_sequence ();
6999 ret = rs6000_legitimize_address (x, oldx, mode);
7000 insns = get_insns ();
7001 end_sequence ();
7003 if (ret != x)
7005 fprintf (stderr,
7006 "\nrs6000_legitimize_address: mode %s, old code %s, "
7007 "new code %s, modified\n",
7008 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
7009 GET_RTX_NAME (GET_CODE (ret)));
7011 fprintf (stderr, "Original address:\n");
7012 debug_rtx (x);
7014 fprintf (stderr, "oldx:\n");
7015 debug_rtx (oldx);
7017 fprintf (stderr, "New address:\n");
7018 debug_rtx (ret);
7020 if (insns)
7022 fprintf (stderr, "Insns added:\n");
7023 debug_rtx_list (insns, 20);
7026 else
7028 fprintf (stderr,
7029 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
7030 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
7032 debug_rtx (x);
7035 if (insns)
7036 emit_insn (insns);
7038 return ret;
7041 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7042 We need to emit DTP-relative relocations. */
7044 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7045 static void
7046 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
7048 switch (size)
7050 case 4:
7051 fputs ("\t.long\t", file);
7052 break;
7053 case 8:
7054 fputs (DOUBLE_INT_ASM_OP, file);
7055 break;
7056 default:
7057 gcc_unreachable ();
7059 output_addr_const (file, x);
7060 fputs ("@dtprel+0x8000", file);
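/* Example output (illustrative): for SIZE == 4 and a symbol "foo"
   this emits "\t.long\tfoo@dtprel+0x8000"; for SIZE == 8 it uses
   DOUBLE_INT_ASM_OP instead of ".long".  */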
7063 /* Return true if X is a symbol that refers to real (rather than emulated)
7064 TLS. */
7066 static bool
7067 rs6000_real_tls_symbol_ref_p (rtx x)
7069 return (GET_CODE (x) == SYMBOL_REF
7070 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
7073 /* In the name of slightly smaller debug output, and to cater to
7074 general assembler lossage, recognize various UNSPEC sequences
7075 and turn them back into a direct symbol reference. */
7077 static rtx
7078 rs6000_delegitimize_address (rtx orig_x)
7080 rtx x, y, offset;
7082 orig_x = delegitimize_mem_from_attrs (orig_x);
7083 x = orig_x;
7084 if (MEM_P (x))
7085 x = XEXP (x, 0);
7087 y = x;
7088 if (TARGET_CMODEL != CMODEL_SMALL
7089 && GET_CODE (y) == LO_SUM)
7090 y = XEXP (y, 1);
7092 offset = NULL_RTX;
7093 if (GET_CODE (y) == PLUS
7094 && GET_MODE (y) == Pmode
7095 && CONST_INT_P (XEXP (y, 1)))
7097 offset = XEXP (y, 1);
7098 y = XEXP (y, 0);
7101 if (GET_CODE (y) == UNSPEC
7102 && XINT (y, 1) == UNSPEC_TOCREL)
7104 y = XVECEXP (y, 0, 0);
7106 #ifdef HAVE_AS_TLS
7107 /* Do not associate thread-local symbols with the original
7108 constant pool symbol. */
7109 if (TARGET_XCOFF
7110 && GET_CODE (y) == SYMBOL_REF
7111 && CONSTANT_POOL_ADDRESS_P (y)
7112 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
7113 return orig_x;
7114 #endif
7116 if (offset != NULL_RTX)
7117 y = gen_rtx_PLUS (Pmode, y, offset);
7118 if (!MEM_P (orig_x))
7119 return y;
7120 else
7121 return replace_equiv_address_nv (orig_x, y);
7124 if (TARGET_MACHO
7125 && GET_CODE (orig_x) == LO_SUM
7126 && GET_CODE (XEXP (orig_x, 1)) == CONST)
7128 y = XEXP (XEXP (orig_x, 1), 0);
7129 if (GET_CODE (y) == UNSPEC
7130 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
7131 return XVECEXP (y, 0, 0);
7134 return orig_x;
7137 /* Return true if X shouldn't be emitted into the debug info.
7138 The linker doesn't like .toc section references from
7139 .debug_* sections, so reject .toc section symbols. */
7141 static bool
7142 rs6000_const_not_ok_for_debug_p (rtx x)
7144 if (GET_CODE (x) == SYMBOL_REF
7145 && CONSTANT_POOL_ADDRESS_P (x))
7147 rtx c = get_pool_constant (x);
7148 machine_mode cmode = get_pool_mode (x);
7149 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7150 return true;
7153 return false;
7156 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7158 static GTY(()) rtx rs6000_tls_symbol;
7159 static rtx
7160 rs6000_tls_get_addr (void)
7162 if (!rs6000_tls_symbol)
7163 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7165 return rs6000_tls_symbol;
7168 /* Construct the SYMBOL_REF for TLS GOT references. */
7170 static GTY(()) rtx rs6000_got_symbol;
7171 static rtx
7172 rs6000_got_sym (void)
7174 if (!rs6000_got_symbol)
7176 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7177 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7178 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7181 return rs6000_got_symbol;
7184 /* AIX Thread-Local Address support. */
7186 static rtx
7187 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7189 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7190 const char *name;
7191 char *tlsname;
7193 name = XSTR (addr, 0);
7194 /* Append TLS CSECT qualifier, unless the symbol already is qualified
7195 or the symbol will be in the TLS private data section. */
7196 if (name[strlen (name) - 1] != ']'
7197 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7198 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7200 tlsname = XALLOCAVEC (char, strlen (name) + 4);
7201 strcpy (tlsname, name);
7202 strcat (tlsname,
7203 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7204 tlsaddr = copy_rtx (addr);
7205 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7207 else
7208 tlsaddr = addr;
7210 /* Place addr into TOC constant pool. */
7211 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7213 /* Output the TOC entry and create the MEM referencing the value. */
7214 if (constant_pool_expr_p (XEXP (sym, 0))
7215 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7217 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7218 mem = gen_const_mem (Pmode, tocref);
7219 set_mem_alias_set (mem, get_TOC_alias_set ());
7221 else
7222 return sym;
7224 /* Use global-dynamic for local-dynamic. */
7225 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7226 || model == TLS_MODEL_LOCAL_DYNAMIC)
7228 /* Create new TOC reference for @m symbol. */
7229 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7230 tlsname = XALLOCAVEC (char, strlen (name) + 1);
7231 strcpy (tlsname, "*LCM");
7232 strcat (tlsname, name + 3);
7233 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7234 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7235 tocref = create_TOC_reference (modaddr, NULL_RTX);
7236 rtx modmem = gen_const_mem (Pmode, tocref);
7237 set_mem_alias_set (modmem, get_TOC_alias_set ());
7239 rtx modreg = gen_reg_rtx (Pmode);
7240 emit_insn (gen_rtx_SET (VOIDmode, modreg, modmem));
7242 tmpreg = gen_reg_rtx (Pmode);
7243 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7245 dest = gen_reg_rtx (Pmode);
7246 if (TARGET_32BIT)
7247 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7248 else
7249 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7250 return dest;
7252 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
7253 else if (TARGET_32BIT)
7255 tlsreg = gen_reg_rtx (SImode);
7256 emit_insn (gen_tls_get_tpointer (tlsreg));
7258 else
7259 tlsreg = gen_rtx_REG (DImode, 13);
7261 /* Load the TOC value into temporary register. */
7262 tmpreg = gen_reg_rtx (Pmode);
7263 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7264 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7265 gen_rtx_MINUS (Pmode, addr, tlsreg));
7267 /* Add TOC symbol value to TLS pointer. */
7268 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7270 return dest;
7273 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7274 this (thread-local) address. */
7276 static rtx
7277 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7279 rtx dest, insn;
7281 if (TARGET_XCOFF)
7282 return rs6000_legitimize_tls_address_aix (addr, model);
7284 dest = gen_reg_rtx (Pmode);
7285 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7287 rtx tlsreg;
7289 if (TARGET_64BIT)
7291 tlsreg = gen_rtx_REG (Pmode, 13);
7292 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7294 else
7296 tlsreg = gen_rtx_REG (Pmode, 2);
7297 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7299 emit_insn (insn);
7301 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7303 rtx tlsreg, tmp;
7305 tmp = gen_reg_rtx (Pmode);
7306 if (TARGET_64BIT)
7308 tlsreg = gen_rtx_REG (Pmode, 13);
7309 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7311 else
7313 tlsreg = gen_rtx_REG (Pmode, 2);
7314 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7316 emit_insn (insn);
7317 if (TARGET_64BIT)
7318 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7319 else
7320 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7321 emit_insn (insn);
7323 else
7325 rtx r3, got, tga, tmp1, tmp2, call_insn;
7327 /* We currently use relocations like @got@tlsgd for tls, which
7328 means the linker will handle allocation of tls entries, placing
7329 them in the .got section. So use a pointer to the .got section,
7330 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7331 or to secondary GOT sections used by 32-bit -fPIC. */
7332 if (TARGET_64BIT)
7333 got = gen_rtx_REG (Pmode, 2);
7334 else
7336 if (flag_pic == 1)
7337 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7338 else
7340 rtx gsym = rs6000_got_sym ();
7341 got = gen_reg_rtx (Pmode);
7342 if (flag_pic == 0)
7343 rs6000_emit_move (got, gsym, Pmode);
7344 else
7346 rtx mem, lab, last;
7348 tmp1 = gen_reg_rtx (Pmode);
7349 tmp2 = gen_reg_rtx (Pmode);
7350 mem = gen_const_mem (Pmode, tmp1);
7351 lab = gen_label_rtx ();
7352 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7353 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7354 if (TARGET_LINK_STACK)
7355 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7356 emit_move_insn (tmp2, mem);
7357 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7358 set_unique_reg_note (last, REG_EQUAL, gsym);
7363 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7365 tga = rs6000_tls_get_addr ();
7366 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7367 1, const0_rtx, Pmode);
7369 r3 = gen_rtx_REG (Pmode, 3);
7370 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7372 if (TARGET_64BIT)
7373 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7374 else
7375 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7377 else if (DEFAULT_ABI == ABI_V4)
7378 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7379 else
7380 gcc_unreachable ();
7381 call_insn = last_call_insn ();
7382 PATTERN (call_insn) = insn;
7383 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7384 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7385 pic_offset_table_rtx);
7387 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7389 tga = rs6000_tls_get_addr ();
7390 tmp1 = gen_reg_rtx (Pmode);
7391 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7392 1, const0_rtx, Pmode);
7394 r3 = gen_rtx_REG (Pmode, 3);
7395 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7397 if (TARGET_64BIT)
7398 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7399 else
7400 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7402 else if (DEFAULT_ABI == ABI_V4)
7403 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7404 else
7405 gcc_unreachable ();
7406 call_insn = last_call_insn ();
7407 PATTERN (call_insn) = insn;
7408 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7409 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7410 pic_offset_table_rtx);
7412 if (rs6000_tls_size == 16)
7414 if (TARGET_64BIT)
7415 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7416 else
7417 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7419 else if (rs6000_tls_size == 32)
7421 tmp2 = gen_reg_rtx (Pmode);
7422 if (TARGET_64BIT)
7423 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7424 else
7425 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7426 emit_insn (insn);
7427 if (TARGET_64BIT)
7428 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7429 else
7430 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7432 else
7434 tmp2 = gen_reg_rtx (Pmode);
7435 if (TARGET_64BIT)
7436 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7437 else
7438 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7439 emit_insn (insn);
7440 insn = gen_rtx_SET (Pmode, dest,
7441 gen_rtx_PLUS (Pmode, tmp2, tmp1));
7443 emit_insn (insn);
7445 else
7447 /* IE, or 64-bit offset LE. */
7448 tmp2 = gen_reg_rtx (Pmode);
7449 if (TARGET_64BIT)
7450 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7451 else
7452 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7453 emit_insn (insn);
7454 if (TARGET_64BIT)
7455 insn = gen_tls_tls_64 (dest, tmp2, addr);
7456 else
7457 insn = gen_tls_tls_32 (dest, tmp2, addr);
7458 emit_insn (insn);
7462 return dest;
7465 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7467 static bool
7468 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7470 if (GET_CODE (x) == HIGH
7471 && GET_CODE (XEXP (x, 0)) == UNSPEC)
7472 return true;
7474 /* A TLS symbol in the TOC cannot contain a sum. */
7475 if (GET_CODE (x) == CONST
7476 && GET_CODE (XEXP (x, 0)) == PLUS
7477 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7478 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
7479 return true;
7481 /* Do not place an ELF TLS symbol in the constant pool. */
7482 return TARGET_ELF && tls_referenced_p (x);
7485 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
7486 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
7487 can be addressed relative to the TOC pointer. */
7489 static bool
7490 use_toc_relative_ref (rtx sym)
7492 return ((constant_pool_expr_p (sym)
7493 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
7494 get_pool_mode (sym)))
7495 || (TARGET_CMODEL == CMODEL_MEDIUM
7496 && SYMBOL_REF_LOCAL_P (sym)));
7499 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
7500 replace the input X, or the original X if no replacement is called for.
7501 The output parameter *WIN is 1 if the calling macro should goto WIN,
7502 0 if it should not.
7504 For RS/6000, we wish to handle large displacements off a base
7505 register by splitting the addend across an addis and the mem insn.
7506 This cuts the number of extra insns needed from 3 to 1.
7508 On Darwin, we use this to generate code for floating point constants.
7509 A movsf_low is generated so we wind up with 2 instructions rather than 3.
7510 The Darwin code is inside #if TARGET_MACHO because only then are the
7511 machopic_* functions defined. */
7512 static rtx
7513 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
7514 int opnum, int type,
7515 int ind_levels ATTRIBUTE_UNUSED, int *win)
7517 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7519 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
7520 DFmode/DImode MEM. */
7521 if (reg_offset_p
7522 && opnum == 1
7523 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
7524 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
7525 reg_offset_p = false;
7527 /* We must recognize output that we have already generated ourselves. */
7528 if (GET_CODE (x) == PLUS
7529 && GET_CODE (XEXP (x, 0)) == PLUS
7530 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7531 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7532 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7534 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7535 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7536 opnum, (enum reload_type) type);
7537 *win = 1;
7538 return x;
7541 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
7542 if (GET_CODE (x) == LO_SUM
7543 && GET_CODE (XEXP (x, 0)) == HIGH)
7545 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7546 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7547 opnum, (enum reload_type) type);
7548 *win = 1;
7549 return x;
7552 #if TARGET_MACHO
7553 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
7554 && GET_CODE (x) == LO_SUM
7555 && GET_CODE (XEXP (x, 0)) == PLUS
7556 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
7557 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
7558 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
7559 && machopic_operand_p (XEXP (x, 1)))
7561 /* Result of previous invocation of this function on Darwin
7562 floating point constant. */
7563 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7564 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7565 opnum, (enum reload_type) type);
7566 *win = 1;
7567 return x;
7569 #endif
7571 if (TARGET_CMODEL != CMODEL_SMALL
7572 && reg_offset_p
7573 && small_toc_ref (x, VOIDmode))
7575 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
7576 x = gen_rtx_LO_SUM (Pmode, hi, x);
7577 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7578 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7579 opnum, (enum reload_type) type);
7580 *win = 1;
7581 return x;
7584 if (GET_CODE (x) == PLUS
7585 && GET_CODE (XEXP (x, 0)) == REG
7586 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
7587 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
7588 && GET_CODE (XEXP (x, 1)) == CONST_INT
7589 && reg_offset_p
7590 && !SPE_VECTOR_MODE (mode)
7591 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7592 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
7594 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
7595 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
7596 HOST_WIDE_INT high
7597 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
7599 /* Check for 32-bit overflow. */
7600 if (high + low != val)
7602 *win = 0;
7603 return x;
7606 /* Reload the high part into a base reg; leave the low part
7607 in the mem directly. */
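/* Worked example (illustrative): for val = 0x12348 the arithmetic
   above gives low = 0x2348 and high = 0x10000, so the address is
   rebuilt as ((reg + 0x10000) + 0x2348); reload materializes the
   inner PLUS with a single addis, and 0x2348 stays in the d-form
   displacement of the memory access. */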
7609 x = gen_rtx_PLUS (GET_MODE (x),
7610 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
7611 GEN_INT (high)),
7612 GEN_INT (low));
7614 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7615 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7616 opnum, (enum reload_type) type);
7617 *win = 1;
7618 return x;
7621 if (GET_CODE (x) == SYMBOL_REF
7622 && reg_offset_p
7623 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
7624 && !SPE_VECTOR_MODE (mode)
7625 #if TARGET_MACHO
7626 && DEFAULT_ABI == ABI_DARWIN
7627 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
7628 && machopic_symbol_defined_p (x)
7629 #else
7630 && DEFAULT_ABI == ABI_V4
7631 && !flag_pic
7632 #endif
7633 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
7634 The same goes for DImode without 64-bit gprs and DFmode and DDmode
7635 without fprs.
7636 ??? Assume floating point reg based on mode? This assumption is
7637 violated by e.g. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
7638 where reload ends up doing a DFmode load of a constant from
7639 mem using two gprs. Unfortunately, at this point reload
7640 hasn't yet selected regs so poking around in reload data
7641 won't help and even if we could figure out the regs reliably,
7642 we'd still want to allow this transformation when the mem is
7643 naturally aligned. Since we say the address is good here, we
7644 can't disable offsets from LO_SUMs in mem_operand_gpr.
7645 FIXME: Allow offset from lo_sum for other modes too, when
7646 mem is sufficiently aligned.
7648 Also disallow this if the type can go in VMX/Altivec registers, since
7649 those registers do not have d-form (reg+offset) address modes. */
7650 && !reg_addr[mode].scalar_in_vmx_p
7651 && mode != TFmode
7652 && mode != TDmode
7653 && (mode != TImode || !TARGET_VSX_TIMODE)
7654 && mode != PTImode
7655 && (mode != DImode || TARGET_POWERPC64)
7656 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
7657 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
7659 #if TARGET_MACHO
7660 if (flag_pic)
7662 rtx offset = machopic_gen_offset (x);
7663 x = gen_rtx_LO_SUM (GET_MODE (x),
7664 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
7665 gen_rtx_HIGH (Pmode, offset)), offset);
7667 else
7668 #endif
7669 x = gen_rtx_LO_SUM (GET_MODE (x),
7670 gen_rtx_HIGH (Pmode, x), x);
7672 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7673 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7674 opnum, (enum reload_type) type);
7675 *win = 1;
7676 return x;
7679 /* Reload an offset address wrapped by an AND that represents the
7680 masking of the lower bits. Strip the outer AND and let reload
7681 convert the offset address into an indirect address. For VSX,
7682 force reload to create the address with an AND in a separate
7683 register, because we can't guarantee an altivec register will
7684 be used. */
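/* E.g. (illustrative): (and (plus (reg 9) (const_int 32)) (const_int -16))
   is reduced to the inner PLUS here; the dropped AND only mirrors the
   16-byte alignment masking that the Altivec load/store performs on
   its effective address anyway. */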
7685 if (VECTOR_MEM_ALTIVEC_P (mode)
7686 && GET_CODE (x) == AND
7687 && GET_CODE (XEXP (x, 0)) == PLUS
7688 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7689 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7690 && GET_CODE (XEXP (x, 1)) == CONST_INT
7691 && INTVAL (XEXP (x, 1)) == -16)
7693 x = XEXP (x, 0);
7694 *win = 1;
7695 return x;
7698 if (TARGET_TOC
7699 && reg_offset_p
7700 && GET_CODE (x) == SYMBOL_REF
7701 && use_toc_relative_ref (x))
7703 x = create_TOC_reference (x, NULL_RTX);
7704 if (TARGET_CMODEL != CMODEL_SMALL)
7705 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7706 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7707 opnum, (enum reload_type) type);
7708 *win = 1;
7709 return x;
7711 *win = 0;
7712 return x;
7715 /* Debug version of rs6000_legitimize_reload_address. */
7716 static rtx
7717 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
7718 int opnum, int type,
7719 int ind_levels, int *win)
7721 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
7722 ind_levels, win);
7723 fprintf (stderr,
7724 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
7725 "type = %d, ind_levels = %d, win = %d, original addr:\n",
7726 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
7727 debug_rtx (x);
7729 if (x == ret)
7730 fprintf (stderr, "Same address returned\n");
7731 else if (!ret)
7732 fprintf (stderr, "NULL returned\n");
7733 else
7735 fprintf (stderr, "New address:\n");
7736 debug_rtx (ret);
7739 return ret;
7742 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
7743 that is a valid memory address for an instruction.
7744 The MODE argument is the machine mode for the MEM expression
7745 that wants to use this address.
7747 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
7748 refers to a constant pool entry of an address (or the sum of it
7749 plus a constant), a short (16-bit signed) constant plus a register,
7750 the sum of two registers, or a register indirect, possibly with an
7751 auto-increment. For DFmode, DDmode and DImode with a constant plus
7752 register, we must ensure that both words are addressable, or on
7753 PowerPC64 that the offset is word aligned.
7755 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
7756 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
7757 because adjacent memory cells are accessed by adding word-sized offsets
7758 during assembly output. */
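/* Illustrative RTL shapes accepted below (editorial sketch):
     (reg 9)                            register indirect
     (pre_inc (reg 9))                  auto-increment, TARGET_UPDATE only
     (plus (reg 9) (const_int 40))      16-bit signed displacement
     (plus (reg 9) (reg 10))            indexed, reg + reg
     (lo_sum (reg 10) (symbol_ref X))   lo_sum / constant-pool reference  */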
7759 static bool
7760 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
7762 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7764 /* If this is an unaligned stvx/lvx type address, discard the outer AND. */
7765 if (VECTOR_MEM_ALTIVEC_P (mode)
7766 && GET_CODE (x) == AND
7767 && GET_CODE (XEXP (x, 1)) == CONST_INT
7768 && INTVAL (XEXP (x, 1)) == -16)
7769 x = XEXP (x, 0);
7771 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
7772 return 0;
7773 if (legitimate_indirect_address_p (x, reg_ok_strict))
7774 return 1;
7775 if (TARGET_UPDATE
7776 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
7777 && mode_supports_pre_incdec_p (mode)
7778 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
7779 return 1;
7780 if (virtual_stack_registers_memory_p (x))
7781 return 1;
7782 if (reg_offset_p && legitimate_small_data_p (mode, x))
7783 return 1;
7784 if (reg_offset_p
7785 && legitimate_constant_pool_address_p (x, mode,
7786 reg_ok_strict || lra_in_progress))
7787 return 1;
7788 /* For TImode, if we have load/store quad and TImode in VSX registers, only
7789 allow register indirect addresses. This will allow the values to go in
7790 either GPRs or VSX registers without reloading. The vector types would
7791 tend to go into VSX registers, so we allow REG+REG, while TImode seems
7792 somewhat split, in that some uses are GPR based, and some VSX based. */
7793 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
7794 return 0;
7795 /* If not REG_OK_STRICT (before reload), allow any stack offset. */
7796 if (! reg_ok_strict
7797 && reg_offset_p
7798 && GET_CODE (x) == PLUS
7799 && GET_CODE (XEXP (x, 0)) == REG
7800 && (XEXP (x, 0) == virtual_stack_vars_rtx
7801 || XEXP (x, 0) == arg_pointer_rtx)
7802 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7803 return 1;
7804 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
7805 return 1;
7806 if (mode != TFmode
7807 && mode != TDmode
7808 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7809 || TARGET_POWERPC64
7810 || (mode != DFmode && mode != DDmode)
7811 || (TARGET_E500_DOUBLE && mode != DDmode))
7812 && (TARGET_POWERPC64 || mode != DImode)
7813 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
7814 && mode != PTImode
7815 && !avoiding_indexed_address_p (mode)
7816 && legitimate_indexed_address_p (x, reg_ok_strict))
7817 return 1;
7818 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
7819 && mode_supports_pre_modify_p (mode)
7820 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
7821 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
7822 reg_ok_strict, false)
7823 || (!avoiding_indexed_address_p (mode)
7824 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
7825 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7826 return 1;
7827 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
7828 return 1;
7829 return 0;
7832 /* Debug version of rs6000_legitimate_address_p. */
7833 static bool
7834 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
7835 bool reg_ok_strict)
7837 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
7838 fprintf (stderr,
7839 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
7840 "strict = %d, reload = %s, code = %s\n",
7841 ret ? "true" : "false",
7842 GET_MODE_NAME (mode),
7843 reg_ok_strict,
7844 (reload_completed
7845 ? "after"
7846 : (reload_in_progress ? "progress" : "before")),
7847 GET_RTX_NAME (GET_CODE (x)));
7848 debug_rtx (x);
7850 return ret;
7853 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
7855 static bool
7856 rs6000_mode_dependent_address_p (const_rtx addr,
7857 addr_space_t as ATTRIBUTE_UNUSED)
7859 return rs6000_mode_dependent_address_ptr (addr);
7862 /* Go to LABEL if ADDR (a legitimate address expression)
7863 has an effect that depends on the machine mode it is used for.
7865 On the RS/6000 this is true of all integral offsets (since AltiVec
7866 and VSX modes don't allow them) and of any pre-increment or decrement.
7868 ??? Except that due to conceptual problems in offsettable_address_p
7869 we can't really report the problems of integral offsets. So leave
7870 this assuming that the adjustable offset must be valid for the
7871 sub-words of a TFmode operand, which is what we had before. */
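/* Worked example (illustrative): on 64-bit, offset 0x7ffc is flagged
   as mode-dependent because 0x7ffc + 0x8000 = 0xfffc >= 0x10000 - 8;
   a TFmode access at that offset would need a sub-word displacement
   of 0x7ffc + 8 = 0x8004, which no longer fits the signed 16-bit
   field. */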
7873 static bool
7874 rs6000_mode_dependent_address (const_rtx addr)
7876 switch (GET_CODE (addr))
7878 case PLUS:
7879 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
7880 is considered a legitimate address before reload, so there
7881 are no offset restrictions in that case. Note that this
7882 condition is safe in strict mode because any address involving
7883 virtual_stack_vars_rtx or arg_pointer_rtx would already have
7884 been rejected as illegitimate. */
7885 if (XEXP (addr, 0) != virtual_stack_vars_rtx
7886 && XEXP (addr, 0) != arg_pointer_rtx
7887 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
7889 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
7890 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
7892 break;
7894 case LO_SUM:
7895 /* Anything in the constant pool is sufficiently aligned that
7896 all bytes have the same high part address. */
7897 return !legitimate_constant_pool_address_p (addr, QImode, false);
7899 /* Auto-increment cases are now treated generically in recog.c. */
7900 case PRE_MODIFY:
7901 return TARGET_UPDATE;
7903 /* AND is only allowed in Altivec loads. */
7904 case AND:
7905 return true;
7907 default:
7908 break;
7911 return false;
7914 /* Debug version of rs6000_mode_dependent_address. */
7915 static bool
7916 rs6000_debug_mode_dependent_address (const_rtx addr)
7918 bool ret = rs6000_mode_dependent_address (addr);
7920 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
7921 ret ? "true" : "false");
7922 debug_rtx (addr);
7924 return ret;
7927 /* Implement FIND_BASE_TERM. */
7929 rtx
7930 rs6000_find_base_term (rtx op)
7932 rtx base;
7934 base = op;
7935 if (GET_CODE (base) == CONST)
7936 base = XEXP (base, 0);
7937 if (GET_CODE (base) == PLUS)
7938 base = XEXP (base, 0);
7939 if (GET_CODE (base) == UNSPEC)
7940 switch (XINT (base, 1))
7942 case UNSPEC_TOCREL:
7943 case UNSPEC_MACHOPIC_OFFSET:
7944 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
7945 for aliasing purposes. */
7946 return XVECEXP (base, 0, 0);
7949 return op;
7952 /* More elaborate version of recog's offsettable_memref_p predicate
7953 that works around the ??? note of rs6000_mode_dependent_address.
7954 In particular it accepts
7956 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
7958 in 32-bit mode, which the recog predicate rejects. */
7960 static bool
7961 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
7963 bool worst_case;
7965 if (!MEM_P (op))
7966 return false;
7968 /* First mimic offsettable_memref_p. */
7969 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
7970 return true;
7972 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
7973 the latter predicate knows nothing about the mode of the memory
7974 reference and, therefore, assumes that it is the largest supported
7975 mode (TFmode). As a consequence, legitimate offsettable memory
7976 references are rejected. rs6000_legitimate_offset_address_p contains
7977 the correct logic for the PLUS case of rs6000_mode_dependent_address,
7978 at least with a little bit of help here given that we know the
7979 actual registers used. */
7980 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
7981 || GET_MODE_SIZE (reg_mode) == 4);
7982 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
7983 true, worst_case);
7986 /* Change register usage conditional on target flags. */
7987 static void
7988 rs6000_conditional_register_usage (void)
7990 int i;
7992 if (TARGET_DEBUG_TARGET)
7993 fprintf (stderr, "rs6000_conditional_register_usage called\n");
7995 /* Set MQ register fixed (already call_used) so that it will not be
7996 allocated. */
7997 fixed_regs[64] = 1;
7999 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
8000 if (TARGET_64BIT)
8001 fixed_regs[13] = call_used_regs[13]
8002 = call_really_used_regs[13] = 1;
8004 /* Conditionally disable FPRs. */
8005 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
8006 for (i = 32; i < 64; i++)
8007 fixed_regs[i] = call_used_regs[i]
8008 = call_really_used_regs[i] = 1;
8010 /* The TOC register is not killed across calls in a way that is
8011 visible to the compiler. */
8012 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8013 call_really_used_regs[2] = 0;
8015 if (DEFAULT_ABI == ABI_V4
8016 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8017 && flag_pic == 2)
8018 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8020 if (DEFAULT_ABI == ABI_V4
8021 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8022 && flag_pic == 1)
8023 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8024 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8025 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8027 if (DEFAULT_ABI == ABI_DARWIN
8028 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
8029 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8030 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8031 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8033 if (TARGET_TOC && TARGET_MINIMAL_TOC)
8034 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8035 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8037 if (TARGET_SPE)
8039 global_regs[SPEFSCR_REGNO] = 1;
8040 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
8041 registers in prologues and epilogues. We no longer use r14
8042 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
8043 pool for link-compatibility with older versions of GCC. Once
8044 "old" code has died out, we can return r14 to the allocation
8045 pool. */
8046 fixed_regs[14]
8047 = call_used_regs[14]
8048 = call_really_used_regs[14] = 1;
8051 if (!TARGET_ALTIVEC && !TARGET_VSX)
8053 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
8054 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8055 call_really_used_regs[VRSAVE_REGNO] = 1;
8058 if (TARGET_ALTIVEC || TARGET_VSX)
8059 global_regs[VSCR_REGNO] = 1;
8061 if (TARGET_ALTIVEC_ABI)
8063 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
8064 call_used_regs[i] = call_really_used_regs[i] = 1;
8066 /* AIX reserves VR20:31 in non-extended ABI mode. */
8067 if (TARGET_XCOFF)
8068 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8069 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8074 /* Output insns to set DEST equal to the constant SOURCE as a series of
8075 lis, ori and shl instructions and return TRUE. */
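/* E.g. (illustrative, assumed mnemonics): for SImode c = 0x12345678
   this emits
       lis  tmp, 0x1234
       ori  dest, tmp, 0x5678
   and the REG_EQUAL note added at the end records the full constant. */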
8077 bool
8078 rs6000_emit_set_const (rtx dest, rtx source)
8080 machine_mode mode = GET_MODE (dest);
8081 rtx temp, set;
8082 rtx_insn *insn;
8083 HOST_WIDE_INT c;
8085 gcc_checking_assert (CONST_INT_P (source));
8086 c = INTVAL (source);
8087 switch (mode)
8089 case QImode:
8090 case HImode:
8091 emit_insn (gen_rtx_SET (VOIDmode, dest, source));
8092 return true;
8094 case SImode:
8095 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8097 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (temp),
8098 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8099 emit_insn (gen_rtx_SET (VOIDmode, dest,
8100 gen_rtx_IOR (SImode, copy_rtx (temp),
8101 GEN_INT (c & 0xffff))));
8102 break;
8104 case DImode:
8105 if (!TARGET_POWERPC64)
8107 rtx hi, lo;
8109 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8110 DImode);
8111 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8112 DImode);
8113 emit_move_insn (hi, GEN_INT (c >> 32));
8114 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8115 emit_move_insn (lo, GEN_INT (c));
8117 else
8118 rs6000_emit_set_long_const (dest, c);
8119 break;
8121 default:
8122 gcc_unreachable ();
8125 insn = get_last_insn ();
8126 set = single_set (insn);
8127 if (! CONSTANT_P (SET_SRC (set)))
8128 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8130 return true;
8133 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8134 Output insns to set DEST equal to the constant C as a series of
8135 lis, ori and shl instructions. */
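/* Illustrative expansion (assumed mnemonics) for
   c = 0x123456789abcdef0, which falls through to the final arm below:
       lis   tmp, 0x1234        # ud4, sign-extended
       ori   tmp, tmp, 0x5678   # merge ud3
       sldi  tmp, tmp, 32       # shift into the high doubleword
       oris  tmp, tmp, 0x9abc   # merge ud2
       ori   dest, tmp, 0xdef0  # merge ud1
   The earlier arms catch constants needing fewer instructions. */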
8137 static void
8138 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8140 rtx temp;
8141 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8143 ud1 = c & 0xffff;
8144 c = c >> 16;
8145 ud2 = c & 0xffff;
8146 c = c >> 16;
8147 ud3 = c & 0xffff;
8148 c = c >> 16;
8149 ud4 = c & 0xffff;
8151 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8152 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8153 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8155 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8156 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8158 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8160 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8161 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8162 if (ud1 != 0)
8163 emit_move_insn (dest,
8164 gen_rtx_IOR (DImode, copy_rtx (temp),
8165 GEN_INT (ud1)));
8167 else if (ud3 == 0 && ud4 == 0)
8169 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8171 gcc_assert (ud2 & 0x8000);
8172 emit_move_insn (copy_rtx (temp),
8173 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8174 if (ud1 != 0)
8175 emit_move_insn (copy_rtx (temp),
8176 gen_rtx_IOR (DImode, copy_rtx (temp),
8177 GEN_INT (ud1)));
8178 emit_move_insn (dest,
8179 gen_rtx_ZERO_EXTEND (DImode,
8180 gen_lowpart (SImode,
8181 copy_rtx (temp))));
8183 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8184 || (ud4 == 0 && ! (ud3 & 0x8000)))
8186 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8188 emit_move_insn (copy_rtx (temp),
8189 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8190 if (ud2 != 0)
8191 emit_move_insn (copy_rtx (temp),
8192 gen_rtx_IOR (DImode, copy_rtx (temp),
8193 GEN_INT (ud2)));
8194 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8195 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8196 GEN_INT (16)));
8197 if (ud1 != 0)
8198 emit_move_insn (dest,
8199 gen_rtx_IOR (DImode, copy_rtx (temp),
8200 GEN_INT (ud1)));
8202 else
8204 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8206 emit_move_insn (copy_rtx (temp),
8207 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8208 if (ud3 != 0)
8209 emit_move_insn (copy_rtx (temp),
8210 gen_rtx_IOR (DImode, copy_rtx (temp),
8211 GEN_INT (ud3)));
8213 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8214 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8215 GEN_INT (32)));
8216 if (ud2 != 0)
8217 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8218 gen_rtx_IOR (DImode, copy_rtx (temp),
8219 GEN_INT (ud2 << 16)));
8220 if (ud1 != 0)
8221 emit_move_insn (dest,
8222 gen_rtx_IOR (DImode, copy_rtx (temp),
8223 GEN_INT (ud1)));
8227 /* Helper for the following. Get rid of [r+r] memory refs
8228 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
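/* E.g. (illustrative): a TImode (mem (plus (reg 9) (reg 10))) has its
   address copied into a fresh base register below, because these wide
   modes are later accessed word by word at reg+offset, and an indexed
   address leaves no room for the added offsets. */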
8230 static void
8231 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8233 if (reload_in_progress)
8234 return;
8236 if (GET_CODE (operands[0]) == MEM
8237 && GET_CODE (XEXP (operands[0], 0)) != REG
8238 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8239 GET_MODE (operands[0]), false))
8240 operands[0]
8241 = replace_equiv_address (operands[0],
8242 copy_addr_to_reg (XEXP (operands[0], 0)));
8244 if (GET_CODE (operands[1]) == MEM
8245 && GET_CODE (XEXP (operands[1], 0)) != REG
8246 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8247 GET_MODE (operands[1]), false))
8248 operands[1]
8249 = replace_equiv_address (operands[1],
8250 copy_addr_to_reg (XEXP (operands[1], 0)));
8253 /* Generate a vector of constants to permute MODE for a little-endian
8254 storage operation by swapping the two halves of a vector. */
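/* For example (illustrative): V4SImode yields the selector
   { 2, 3, 0, 1 } -- each element index moves by subparts/2, i.e. the
   two doubleword halves of the vector swap places, matching what
   lxvd2x/stxvd2x do on little-endian. */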
8255 static rtvec
8256 rs6000_const_vec (machine_mode mode)
8258 int i, subparts;
8259 rtvec v;
8261 switch (mode)
8263 case V1TImode:
8264 subparts = 1;
8265 break;
8266 case V2DFmode:
8267 case V2DImode:
8268 subparts = 2;
8269 break;
8270 case V4SFmode:
8271 case V4SImode:
8272 subparts = 4;
8273 break;
8274 case V8HImode:
8275 subparts = 8;
8276 break;
8277 case V16QImode:
8278 subparts = 16;
8279 break;
8280 default:
8281 gcc_unreachable();
8284 v = rtvec_alloc (subparts);
8286 for (i = 0; i < subparts / 2; ++i)
8287 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8288 for (i = subparts / 2; i < subparts; ++i)
8289 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8291 return v;
8294 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8295 for a VSX load or store operation. */
8296 rtx
8297 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8299 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8300 return gen_rtx_VEC_SELECT (mode, source, par);
8303 /* Emit a little-endian load from vector memory location SOURCE to VSX
8304 register DEST in mode MODE. The load is done with two permuting
8305 insns that represent an lxvd2x and an xxpermdi. */
8306 void
8307 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8309 rtx tmp, permute_mem, permute_reg;
8311 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8312 V1TImode). */
8313 if (mode == TImode || mode == V1TImode)
8315 mode = V2DImode;
8316 dest = gen_lowpart (V2DImode, dest);
8317 source = adjust_address (source, V2DImode, 0);
8320 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8321 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8322 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8323 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
8324 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
8327 /* Emit a little-endian store to vector memory location DEST from VSX
8328 register SOURCE in mode MODE. The store is done with two permuting
8329 insns that represent an xxpermdi and an stxvd2x. */
8330 void
8331 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8333 rtx tmp, permute_src, permute_tmp;
8335 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8336 V1TImode). */
8337 if (mode == TImode || mode == V1TImode)
8339 mode = V2DImode;
8340 dest = adjust_address (dest, V2DImode, 0);
8341 source = gen_lowpart (V2DImode, source);
8344 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8345 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8346 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8347 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
8348 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
8351 /* Emit a sequence representing a little-endian VSX load or store,
8352 moving data from SOURCE to DEST in mode MODE. This is done
8353 separately from rs6000_emit_move to ensure it is called only
8354 during expand. LE VSX loads and stores introduced later are
8355 handled with a split. The expand-time RTL generation allows
8356 us to optimize away redundant pairs of register-permutes. */
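/* Usage sketch (hypothetical operands): expanding a V4SImode load as
     rs6000_emit_le_vsx_move (reg, mem, V4SImode);
   emits two permuting SETs -- an lxvd2x-style load followed by an
   xxpermdi-style register permute -- and keeping both visible at
   expand time is what lets later passes cancel redundant pairs. */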
8357 void
8358 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
8360 gcc_assert (!BYTES_BIG_ENDIAN
8361 && VECTOR_MEM_VSX_P (mode)
8362 && !gpr_or_gpr_p (dest, source)
8363 && (MEM_P (source) ^ MEM_P (dest)));
8365 if (MEM_P (source))
8367 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8368 rs6000_emit_le_vsx_load (dest, source, mode);
8370 else
8372 if (!REG_P (source))
8373 source = force_reg (mode, source);
8374 rs6000_emit_le_vsx_store (dest, source, mode);
8378 /* Emit a move from SOURCE to DEST in mode MODE. */
8379 void
8380 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
8382 rtx operands[2];
8383 operands[0] = dest;
8384 operands[1] = source;
8386 if (TARGET_DEBUG_ADDR)
8388 fprintf (stderr,
8389 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8390 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8391 GET_MODE_NAME (mode),
8392 reload_in_progress,
8393 reload_completed,
8394 can_create_pseudo_p ());
8395 debug_rtx (dest);
8396 fprintf (stderr, "source:\n");
8397 debug_rtx (source);
8400 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
8401 if (CONST_WIDE_INT_P (operands[1])
8402 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8404 /* This should be fixed with the introduction of CONST_WIDE_INT. */
8405 gcc_unreachable ();
8408 /* Check if GCC is setting up a block move that will end up using FP
8409 registers as temporaries. We must make sure this is acceptable. */
8410 if (GET_CODE (operands[0]) == MEM
8411 && GET_CODE (operands[1]) == MEM
8412 && mode == DImode
8413 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8414 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8415 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8416 ? 32 : MEM_ALIGN (operands[0])))
8417 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8418 ? 32
8419 : MEM_ALIGN (operands[1]))))
8420 && ! MEM_VOLATILE_P (operands [0])
8421 && ! MEM_VOLATILE_P (operands [1]))
8423 emit_move_insn (adjust_address (operands[0], SImode, 0),
8424 adjust_address (operands[1], SImode, 0));
8425 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8426 adjust_address (copy_rtx (operands[1]), SImode, 4));
8427 return;
8430 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8431 && !gpc_reg_operand (operands[1], mode))
8432 operands[1] = force_reg (mode, operands[1]);
8434 /* Recognize the case where operand[1] is a reference to thread-local
8435 data and load its address to a register. */
8436 if (tls_referenced_p (operands[1]))
8438 enum tls_model model;
8439 rtx tmp = operands[1];
8440 rtx addend = NULL;
8442 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8444 addend = XEXP (XEXP (tmp, 0), 1);
8445 tmp = XEXP (XEXP (tmp, 0), 0);
8448 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8449 model = SYMBOL_REF_TLS_MODEL (tmp);
8450 gcc_assert (model != 0);
8452 tmp = rs6000_legitimize_tls_address (tmp, model);
8453 if (addend)
8455 tmp = gen_rtx_PLUS (mode, tmp, addend);
8456 tmp = force_operand (tmp, operands[0]);
8458 operands[1] = tmp;
8461 /* Handle the case where reload calls us with an invalid address. */
8462 if (reload_in_progress && mode == Pmode
8463 && (! general_operand (operands[1], mode)
8464 || ! nonimmediate_operand (operands[0], mode)))
8465 goto emit_set;
8467 /* 128-bit constant floating-point values on Darwin should really be loaded
8468 as two parts. However, this premature splitting is a problem when DFmode
8469 values can go into Altivec registers. */
8470 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
8471 && !reg_addr[DFmode].scalar_in_vmx_p
8472 && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
8474 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
8475 simplify_gen_subreg (DFmode, operands[1], mode, 0),
8476 DFmode);
8477 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
8478 GET_MODE_SIZE (DFmode)),
8479 simplify_gen_subreg (DFmode, operands[1], mode,
8480 GET_MODE_SIZE (DFmode)),
8481 DFmode);
8482 return;
8485 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
8486 cfun->machine->sdmode_stack_slot =
8487 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
8490 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
8491 p1:SD) if p1 is not of floating point class and p0 is spilled as
8492 we can have no analogous movsd_store for this. */
8493 if (lra_in_progress && mode == DDmode
8494 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8495 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8496 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
8497 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
8499 enum reg_class cl;
8500 int regno = REGNO (SUBREG_REG (operands[1]));
8502 if (regno >= FIRST_PSEUDO_REGISTER)
8504 cl = reg_preferred_class (regno);
8505 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
8507 if (regno >= 0 && ! FP_REGNO_P (regno))
8509 mode = SDmode;
8510 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
8511 operands[1] = SUBREG_REG (operands[1]);
8514 if (lra_in_progress
8515 && mode == SDmode
8516 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8517 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8518 && (REG_P (operands[1])
8519 || (GET_CODE (operands[1]) == SUBREG
8520 && REG_P (SUBREG_REG (operands[1])))))
8522 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
8523 ? SUBREG_REG (operands[1]) : operands[1]);
8524 enum reg_class cl;
8526 if (regno >= FIRST_PSEUDO_REGISTER)
8528 cl = reg_preferred_class (regno);
8529 gcc_assert (cl != NO_REGS);
8530 regno = ira_class_hard_regs[cl][0];
8532 if (FP_REGNO_P (regno))
8534 if (GET_MODE (operands[0]) != DDmode)
8535 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
8536 emit_insn (gen_movsd_store (operands[0], operands[1]));
8538 else if (INT_REGNO_P (regno))
8539 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8540 else
8541 gcc_unreachable();
8542 return;
8544 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
8545 p:DD)) if p0 is not of floating point class and p1 is spilled as
8546 we can have no analogous movsd_load for this. */
8547 if (lra_in_progress && mode == DDmode
8548 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
8549 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
8550 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8551 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8553 enum reg_class cl;
8554 int regno = REGNO (SUBREG_REG (operands[0]));
8556 if (regno >= FIRST_PSEUDO_REGISTER)
8558 cl = reg_preferred_class (regno);
8559 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
8561 if (regno >= 0 && ! FP_REGNO_P (regno))
8563 mode = SDmode;
8564 operands[0] = SUBREG_REG (operands[0]);
8565 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
8568 if (lra_in_progress
8569 && mode == SDmode
8570 && (REG_P (operands[0])
8571 || (GET_CODE (operands[0]) == SUBREG
8572 && REG_P (SUBREG_REG (operands[0]))))
8573 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8574 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8576 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
8577 ? SUBREG_REG (operands[0]) : operands[0]);
8578 enum reg_class cl;
8580 if (regno >= FIRST_PSEUDO_REGISTER)
8582 cl = reg_preferred_class (regno);
8583 gcc_assert (cl != NO_REGS);
8584 regno = ira_class_hard_regs[cl][0];
8586 if (FP_REGNO_P (regno))
8588 if (GET_MODE (operands[1]) != DDmode)
8589 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
8590 emit_insn (gen_movsd_load (operands[0], operands[1]));
8592 else if (INT_REGNO_P (regno))
8593 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8594 else
8595 gcc_unreachable();
8596 return;
8599 if (reload_in_progress
8600 && mode == SDmode
8601 && cfun->machine->sdmode_stack_slot != NULL_RTX
8602 && MEM_P (operands[0])
8603 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
8604 && REG_P (operands[1]))
8606 if (FP_REGNO_P (REGNO (operands[1])))
8608 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
8609 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8610 emit_insn (gen_movsd_store (mem, operands[1]));
8612 else if (INT_REGNO_P (REGNO (operands[1])))
8614 rtx mem = operands[0];
8615 if (BYTES_BIG_ENDIAN)
8616 mem = adjust_address_nv (mem, mode, 4);
8617 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8618 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
8620 else
8621 gcc_unreachable();
8622 return;
8624 if (reload_in_progress
8625 && mode == SDmode
8626 && REG_P (operands[0])
8627 && MEM_P (operands[1])
8628 && cfun->machine->sdmode_stack_slot != NULL_RTX
8629 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
8631 if (FP_REGNO_P (REGNO (operands[0])))
8633 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
8634 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8635 emit_insn (gen_movsd_load (operands[0], mem));
8637 else if (INT_REGNO_P (REGNO (operands[0])))
8639 rtx mem = operands[1];
8640 if (BYTES_BIG_ENDIAN)
8641 mem = adjust_address_nv (mem, mode, 4);
8642 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8643 emit_insn (gen_movsd_hardfloat (operands[0], mem));
8645 else
8646 gcc_unreachable();
8647 return;
8650 /* FIXME: In the long term, this switch statement should go away
8651 and be replaced by a sequence of tests based on things like
8652 mode == Pmode. */
8653 switch (mode)
8655 case HImode:
8656 case QImode:
8657 if (CONSTANT_P (operands[1])
8658 && GET_CODE (operands[1]) != CONST_INT)
8659 operands[1] = force_const_mem (mode, operands[1]);
8660 break;
8662 case TFmode:
8663 case TDmode:
8664 rs6000_eliminate_indexed_memrefs (operands);
8665 /* fall through */
8667 case DFmode:
8668 case DDmode:
8669 case SFmode:
8670 case SDmode:
8671 if (CONSTANT_P (operands[1])
8672 && ! easy_fp_constant (operands[1], mode))
8673 operands[1] = force_const_mem (mode, operands[1]);
8674 break;
8676 case V16QImode:
8677 case V8HImode:
8678 case V4SFmode:
8679 case V4SImode:
8680 case V4HImode:
8681 case V2SFmode:
8682 case V2SImode:
8683 case V1DImode:
8684 case V2DFmode:
8685 case V2DImode:
8686 case V1TImode:
8687 if (CONSTANT_P (operands[1])
8688 && !easy_vector_constant (operands[1], mode))
8689 operands[1] = force_const_mem (mode, operands[1]);
8690 break;
8692 case SImode:
8693 case DImode:
8694 /* Use the default pattern for the address of ELF small data. */
8695 if (TARGET_ELF
8696 && mode == Pmode
8697 && DEFAULT_ABI == ABI_V4
8698 && (GET_CODE (operands[1]) == SYMBOL_REF
8699 || GET_CODE (operands[1]) == CONST)
8700 && small_data_operand (operands[1], mode))
8702 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8703 return;
8706 if (DEFAULT_ABI == ABI_V4
8707 && mode == Pmode && mode == SImode
8708 && flag_pic == 1 && got_operand (operands[1], mode))
8710 emit_insn (gen_movsi_got (operands[0], operands[1]));
8711 return;
8714 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
8715 && TARGET_NO_TOC
8716 && ! flag_pic
8717 && mode == Pmode
8718 && CONSTANT_P (operands[1])
8719 && GET_CODE (operands[1]) != HIGH
8720 && GET_CODE (operands[1]) != CONST_INT)
8722 rtx target = (!can_create_pseudo_p ()
8723 ? operands[0]
8724 : gen_reg_rtx (mode));
8726 /* If this is a function address on -mcall-aixdesc,
8727 convert it to the address of the descriptor. */
8728 if (DEFAULT_ABI == ABI_AIX
8729 && GET_CODE (operands[1]) == SYMBOL_REF
8730 && XSTR (operands[1], 0)[0] == '.')
8732 const char *name = XSTR (operands[1], 0);
8733 rtx new_ref;
8734 while (*name == '.')
8735 name++;
8736 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
8737 CONSTANT_POOL_ADDRESS_P (new_ref)
8738 = CONSTANT_POOL_ADDRESS_P (operands[1]);
8739 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
8740 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
8741 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
8742 operands[1] = new_ref;
8745 if (DEFAULT_ABI == ABI_DARWIN)
8747 #if TARGET_MACHO
8748 if (MACHO_DYNAMIC_NO_PIC_P)
8750 /* Take care of any required data indirection. */
8751 operands[1] = rs6000_machopic_legitimize_pic_address (
8752 operands[1], mode, operands[0]);
8753 if (operands[0] != operands[1])
8754 emit_insn (gen_rtx_SET (VOIDmode,
8755 operands[0], operands[1]));
8756 return;
8758 #endif
8759 emit_insn (gen_macho_high (target, operands[1]));
8760 emit_insn (gen_macho_low (operands[0], target, operands[1]));
8761 return;
8764 emit_insn (gen_elf_high (target, operands[1]));
8765 emit_insn (gen_elf_low (operands[0], target, operands[1]));
8766 return;
8769 /* If this is a SYMBOL_REF that refers to a constant pool entry,
8770 and we have put it in the TOC, we just need to make a TOC-relative
8771 reference to it. */
8772 if (TARGET_TOC
8773 && GET_CODE (operands[1]) == SYMBOL_REF
8774 && use_toc_relative_ref (operands[1]))
8775 operands[1] = create_TOC_reference (operands[1], operands[0]);
8776 else if (mode == Pmode
8777 && CONSTANT_P (operands[1])
8778 && GET_CODE (operands[1]) != HIGH
8779 && ((GET_CODE (operands[1]) != CONST_INT
8780 && ! easy_fp_constant (operands[1], mode))
8781 || (GET_CODE (operands[1]) == CONST_INT
8782 && (num_insns_constant (operands[1], mode)
8783 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
8784 || (GET_CODE (operands[0]) == REG
8785 && FP_REGNO_P (REGNO (operands[0]))))
8786 && !toc_relative_expr_p (operands[1], false)
8787 && (TARGET_CMODEL == CMODEL_SMALL
8788 || can_create_pseudo_p ()
8789 || (REG_P (operands[0])
8790 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
8793 #if TARGET_MACHO
8794 /* Darwin uses a special PIC legitimizer. */
8795 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
8797 operands[1] =
8798 rs6000_machopic_legitimize_pic_address (operands[1], mode,
8799 operands[0]);
8800 if (operands[0] != operands[1])
8801 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8802 return;
8804 #endif
8806 /* If we are to limit the number of things we put in the TOC and
8807 this is a symbol plus a constant we can add in one insn,
8808 just put the symbol in the TOC and add the constant. Don't do
8809 this if reload is in progress. */
8810 if (GET_CODE (operands[1]) == CONST
8811 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
8812 && GET_CODE (XEXP (operands[1], 0)) == PLUS
8813 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
8814 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
8815 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
8816 && ! side_effects_p (operands[0]))
8818 rtx sym =
8819 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
8820 rtx other = XEXP (XEXP (operands[1], 0), 1);
8822 sym = force_reg (mode, sym);
8823 emit_insn (gen_add3_insn (operands[0], sym, other));
8824 return;
8827 operands[1] = force_const_mem (mode, operands[1]);
8829 if (TARGET_TOC
8830 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8831 && constant_pool_expr_p (XEXP (operands[1], 0))
8832 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
8833 get_pool_constant (XEXP (operands[1], 0)),
8834 get_pool_mode (XEXP (operands[1], 0))))
8836 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
8837 operands[0]);
8838 operands[1] = gen_const_mem (mode, tocref);
8839 set_mem_alias_set (operands[1], get_TOC_alias_set ());
8842 break;
8844 case TImode:
8845 if (!VECTOR_MEM_VSX_P (TImode))
8846 rs6000_eliminate_indexed_memrefs (operands);
8847 break;
8849 case PTImode:
8850 rs6000_eliminate_indexed_memrefs (operands);
8851 break;
8853 default:
8854 fatal_insn ("bad move", gen_rtx_SET (VOIDmode, dest, source));
8857 /* Above, we may have called force_const_mem which may have returned
8858 an invalid address. If we can, fix this up; otherwise, reload will
8859 have to deal with it. */
8860 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
8861 operands[1] = validize_mem (operands[1]);
8863 emit_set:
8864 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8867 /* Return true if a structure, union or array containing FIELD should be
8868 accessed using `BLKmode'.
8870 For the SPE, simd types are V2SI, and gcc can be tempted to put the
8871 entire thing in a DI and use subregs to access the internals.
8872 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
8873 back-end. Because a single GPR can hold a V2SI, but not a DI, the
8874 best thing to do is set structs to BLKmode and avoid Severe Tire
8875 Damage.
8877 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
8878 fit into 1, whereas DI still needs two. */
8880 static bool
8881 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
8883 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
8884 || (TARGET_E500_DOUBLE && mode == DFmode));
8887 /* Nonzero if we can use a floating-point register to pass this arg. */
8888 #define USE_FP_FOR_ARG_P(CUM,MODE) \
8889 (SCALAR_FLOAT_MODE_P (MODE) \
8890 && (CUM)->fregno <= FP_ARG_MAX_REG \
8891 && TARGET_HARD_FLOAT && TARGET_FPRS)
8893 /* Nonzero if we can use an AltiVec register to pass this arg. */
8894 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
8895 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
8896 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
8897 && TARGET_ALTIVEC_ABI \
8898 && (NAMED))
8900 /* Walk down the type tree of TYPE counting consecutive base elements.
8901 If *MODEP is VOIDmode, then set it to the first valid floating point
8902 or vector type. If a non-floating point or vector type is found, or
8903 if a floating point or vector type that doesn't match a non-VOIDmode
8904 *MODEP is found, then return -1, otherwise return the count in the
8905 sub-tree. */
8907 static int
8908 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
8910 machine_mode mode;
8911 HOST_WIDE_INT size;
8913 switch (TREE_CODE (type))
8915 case REAL_TYPE:
8916 mode = TYPE_MODE (type);
8917 if (!SCALAR_FLOAT_MODE_P (mode))
8918 return -1;
8920 if (*modep == VOIDmode)
8921 *modep = mode;
8923 if (*modep == mode)
8924 return 1;
8926 break;
8928 case COMPLEX_TYPE:
8929 mode = TYPE_MODE (TREE_TYPE (type));
8930 if (!SCALAR_FLOAT_MODE_P (mode))
8931 return -1;
8933 if (*modep == VOIDmode)
8934 *modep = mode;
8936 if (*modep == mode)
8937 return 2;
8939 break;
8941 case VECTOR_TYPE:
8942 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
8943 return -1;
8945 /* Use V4SImode as representative of all 128-bit vector types. */
8946 size = int_size_in_bytes (type);
8947 switch (size)
8949 case 16:
8950 mode = V4SImode;
8951 break;
8952 default:
8953 return -1;
8956 if (*modep == VOIDmode)
8957 *modep = mode;
8959 /* Vector modes are considered to be opaque: two vectors are
8960 equivalent for the purposes of being homogeneous aggregates
8961 if they are the same size. */
8962 if (*modep == mode)
8963 return 1;
8965 break;
8967 case ARRAY_TYPE:
8969 int count;
8970 tree index = TYPE_DOMAIN (type);
8972 /* Can't handle incomplete types or sizes that are not
8973 fixed. */
8974 if (!COMPLETE_TYPE_P (type)
8975 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8976 return -1;
8978 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
8979 if (count == -1
8980 || !index
8981 || !TYPE_MAX_VALUE (index)
8982 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
8983 || !TYPE_MIN_VALUE (index)
8984 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
8985 || count < 0)
8986 return -1;
8988 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
8989 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
8991 /* There must be no padding. */
8992 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8993 return -1;
8995 return count;
8998 case RECORD_TYPE:
9000 int count = 0;
9001 int sub_count;
9002 tree field;
9004 /* Can't handle incomplete types or sizes that are not
9005 fixed. */
9006 if (!COMPLETE_TYPE_P (type)
9007 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9008 return -1;
9010 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9012 if (TREE_CODE (field) != FIELD_DECL)
9013 continue;
9015 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9016 if (sub_count < 0)
9017 return -1;
9018 count += sub_count;
9021 /* There must be no padding. */
9022 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9023 return -1;
9025 return count;
9028 case UNION_TYPE:
9029 case QUAL_UNION_TYPE:
9031 /* These aren't very interesting except in a degenerate case. */
9032 int count = 0;
9033 int sub_count;
9034 tree field;
9036 /* Can't handle incomplete types or sizes that are not
9037 fixed. */
9038 if (!COMPLETE_TYPE_P (type)
9039 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9040 return -1;
9042 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9044 if (TREE_CODE (field) != FIELD_DECL)
9045 continue;
9047 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9048 if (sub_count < 0)
9049 return -1;
9050 count = count > sub_count ? count : sub_count;
9053 /* There must be no padding. */
9054 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9055 return -1;
9057 return count;
9060 default:
9061 break;
9064 return -1;
9067 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
9068 float or vector aggregate that shall be passed in FP/vector registers
9069 according to the ELFv2 ABI, return the homogeneous element mode in
9070 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9072 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
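/* Example (illustrative types): under ELFv2,
       struct pt { double x, y, z; };
   is discovered with *ELT_MODE = DFmode and *N_ELTS = 3 and is passed
   in three consecutive FPRs, whereas
       struct mix { double x; int i; };
   fails the candidate walk and is passed as an ordinary aggregate. */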
9074 static bool
9075 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9076 machine_mode *elt_mode,
9077 int *n_elts)
9079 /* Note that we do not accept complex types at the top level as
9080 homogeneous aggregates; these types are handled via the
9081 targetm.calls.split_complex_arg mechanism. Complex types
9082 can be elements of homogeneous aggregates, however. */
9083 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9085 machine_mode field_mode = VOIDmode;
9086 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9088 if (field_count > 0)
9090 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode)?
9091 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9093 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9094 up to AGGR_ARG_NUM_REG registers. */
9095 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9097 if (elt_mode)
9098 *elt_mode = field_mode;
9099 if (n_elts)
9100 *n_elts = field_count;
9101 return true;
9106 if (elt_mode)
9107 *elt_mode = mode;
9108 if (n_elts)
9109 *n_elts = 1;
9110 return false;
9113 /* Return a nonzero value to say to return the function value in
9114 memory, just as large structures are always returned. TYPE will be
9115 the data type of the value, and FNTYPE will be the type of the
9116 function doing the returning, or @code{NULL} for libcalls.
9118 The AIX ABI for the RS/6000 specifies that all structures are
9119 returned in memory. The Darwin ABI does the same.
9121 For the Darwin 64 Bit ABI, a function result can be returned in
9122 registers or in memory, depending on the size of the return data
9123 type. If it is returned in registers, the value occupies the same
9124 registers as it would if it were the first and only function
9125 argument. Otherwise, the function places its result in memory at
9126 the location pointed to by GPR3.
9128 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9129 but a draft put them in memory, and GCC used to implement the draft
9130 instead of the final standard. Therefore, aix_struct_return
9131 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9132 compatibility can change DRAFT_V4_STRUCT_RET to override the
9133 default, and -m switches get the final word. See
9134 rs6000_option_override_internal for more details.
9136 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9137 long double support is enabled. These values are returned in memory.
9139 int_size_in_bytes returns -1 for variable size objects, which go in
9140 memory always. The cast to unsigned makes -1 > 8. */
9142 static bool
9143 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9145 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9146 if (TARGET_MACHO
9147 && rs6000_darwin64_abi
9148 && TREE_CODE (type) == RECORD_TYPE
9149 && int_size_in_bytes (type) > 0)
9151 CUMULATIVE_ARGS valcum;
9152 rtx valret;
9154 valcum.words = 0;
9155 valcum.fregno = FP_ARG_MIN_REG;
9156 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9157 /* Do a trial code generation as if this were going to be passed
9158 as an argument; if any part goes in memory, we return NULL. */
9159 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9160 if (valret)
9161 return false;
9162 /* Otherwise fall through to more conventional ABI rules. */
9165 #if HAVE_UPC_PTS_STRUCT_REP
9166 if (POINTER_TYPE_P (type) && upc_shared_type_p (TREE_TYPE (type)))
9167 return true;
9168 #endif
9170 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers.  */
9171 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9172 NULL, NULL))
9173 return false;
9175 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers.  */
9176 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9177 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9178 return false;
9180 if (AGGREGATE_TYPE_P (type)
9181 && (aix_struct_return
9182 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9183 return true;
9185 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9186 modes only exist for GCC vector types if -maltivec. */
9187 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9188 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9189 return false;
9191 /* Return synthetic vectors in memory. */
9192 if (TREE_CODE (type) == VECTOR_TYPE
9193 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9195 static bool warned_for_return_big_vectors = false;
9196 if (!warned_for_return_big_vectors)
9198 warning (0, "GCC vector returned by reference: "
9199 "non-standard ABI extension with no compatibility guarantee");
9200 warned_for_return_big_vectors = true;
9202 return true;
9205 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && TYPE_MODE (type) == TFmode)
9206 return true;
9208 return false;
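/* Illustrative sketch, not part of GCC: how the predicate above sorts a
   few return types under the 64-bit ELFv2 ABI (hypothetical names).  */

struct example_r16 { long a, b; };      /* 16 bytes: returned in GPRs */
struct example_r24 { long a, b, c; };   /* 24 bytes, heterogeneous: memory */
struct example_rfp { double a, b, c; }; /* homogeneous FP aggregate: FPRs */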
9211 /* Specify whether values returned in registers should be at the most
9212 significant end of a register. We want aggregates returned by
9213 value to match the way aggregates are passed to functions. */
9215 static bool
9216 rs6000_return_in_msb (const_tree valtype)
9218 return (DEFAULT_ABI == ABI_ELFv2
9219 && BYTES_BIG_ENDIAN
9220 && AGGREGATE_TYPE_P (valtype)
9221 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9224 #ifdef HAVE_AS_GNU_ATTRIBUTE
9225 /* Return TRUE if a call to function FNDECL may be one that
9226 potentially affects the function calling ABI of the object file. */
9228 static bool
9229 call_ABI_of_interest (tree fndecl)
9231 if (symtab->state == EXPANSION)
9233 struct cgraph_node *c_node;
9235 /* Libcalls are always interesting. */
9236 if (fndecl == NULL_TREE)
9237 return true;
9239 /* Any call to an external function is interesting. */
9240 if (DECL_EXTERNAL (fndecl))
9241 return true;
9243 /* Interesting functions that we are emitting in this object file. */
9244 c_node = cgraph_node::get (fndecl);
9245 c_node = c_node->ultimate_alias_target ();
9246 return !c_node->only_called_directly_p ();
9248 return false;
9250 #endif
9252 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9253 for a call to a function whose data type is FNTYPE.
9254 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
9256 For incoming args we set the number of arguments in the prototype large
9257 so we never return a PARALLEL. */
9259 void
9260 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9261 rtx libname ATTRIBUTE_UNUSED, int incoming,
9262 int libcall, int n_named_args,
9263 tree fndecl ATTRIBUTE_UNUSED,
9264 machine_mode return_mode ATTRIBUTE_UNUSED)
9266 static CUMULATIVE_ARGS zero_cumulative;
9268 *cum = zero_cumulative;
9269 cum->words = 0;
9270 cum->fregno = FP_ARG_MIN_REG;
9271 cum->vregno = ALTIVEC_ARG_MIN_REG;
9272 cum->prototype = (fntype && prototype_p (fntype));
9273 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9274 ? CALL_LIBCALL : CALL_NORMAL);
9275 cum->sysv_gregno = GP_ARG_MIN_REG;
9276 cum->stdarg = stdarg_p (fntype);
9278 cum->nargs_prototype = 0;
9279 if (incoming || cum->prototype)
9280 cum->nargs_prototype = n_named_args;
9282 /* Check for a longcall attribute. */
9283 if ((!fntype && rs6000_default_long_calls)
9284 || (fntype
9285 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9286 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9287 cum->call_cookie |= CALL_LONG;
9289 if (TARGET_DEBUG_ARG)
9291 fprintf (stderr, "\ninit_cumulative_args:");
9292 if (fntype)
9294 tree ret_type = TREE_TYPE (fntype);
9295 fprintf (stderr, " ret code = %s,",
9296 get_tree_code_name (TREE_CODE (ret_type)));
9299 if (cum->call_cookie & CALL_LONG)
9300 fprintf (stderr, " longcall,");
9302 fprintf (stderr, " proto = %d, nargs = %d\n",
9303 cum->prototype, cum->nargs_prototype);
9306 #ifdef HAVE_AS_GNU_ATTRIBUTE
9307 if (DEFAULT_ABI == ABI_V4)
9309 cum->escapes = call_ABI_of_interest (fndecl);
9310 if (cum->escapes)
9312 tree return_type;
9314 if (fntype)
9316 return_type = TREE_TYPE (fntype);
9317 return_mode = TYPE_MODE (return_type);
9319 else
9320 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9322 if (return_type != NULL)
9324 if (TREE_CODE (return_type) == RECORD_TYPE
9325 && TYPE_TRANSPARENT_AGGR (return_type))
9327 return_type = TREE_TYPE (first_field (return_type));
9328 return_mode = TYPE_MODE (return_type);
9330 if (AGGREGATE_TYPE_P (return_type)
9331 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9332 <= 8))
9333 rs6000_returns_struct = true;
9335 if (SCALAR_FLOAT_MODE_P (return_mode))
9336 rs6000_passes_float = true;
9337 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9338 || SPE_VECTOR_MODE (return_mode))
9339 rs6000_passes_vector = true;
9342 #endif
9344 if (fntype
9345 && !TARGET_ALTIVEC
9346 && TARGET_ALTIVEC_ABI
9347 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9349 error ("cannot return value in vector register because"
9350 " altivec instructions are disabled, use -maltivec"
9351 " to enable them");
9355 /* The mode the ABI uses for a word. This is not the same as word_mode
9356 for -m32 -mpowerpc64. This is used to implement various target hooks. */
9358 static machine_mode
9359 rs6000_abi_word_mode (void)
9361 return TARGET_32BIT ? SImode : DImode;
9364 /* On rs6000, function arguments are promoted, as are function return
9365 values. */
9367 static machine_mode
9368 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9369 machine_mode mode,
9370 int *punsignedp ATTRIBUTE_UNUSED,
9371 const_tree, int)
9373 PROMOTE_MODE (mode, *punsignedp, type);
9375 return mode;
9378 /* Return true if TYPE must be passed on the stack and not in registers. */
9380 static bool
9381 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
9383 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9384 return must_pass_in_stack_var_size (mode, type);
9385 else
9386 return must_pass_in_stack_var_size_or_pad (mode, type);
9389 /* If defined, a C expression which determines whether, and in which
9390 direction, to pad out an argument with extra space. The value
9391 should be of type `enum direction': either `upward' to pad above
9392 the argument, `downward' to pad below, or `none' to inhibit
9393 padding.
9395 For the AIX ABI, structs are always stored left-shifted in their
9396 argument slot. */
9398 enum direction
9399 function_arg_padding (machine_mode mode, const_tree type)
9401 #ifndef AGGREGATE_PADDING_FIXED
9402 #define AGGREGATE_PADDING_FIXED 0
9403 #endif
9404 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9405 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9406 #endif
9408 if (!AGGREGATE_PADDING_FIXED)
9410 /* GCC used to pass structures of the same size as integer types as
9411 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
9412 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9413 passed padded downward, except that -mstrict-align further
9414 muddied the water in that multi-component structures of 2 and 4
9415 bytes in size were passed padded upward.
9417 The following arranges for best compatibility with previous
9418 versions of gcc, but removes the -mstrict-align dependency. */
9419 if (BYTES_BIG_ENDIAN)
9421 HOST_WIDE_INT size = 0;
9423 if (mode == BLKmode)
9425 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
9426 size = int_size_in_bytes (type);
9428 else
9429 size = GET_MODE_SIZE (mode);
9431 if (size == 1 || size == 2 || size == 4)
9432 return downward;
9434 return upward;
9437 if (AGGREGATES_PAD_UPWARD_ALWAYS)
9439 if (type != 0 && AGGREGATE_TYPE_P (type))
9440 return upward;
9443 /* Fall back to the default. */
9444 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
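/* A minimal free-standing restatement, not part of GCC, of the
   big-endian branch above (the default !AGGREGATE_PADDING_FIXED case):
   sizes 1, 2 and 4 pad downward, everything else pads upward.  */

enum example_direction { EXAMPLE_UPWARD, EXAMPLE_DOWNWARD };

static enum example_direction
example_padding_big_endian (long size_in_bytes)
{
  if (size_in_bytes == 1 || size_in_bytes == 2 || size_in_bytes == 4)
    return EXAMPLE_DOWNWARD;  /* value sits at the low end of its slot */
  return EXAMPLE_UPWARD;
}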
9447 /* If defined, a C expression that gives the alignment boundary, in bits,
9448 of an argument with the specified mode and type. If it is not defined,
9449 PARM_BOUNDARY is used for all arguments.
9451 V.4 wants long longs and doubles to be double word aligned. Just
9452 testing the mode size is a boneheaded way to do this as it means
9453 that other types such as complex int are also double word aligned.
9454 However, we're stuck with this because changing the ABI might break
9455 existing library interfaces.
9457 Doubleword align SPE vectors.
9458 Quadword align Altivec/VSX vectors.
9459 Quadword align large synthetic vector types. */
9461 static unsigned int
9462 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
9464 machine_mode elt_mode;
9465 int n_elts;
9467 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9469 if (DEFAULT_ABI == ABI_V4
9470 && (GET_MODE_SIZE (mode) == 8
9471 || (TARGET_HARD_FLOAT
9472 && TARGET_FPRS
9473 && (mode == TFmode || mode == TDmode))))
9474 return 64;
9475 else if (SPE_VECTOR_MODE (mode)
9476 || (type && TREE_CODE (type) == VECTOR_TYPE
9477 && int_size_in_bytes (type) >= 8
9478 && int_size_in_bytes (type) < 16))
9479 return 64;
9480 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9481 || (type && TREE_CODE (type) == VECTOR_TYPE
9482 && int_size_in_bytes (type) >= 16))
9483 return 128;
9485 /* Aggregate types that need > 8 byte alignment are quadword-aligned
9486 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
9487 -mcompat-align-parm is used. */
9488 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
9489 || DEFAULT_ABI == ABI_ELFv2)
9490 && type && TYPE_ALIGN (type) > 64)
9492 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
9493 or homogeneous float/vector aggregates here. We already handled
9494 vector aggregates above, but still need to check for float here. */
9495 bool aggregate_p = (AGGREGATE_TYPE_P (type)
9496 && !SCALAR_FLOAT_MODE_P (elt_mode));
9498 /* We used to check for BLKmode instead of the above aggregate type
9499 check. Warn when this results in any difference to the ABI. */
9500 if (aggregate_p != (mode == BLKmode))
9502 static bool warned;
9503 if (!warned && warn_psabi)
9505 warned = true;
9506 inform (input_location,
9507 "the ABI of passing aggregates with %d-byte alignment"
9508 " has changed in GCC 5",
9509 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
9513 if (aggregate_p)
9514 return 128;
9517 /* Similar for the Darwin64 ABI. Note that for historical reasons we
9518 implement the "aggregate type" check as a BLKmode check here; this
9519 means certain aggregate types are in fact not aligned. */
9520 if (TARGET_MACHO && rs6000_darwin64_abi
9521 && mode == BLKmode
9522 && type && TYPE_ALIGN (type) > 64)
9523 return 128;
9525 return PARM_BOUNDARY;
9528 /* The offset in words to the start of the parameter save area. */
9530 static unsigned int
9531 rs6000_parm_offset (void)
9533 return (DEFAULT_ABI == ABI_V4 ? 2
9534 : DEFAULT_ABI == ABI_ELFv2 ? 4
9535 : 6);
9538 /* For a function parm of MODE and TYPE, return the starting word in
9539 the parameter area. NWORDS of the parameter area are already used. */
9541 static unsigned int
9542 rs6000_parm_start (machine_mode mode, const_tree type,
9543 unsigned int nwords)
9545 unsigned int align;
9547 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
9548 return nwords + (-(rs6000_parm_offset () + nwords) & align);
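/* A minimal sketch, not part of GCC, of the rounding done above, with
   the parameter-area offset and the argument boundary (both in words)
   passed in explicitly.  All names are made up for the example.  */

static unsigned int
example_parm_start (unsigned int parm_offset_words,
                    unsigned int boundary_words, unsigned int nwords)
{
  /* The boundary is a power of two, so boundary - 1 is a bit mask.  */
  unsigned int align = boundary_words - 1;
  return nwords + (-(parm_offset_words + nwords) & align);
}

/* e.g. 64-bit ELFv2 (offset 4 words): a 16-byte-aligned vector
   (boundary_words == 2) arriving after one word is already used starts at
   example_parm_start (4, 2, 1) == 1 + (-(4 + 1) & 1) == 2.  */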
9551 /* Compute the size (in words) of a function argument. */
9553 static unsigned long
9554 rs6000_arg_size (machine_mode mode, const_tree type)
9556 unsigned long size;
9558 if (mode != BLKmode)
9559 size = GET_MODE_SIZE (mode);
9560 else
9561 size = int_size_in_bytes (type);
9563 if (TARGET_32BIT)
9564 return (size + 3) >> 2;
9565 else
9566 return (size + 7) >> 3;
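/* (Illustrative worked example, not in the original.)  A 20-byte struct
   occupies (20 + 3) >> 2 = 5 words under -m32 and (20 + 7) >> 3 = 3
   doublewords under -m64.  */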
9569 /* Use this to flush pending int fields. */
9571 static void
9572 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
9573 HOST_WIDE_INT bitpos, int final)
9575 unsigned int startbit, endbit;
9576 int intregs, intoffset;
9577 machine_mode mode;
9579 /* Handle the situations where a float is taking up the first half
9580 of the GPR, and the other half is empty (typically due to
9581 alignment restrictions). We can detect this by an 8-byte-aligned
9582 int field, or by seeing that this is the final flush for this
9583 argument. Count the word and continue on. */
9584 if (cum->floats_in_gpr == 1
9585 && (cum->intoffset % 64 == 0
9586 || (cum->intoffset == -1 && final)))
9588 cum->words++;
9589 cum->floats_in_gpr = 0;
9592 if (cum->intoffset == -1)
9593 return;
9595 intoffset = cum->intoffset;
9596 cum->intoffset = -1;
9597 cum->floats_in_gpr = 0;
9599 if (intoffset % BITS_PER_WORD != 0)
9601 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9602 MODE_INT, 0);
9603 if (mode == BLKmode)
9605 /* We couldn't find an appropriate mode, which happens,
9606 e.g., in packed structs when there are 3 bytes to load.
9607 Move intoffset back to the beginning of the word in this
9608 case. */
9609 intoffset = intoffset & -BITS_PER_WORD;
9613 startbit = intoffset & -BITS_PER_WORD;
9614 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9615 intregs = (endbit - startbit) / BITS_PER_WORD;
9616 cum->words += intregs;
9617 /* words should be unsigned. */
9618 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
9620 int pad = (endbit/BITS_PER_WORD) - cum->words;
9621 cum->words += pad;
9625 /* The darwin64 ABI calls for us to recurse down through structs,
9626 looking for elements passed in registers. Unfortunately, we have
9627 to track int register count here also because of misalignments
9628 in powerpc alignment mode. */
9630 static void
9631 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
9632 const_tree type,
9633 HOST_WIDE_INT startbitpos)
9635 tree f;
9637 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9638 if (TREE_CODE (f) == FIELD_DECL)
9640 HOST_WIDE_INT bitpos = startbitpos;
9641 tree ftype = TREE_TYPE (f);
9642 machine_mode mode;
9643 if (ftype == error_mark_node)
9644 continue;
9645 mode = TYPE_MODE (ftype);
9647 if (DECL_SIZE (f) != 0
9648 && tree_fits_uhwi_p (bit_position (f)))
9649 bitpos += int_bit_position (f);
9651 /* ??? FIXME: else assume zero offset. */
9653 if (TREE_CODE (ftype) == RECORD_TYPE)
9654 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
9655 else if (USE_FP_FOR_ARG_P (cum, mode))
9657 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
9658 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9659 cum->fregno += n_fpregs;
9660 /* Single-precision floats present a special problem for
9661 us, because they are smaller than an 8-byte GPR, and so
9662 the structure-packing rules combined with the standard
9663 varargs behavior mean that we want to pack float/float
9664 and float/int combinations into a single register's
9665 space. This is complicated by the arg advance flushing,
9666 which works on arbitrarily large groups of int-type
9667 fields. */
9668 if (mode == SFmode)
9670 if (cum->floats_in_gpr == 1)
9672 /* Two floats in a word; count the word and reset
9673 the float count. */
9674 cum->words++;
9675 cum->floats_in_gpr = 0;
9677 else if (bitpos % 64 == 0)
9679 /* A float at the beginning of an 8-byte word;
9680 count it and put off adjusting cum->words until
9681 we see if an arg advance flush is going to do it
9682 for us. */
9683 cum->floats_in_gpr++;
9685 else
9687 /* The float is at the end of a word, preceded
9688 by integer fields, so the arg advance flush
9689 just above has already set cum->words and
9690 everything is taken care of. */
9693 else
9694 cum->words += n_fpregs;
9696 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9698 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9699 cum->vregno++;
9700 cum->words += 2;
9702 else if (cum->intoffset == -1)
9703 cum->intoffset = bitpos;
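/* Illustrative sketch, not part of GCC: for a hypothetical Darwin64
   argument

     struct { float a; float b; };

   both fields go to FPRs, yet together they occupy a single GPR word:
   the first field sets floats_in_gpr to 1, and the second hits the
   "two floats in a word" branch above, counting one word in total.  */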
9707 /* Check for an item that needs to be considered specially under the Darwin
9708 64-bit ABI. These are record types where the mode is BLK or the structure is
9709 8 bytes in size. */
9710 static int
9711 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
9713 return rs6000_darwin64_abi
9714 && ((mode == BLKmode
9715 && TREE_CODE (type) == RECORD_TYPE
9716 && int_size_in_bytes (type) > 0)
9717 || (type && TREE_CODE (type) == RECORD_TYPE
9718 && int_size_in_bytes (type) == 8)) ? 1 : 0;
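/* Illustrative examples, not in the original, for -m64 Darwin:

     struct { char c[9]; }   -- BLKmode record, size > 0: special handling
     struct { long l; }      -- 8-byte record:             special handling
     struct { int i; }       -- 4-byte SImode record:      not special  */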
9721 /* Update the data in CUM to advance over an argument
9722 of mode MODE and data type TYPE.
9723 (TYPE is null for libcalls where that information may not be available.)
9725 Note that for args passed by reference, function_arg will be called
9726 with MODE and TYPE set to that of the pointer to the arg, not the arg
9727 itself. */
9729 static void
9730 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
9731 const_tree type, bool named, int depth)
9733 machine_mode elt_mode;
9734 int n_elts;
9736 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9738 /* Only tick off an argument if we're not recursing. */
9739 if (depth == 0)
9740 cum->nargs_prototype--;
9742 #ifdef HAVE_AS_GNU_ATTRIBUTE
9743 if (DEFAULT_ABI == ABI_V4
9744 && cum->escapes)
9746 if (SCALAR_FLOAT_MODE_P (mode))
9747 rs6000_passes_float = true;
9748 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
9749 rs6000_passes_vector = true;
9750 else if (SPE_VECTOR_MODE (mode)
9751 && !cum->stdarg
9752 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9753 rs6000_passes_vector = true;
9755 #endif
9757 if (TARGET_ALTIVEC_ABI
9758 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9759 || (type && TREE_CODE (type) == VECTOR_TYPE
9760 && int_size_in_bytes (type) == 16)))
9762 bool stack = false;
9764 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
9766 cum->vregno += n_elts;
9768 if (!TARGET_ALTIVEC)
9769 error ("cannot pass argument in vector register because"
9770 " altivec instructions are disabled, use -maltivec"
9771 " to enable them");
9773 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
9774 even if it is going to be passed in a vector register.
9775 Darwin does the same for variable-argument functions. */
9776 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9777 && TARGET_64BIT)
9778 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
9779 stack = true;
9781 else
9782 stack = true;
9784 if (stack)
9786 int align;
9788 /* Vector parameters must be 16-byte aligned. In 32-bit
9789 mode this means we need to take into account the offset
9790 to the parameter save area. In 64-bit mode, they just
9791 have to start on an even word, since the parameter save
9792 area is 16-byte aligned. */
9793 if (TARGET_32BIT)
9794 align = -(rs6000_parm_offset () + cum->words) & 3;
9795 else
9796 align = cum->words & 1;
9797 cum->words += align + rs6000_arg_size (mode, type);
9799 if (TARGET_DEBUG_ARG)
9801 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
9802 cum->words, align);
9803 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
9804 cum->nargs_prototype, cum->prototype,
9805 GET_MODE_NAME (mode));
9809 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
9810 && !cum->stdarg
9811 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9812 cum->sysv_gregno++;
9814 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9816 int size = int_size_in_bytes (type);
9817 /* Variable sized types have size == -1 and are
9818 treated as if consisting entirely of ints.
9819 Pad to 16 byte boundary if needed. */
9820 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9821 && (cum->words % 2) != 0)
9822 cum->words++;
9823 /* For varargs, we can just go up by the size of the struct. */
9824 if (!named)
9825 cum->words += (size + 7) / 8;
9826 else
9828 /* It is tempting to say int register count just goes up by
9829 sizeof(type)/8, but this is wrong in a case such as
9830 { int; double; int; } [powerpc alignment]. We have to
9831 grovel through the fields for these too. */
9832 cum->intoffset = 0;
9833 cum->floats_in_gpr = 0;
9834 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
9835 rs6000_darwin64_record_arg_advance_flush (cum,
9836 size * BITS_PER_UNIT, 1);
9838 if (TARGET_DEBUG_ARG)
9840 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
9841 cum->words, TYPE_ALIGN (type), size);
9842 fprintf (stderr,
9843 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
9844 cum->nargs_prototype, cum->prototype,
9845 GET_MODE_NAME (mode));
9848 else if (DEFAULT_ABI == ABI_V4)
9850 if (TARGET_HARD_FLOAT && TARGET_FPRS
9851 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
9852 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
9853 || (mode == TFmode && !TARGET_IEEEQUAD)
9854 || mode == SDmode || mode == DDmode || mode == TDmode))
9856 /* _Decimal128 must use an even/odd register pair. This assumes
9857 that the register number is odd when fregno is odd. */
9858 if (mode == TDmode && (cum->fregno % 2) == 1)
9859 cum->fregno++;
9861 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
9862 <= FP_ARG_V4_MAX_REG)
9863 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
9864 else
9866 cum->fregno = FP_ARG_V4_MAX_REG + 1;
9867 if (mode == DFmode || mode == TFmode
9868 || mode == DDmode || mode == TDmode)
9869 cum->words += cum->words & 1;
9870 cum->words += rs6000_arg_size (mode, type);
9873 else
9875 int n_words = rs6000_arg_size (mode, type);
9876 int gregno = cum->sysv_gregno;
9878 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
9879 (r7,r8) or (r9,r10). As does any other 2 word item such
9880 as complex int due to a historical mistake. */
9881 if (n_words == 2)
9882 gregno += (1 - gregno) & 1;
9884 /* Multi-reg args are not split between registers and stack. */
9885 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9887 /* Long long and SPE vectors are aligned on the stack.
9888 So are other 2 word items such as complex int due to
9889 a historical mistake. */
9890 if (n_words == 2)
9891 cum->words += cum->words & 1;
9892 cum->words += n_words;
9895 /* Note: gregno keeps accumulating even after we have started
9896 spilling to the stack; this is how expand_builtin_saveregs
9897 learns that spilling has begun. */
9898 cum->sysv_gregno = gregno + n_words;
9901 if (TARGET_DEBUG_ARG)
9903 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9904 cum->words, cum->fregno);
9905 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
9906 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
9907 fprintf (stderr, "mode = %4s, named = %d\n",
9908 GET_MODE_NAME (mode), named);
9911 else
9913 int n_words = rs6000_arg_size (mode, type);
9914 int start_words = cum->words;
9915 int align_words = rs6000_parm_start (mode, type, start_words);
9917 cum->words = align_words + n_words;
9919 if (SCALAR_FLOAT_MODE_P (elt_mode)
9920 && TARGET_HARD_FLOAT && TARGET_FPRS)
9922 /* _Decimal128 must be passed in an even/odd float register pair.
9923 This assumes that the register number is odd when fregno is
9924 odd. */
9925 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
9926 cum->fregno++;
9927 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
9930 if (TARGET_DEBUG_ARG)
9932 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9933 cum->words, cum->fregno);
9934 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
9935 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
9936 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
9937 named, align_words - start_words, depth);
9942 static void
9943 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
9944 const_tree type, bool named)
9946 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
9950 static rtx
9951 spe_build_register_parallel (machine_mode mode, int gregno)
9953 rtx r1, r3, r5, r7;
9955 switch (mode)
9957 case DFmode:
9958 r1 = gen_rtx_REG (DImode, gregno);
9959 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9960 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
9962 case DCmode:
9963 case TFmode:
9964 r1 = gen_rtx_REG (DImode, gregno);
9965 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9966 r3 = gen_rtx_REG (DImode, gregno + 2);
9967 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9968 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
9970 case TCmode:
9971 r1 = gen_rtx_REG (DImode, gregno);
9972 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9973 r3 = gen_rtx_REG (DImode, gregno + 2);
9974 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9975 r5 = gen_rtx_REG (DImode, gregno + 4);
9976 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
9977 r7 = gen_rtx_REG (DImode, gregno + 6);
9978 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
9979 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
9981 default:
9982 gcc_unreachable ();
9986 /* Determine where to put a SIMD argument on the SPE. */
9987 static rtx
9988 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
9989 const_tree type)
9991 int gregno = cum->sysv_gregno;
9993 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
9994 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
9995 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
9996 || mode == DCmode || mode == TCmode))
9998 int n_words = rs6000_arg_size (mode, type);
10000 /* Doubles go in an odd/even register pair (r5/r6, etc). */
10001 if (mode == DFmode)
10002 gregno += (1 - gregno) & 1;
10004 /* Multi-reg args are not split between registers and stack. */
10005 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10006 return NULL_RTX;
10008 return spe_build_register_parallel (mode, gregno);
10010 if (cum->stdarg)
10012 int n_words = rs6000_arg_size (mode, type);
10014 /* SPE vectors are put in odd registers. */
10015 if (n_words == 2 && (gregno & 1) == 0)
10016 gregno += 1;
10018 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
10020 rtx r1, r2;
10021 machine_mode m = SImode;
10023 r1 = gen_rtx_REG (m, gregno);
10024 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
10025 r2 = gen_rtx_REG (m, gregno + 1);
10026 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
10027 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
10029 else
10030 return NULL_RTX;
10032 else
10034 if (gregno <= GP_ARG_MAX_REG)
10035 return gen_rtx_REG (mode, gregno);
10036 else
10037 return NULL_RTX;
10041 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
10042 structure between cum->intoffset and bitpos to integer registers. */
10044 static void
10045 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
10046 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
10048 machine_mode mode;
10049 unsigned int regno;
10050 unsigned int startbit, endbit;
10051 int this_regno, intregs, intoffset;
10052 rtx reg;
10054 if (cum->intoffset == -1)
10055 return;
10057 intoffset = cum->intoffset;
10058 cum->intoffset = -1;
10060 /* If this is the trailing part of a word, try to only load that
10061 much into the register. Otherwise load the whole register. Note
10062 that in the latter case we may pick up unwanted bits. It's not a
10063 problem at the moment, but we may wish to revisit it. */
10065 if (intoffset % BITS_PER_WORD != 0)
10067 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10068 MODE_INT, 0);
10069 if (mode == BLKmode)
10071 /* We couldn't find an appropriate mode, which happens,
10072 e.g., in packed structs when there are 3 bytes to load.
10073 Move intoffset back to the beginning of the word in this
10074 case. */
10075 intoffset = intoffset & -BITS_PER_WORD;
10076 mode = word_mode;
10079 else
10080 mode = word_mode;
10082 startbit = intoffset & -BITS_PER_WORD;
10083 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
10084 intregs = (endbit - startbit) / BITS_PER_WORD;
10085 this_regno = cum->words + intoffset / BITS_PER_WORD;
10087 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
10088 cum->use_stack = 1;
10090 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
10091 if (intregs <= 0)
10092 return;
10094 intoffset /= BITS_PER_UNIT;
10095 do
10096 {
10097 regno = GP_ARG_MIN_REG + this_regno;
10098 reg = gen_rtx_REG (mode, regno);
10099 rvec[(*k)++] =
10100 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10102 this_regno += 1;
10103 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
10104 mode = word_mode;
10105 intregs -= 1;
10106 }
10107 while (intregs > 0);
10110 /* Recursive workhorse for the following. */
10112 static void
10113 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10114 HOST_WIDE_INT startbitpos, rtx rvec[],
10115 int *k)
10117 tree f;
10119 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10120 if (TREE_CODE (f) == FIELD_DECL)
10122 HOST_WIDE_INT bitpos = startbitpos;
10123 tree ftype = TREE_TYPE (f);
10124 machine_mode mode;
10125 if (ftype == error_mark_node)
10126 continue;
10127 mode = TYPE_MODE (ftype);
10129 if (DECL_SIZE (f) != 0
10130 && tree_fits_uhwi_p (bit_position (f)))
10131 bitpos += int_bit_position (f);
10133 /* ??? FIXME: else assume zero offset. */
10135 if (TREE_CODE (ftype) == RECORD_TYPE)
10136 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10137 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10139 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10140 #if 0
10141 switch (mode)
10143 case SCmode: mode = SFmode; break;
10144 case DCmode: mode = DFmode; break;
10145 case TCmode: mode = TFmode; break;
10146 default: break;
10148 #endif
10149 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10150 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10152 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10153 && (mode == TFmode || mode == TDmode));
10154 /* Long double or _Decimal128 split over regs and memory. */
10155 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10156 cum->use_stack = 1;
10158 rvec[(*k)++]
10159 = gen_rtx_EXPR_LIST (VOIDmode,
10160 gen_rtx_REG (mode, cum->fregno++),
10161 GEN_INT (bitpos / BITS_PER_UNIT));
10162 if (mode == TFmode || mode == TDmode)
10163 cum->fregno++;
10165 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10167 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10168 rvec[(*k)++]
10169 = gen_rtx_EXPR_LIST (VOIDmode,
10170 gen_rtx_REG (mode, cum->vregno++),
10171 GEN_INT (bitpos / BITS_PER_UNIT));
10173 else if (cum->intoffset == -1)
10174 cum->intoffset = bitpos;
10178 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10179 the register(s) to be used for each field and subfield of a struct
10180 being passed by value, along with the offset of where the
10181 register's value may be found in the block. FP fields go in FP
10182 register, vector fields go in vector registers, and everything
10183 else goes in int registers, packed as in memory.
10185 This code is also used for function return values. RETVAL indicates
10186 whether this is the case.
10188 Much of this is taken from the SPARC V9 port, which has a similar
10189 calling convention. */
10191 static rtx
10192 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10193 bool named, bool retval)
10195 rtx rvec[FIRST_PSEUDO_REGISTER];
10196 int k = 1, kbase = 1;
10197 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10198 /* This is a copy; modifications are not visible to our caller. */
10199 CUMULATIVE_ARGS copy_cum = *orig_cum;
10200 CUMULATIVE_ARGS *cum = &copy_cum;
10202 /* Pad to 16 byte boundary if needed. */
10203 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10204 && (cum->words % 2) != 0)
10205 cum->words++;
10207 cum->intoffset = 0;
10208 cum->use_stack = 0;
10209 cum->named = named;
10211 /* Put entries into rvec[] for individual FP and vector fields, and
10212 for the chunks of memory that go in int regs. Note we start at
10213 element 1; 0 is reserved for an indication of using memory, and
10214 may or may not be filled in below. */
10215 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10216 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10218 /* If any part of the struct went on the stack put all of it there.
10219 This hack is because the generic code for
10220 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10221 parts of the struct are not at the beginning. */
10222 if (cum->use_stack)
10224 if (retval)
10225 return NULL_RTX; /* doesn't go in registers at all */
10226 kbase = 0;
10227 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10229 if (k > 1 || cum->use_stack)
10230 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10231 else
10232 return NULL_RTX;
10235 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10237 static rtx
10238 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10239 int align_words)
10241 int n_units;
10242 int i, k;
10243 rtx rvec[GP_ARG_NUM_REG + 1];
10245 if (align_words >= GP_ARG_NUM_REG)
10246 return NULL_RTX;
10248 n_units = rs6000_arg_size (mode, type);
10250 /* Optimize the simple case where the arg fits in one gpr, except in
10251 the case of BLKmode due to assign_parms assuming that registers are
10252 BITS_PER_WORD wide. */
10253 if (n_units == 0
10254 || (n_units == 1 && mode != BLKmode))
10255 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10257 k = 0;
10258 if (align_words + n_units > GP_ARG_NUM_REG)
10259 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10260 using a magic NULL_RTX component.
10261 This is not strictly correct. Only some of the arg belongs in
10262 memory, not all of it. However, the normal scheme using
10263 function_arg_partial_nregs can result in unusual subregs, eg.
10264 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10265 store the whole arg to memory is often more efficient than code
10266 to store pieces, and we know that space is available in the right
10267 place for the whole arg. */
10268 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10270 i = 0;
10271 do
10272 {
10273 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10274 rtx off = GEN_INT (i++ * 4);
10275 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10276 }
10277 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10279 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
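/* Illustrative sketch, not part of GCC: under -m32 -mpowerpc64, a DImode
   argument with align_words == 2 (i.e. starting at r5) comes back as
   roughly

     (parallel:DI [(expr_list (reg:SI 5) (const_int 0))
                   (expr_list (reg:SI 6) (const_int 4))])

   describing the 8-byte value as two 4-byte pieces in r5 and r6.  */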
10282 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10283 but must also be copied into the parameter save area starting at
10284 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10285 to the GPRs and/or memory. Return the number of elements used. */
10287 static int
10288 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10289 int align_words, rtx *rvec)
10291 int k = 0;
10293 if (align_words < GP_ARG_NUM_REG)
10295 int n_words = rs6000_arg_size (mode, type);
10297 if (align_words + n_words > GP_ARG_NUM_REG
10298 || mode == BLKmode
10299 || (TARGET_32BIT && TARGET_POWERPC64))
10301 /* If this is partially on the stack, then we only
10302 include the portion actually in registers here. */
10303 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10304 int i = 0;
10306 if (align_words + n_words > GP_ARG_NUM_REG)
10308 /* Not all of the arg fits in gprs. Say that it goes in memory
10309 too, using a magic NULL_RTX component. Also see comment in
10310 rs6000_mixed_function_arg for why the normal
10311 function_arg_partial_nregs scheme doesn't work in this case. */
10312 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10315 do
10316 {
10317 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10318 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10319 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10320 }
10321 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10323 else
10325 /* The whole arg fits in gprs. */
10326 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10327 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10330 else
10332 /* It's entirely in memory. */
10333 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10336 return k;
10339 /* RVEC is a vector of K components of an argument of mode MODE.
10340 Construct the final function_arg return value from it. */
10342 static rtx
10343 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
10345 gcc_assert (k >= 1);
10347 /* Avoid returning a PARALLEL in the trivial cases. */
10348 if (k == 1)
10350 if (XEXP (rvec[0], 0) == NULL_RTX)
10351 return NULL_RTX;
10353 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10354 return XEXP (rvec[0], 0);
10357 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10360 /* Determine where to put an argument to a function.
10361 Value is zero to push the argument on the stack,
10362 or a hard register in which to store the argument.
10364 MODE is the argument's machine mode.
10365 TYPE is the data type of the argument (as a tree).
10366 This is null for libcalls where that information may
10367 not be available.
10368 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10369 the preceding args and about the function being called. It is
10370 not modified in this routine.
10371 NAMED is nonzero if this argument is a named parameter
10372 (otherwise it is an extra parameter matching an ellipsis).
10374 On RS/6000 the first eight words of non-FP are normally in registers
10375 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10376 Under V.4, the first 8 FP args are in registers.
10378 If this is floating-point and no prototype is specified, we use
10379 both an FP and integer register (or possibly FP reg and stack). Library
10380 functions (when CALL_LIBCALL is set) always have the proper types for args,
10381 so we can pass the FP value just in one register. emit_library_function
10382 doesn't support PARALLEL anyway.
10384 Note that for args passed by reference, function_arg will be called
10385 with MODE and TYPE set to that of the pointer to the arg, not the arg
10386 itself. */
10388 static rtx
10389 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
10390 const_tree type, bool named)
10392 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10393 enum rs6000_abi abi = DEFAULT_ABI;
10394 machine_mode elt_mode;
10395 int n_elts;
10397 /* Return a marker to indicate whether CR1 needs to set or clear the
10398 bit that V.4 uses to say fp args were passed in registers.
10399 Assume that we don't need the marker for software floating point,
10400 or compiler generated library calls. */
10401 if (mode == VOIDmode)
10403 if (abi == ABI_V4
10404 && (cum->call_cookie & CALL_LIBCALL) == 0
10405 && (cum->stdarg
10406 || (cum->nargs_prototype < 0
10407 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10409 /* For the SPE, we need to crxor CR6 always. */
10410 if (TARGET_SPE_ABI)
10411 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10412 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10413 return GEN_INT (cum->call_cookie
10414 | ((cum->fregno == FP_ARG_MIN_REG)
10415 ? CALL_V4_SET_FP_ARGS
10416 : CALL_V4_CLEAR_FP_ARGS));
10419 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
10422 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10424 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10426 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
10427 if (rslt != NULL_RTX)
10428 return rslt;
10429 /* Else fall through to usual handling. */
10432 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10434 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10435 rtx r, off;
10436 int i, k = 0;
10438 /* Do we also need to pass this argument in the parameter
10439 save area? */
10440 if (TARGET_64BIT && ! cum->prototype)
10442 int align_words = (cum->words + 1) & ~1;
10443 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10446 /* Describe where this argument goes in the vector registers. */
10447 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
10449 r = gen_rtx_REG (elt_mode, cum->vregno + i);
10450 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10451 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10454 return rs6000_finish_function_arg (mode, rvec, k);
10456 else if (TARGET_ALTIVEC_ABI
10457 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
10458 || (type && TREE_CODE (type) == VECTOR_TYPE
10459 && int_size_in_bytes (type) == 16)))
10461 if (named || abi == ABI_V4)
10462 return NULL_RTX;
10463 else
10465 /* Vector parameters to varargs functions under AIX or Darwin
10466 get passed in memory and possibly also in GPRs. */
10467 int align, align_words, n_words;
10468 machine_mode part_mode;
10470 /* Vector parameters must be 16-byte aligned. In 32-bit
10471 mode this means we need to take into account the offset
10472 to the parameter save area. In 64-bit mode, they just
10473 have to start on an even word, since the parameter save
10474 area is 16-byte aligned. */
10475 if (TARGET_32BIT)
10476 align = -(rs6000_parm_offset () + cum->words) & 3;
10477 else
10478 align = cum->words & 1;
10479 align_words = cum->words + align;
10481 /* Out of registers? Memory, then. */
10482 if (align_words >= GP_ARG_NUM_REG)
10483 return NULL_RTX;
10485 if (TARGET_32BIT && TARGET_POWERPC64)
10486 return rs6000_mixed_function_arg (mode, type, align_words);
10488 /* The vector value goes in GPRs. Only the part of the
10489 value in GPRs is reported here. */
10490 part_mode = mode;
10491 n_words = rs6000_arg_size (mode, type);
10492 if (align_words + n_words > GP_ARG_NUM_REG)
10493 /* Fortunately, there are only two possibilities: the value
10494 is either wholly in GPRs or half in GPRs and half not. */
10495 part_mode = DImode;
10497 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
10500 else if (TARGET_SPE_ABI && TARGET_SPE
10501 && (SPE_VECTOR_MODE (mode)
10502 || (TARGET_E500_DOUBLE && (mode == DFmode
10503 || mode == DCmode
10504 || mode == TFmode
10505 || mode == TCmode))))
10506 return rs6000_spe_function_arg (cum, mode, type);
10508 else if (abi == ABI_V4)
10510 if (TARGET_HARD_FLOAT && TARGET_FPRS
10511 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10512 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10513 || (mode == TFmode && !TARGET_IEEEQUAD)
10514 || mode == SDmode || mode == DDmode || mode == TDmode))
10516 /* _Decimal128 must use an even/odd register pair. This assumes
10517 that the register number is odd when fregno is odd. */
10518 if (mode == TDmode && (cum->fregno % 2) == 1)
10519 cum->fregno++;
10521 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
10522 <= FP_ARG_V4_MAX_REG)
10523 return gen_rtx_REG (mode, cum->fregno);
10524 else
10525 return NULL_RTX;
10527 else
10529 int n_words = rs6000_arg_size (mode, type);
10530 int gregno = cum->sysv_gregno;
10532 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10533 (r7,r8) or (r9,r10). As does any other 2 word item such
10534 as complex int due to a historical mistake. */
10535 if (n_words == 2)
10536 gregno += (1 - gregno) & 1;
10538 /* Multi-reg args are not split between registers and stack. */
10539 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10540 return NULL_RTX;
10542 if (TARGET_32BIT && TARGET_POWERPC64)
10543 return rs6000_mixed_function_arg (mode, type,
10544 gregno - GP_ARG_MIN_REG);
10545 return gen_rtx_REG (mode, gregno);
10548 else
10550 int align_words = rs6000_parm_start (mode, type, cum->words);
10552 /* _Decimal128 must be passed in an even/odd float register pair.
10553 This assumes that the register number is odd when fregno is odd. */
10554 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10555 cum->fregno++;
10557 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10559 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10560 rtx r, off;
10561 int i, k = 0;
10562 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10563 int fpr_words;
10565 /* Do we also need to pass this argument in the parameter
10566 save area? */
10567 if (type && (cum->nargs_prototype <= 0
10568 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10569 && TARGET_XL_COMPAT
10570 && align_words >= GP_ARG_NUM_REG)))
10571 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10573 /* Describe where this argument goes in the fprs. */
10574 for (i = 0; i < n_elts
10575 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
10577 /* Check if the argument is split over registers and memory.
10578 This can only ever happen for long double or _Decimal128;
10579 complex types are handled via split_complex_arg. */
10580 machine_mode fmode = elt_mode;
10581 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
10583 gcc_assert (fmode == TFmode || fmode == TDmode);
10584 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
10587 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
10588 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10589 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10592 /* If there were not enough FPRs to hold the argument, the rest
10593 usually goes into memory. However, if the current position
10594 is still within the register parameter area, a portion may
10595 actually have to go into GPRs.
10597 Note that it may happen that the portion of the argument
10598 passed in the first "half" of the first GPR was already
10599 passed in the last FPR as well.
10601 For unnamed arguments, we already set up GPRs to cover the
10602 whole argument in rs6000_psave_function_arg, so there is
10603 nothing further to do at this point. */
10604 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
10605 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
10606 && cum->nargs_prototype > 0)
10608 static bool warned;
10610 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10611 int n_words = rs6000_arg_size (mode, type);
10613 align_words += fpr_words;
10614 n_words -= fpr_words;
10616 do
10617 {
10618 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10619 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
10620 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10621 }
10622 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10624 if (!warned && warn_psabi)
10626 warned = true;
10627 inform (input_location,
10628 "the ABI of passing homogeneous float aggregates"
10629 " has changed in GCC 5");
10633 return rs6000_finish_function_arg (mode, rvec, k);
10635 else if (align_words < GP_ARG_NUM_REG)
10637 if (TARGET_32BIT && TARGET_POWERPC64)
10638 return rs6000_mixed_function_arg (mode, type, align_words);
10640 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10642 else
10643 return NULL_RTX;
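/* Illustrative sketch, not part of GCC: as the block comment preceding
   rs6000_function_arg says, a double passed to an unprototyped function
   under the 64-bit AIX/ELFv1 ABI is described in both register files;
   for the first argument slot the result is roughly

     (parallel:DF [(expr_list (reg:DF 3) (const_int 0))
                   (expr_list (reg:DF 33) (const_int 0))])

   where register 33 is FPR1, so the callee can use either copy,
   whichever its (unseen) prototype implies.  */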
10647 /* For an arg passed partly in registers and partly in memory, this is
10648 the number of bytes passed in registers. For args passed entirely in
10649 registers or entirely in memory, zero. When an arg is described by a
10650 PARALLEL, perhaps using more than one register type, this function
10651 returns the number of bytes used by the first element of the PARALLEL. */
10653 static int
10654 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
10655 tree type, bool named)
10657 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10658 bool passed_in_gprs = true;
10659 int ret = 0;
10660 int align_words;
10661 machine_mode elt_mode;
10662 int n_elts;
10664 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10666 if (DEFAULT_ABI == ABI_V4)
10667 return 0;
10669 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10671 /* If we are passing this arg in the fixed parameter save area
10672 (gprs or memory) as well as VRs, we do not use the partial
10673 bytes mechanism; instead, rs6000_function_arg will return a
10674 PARALLEL including a memory element as necessary. */
10675 if (TARGET_64BIT && ! cum->prototype)
10676 return 0;
10678 /* Otherwise, we pass in VRs only. Check for partial copies. */
10679 passed_in_gprs = false;
10680 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
10681 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
10684 /* In this complicated case we just disable the partial_nregs code. */
10685 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10686 return 0;
10688 align_words = rs6000_parm_start (mode, type, cum->words);
10690 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10692 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10694 /* If we are passing this arg in the fixed parameter save area
10695 (gprs or memory) as well as FPRs, we do not use the partial
10696 bytes mechanism; instead, rs6000_function_arg will return a
10697 PARALLEL including a memory element as necessary. */
10698 if (type
10699 && (cum->nargs_prototype <= 0
10700 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10701 && TARGET_XL_COMPAT
10702 && align_words >= GP_ARG_NUM_REG)))
10703 return 0;
10705 /* Otherwise, we pass in FPRs only. Check for partial copies. */
10706 passed_in_gprs = false;
10707 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
10709 /* Compute number of bytes / words passed in FPRs. If there
10710 is still space available in the register parameter area
10711 *after* that amount, a part of the argument will be passed
10712 in GPRs. In that case, the total amount passed in any
10713 registers is equal to the amount that would have been passed
10714 in GPRs if everything were passed there, so we fall back to
10715 the GPR code below to compute the appropriate value. */
10716 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
10717 * MIN (8, GET_MODE_SIZE (elt_mode)));
10718 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
10720 if (align_words + fpr_words < GP_ARG_NUM_REG)
10721 passed_in_gprs = true;
10722 else
10723 ret = fpr;
10727 if (passed_in_gprs
10728 && align_words < GP_ARG_NUM_REG
10729 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
10730 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
10732 if (ret != 0 && TARGET_DEBUG_ARG)
10733 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
10735 return ret;
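/* (Illustrative worked example, not in the original.)  -m64, an
   aggregate of 10 doublewords starting at align_words == 6: only r9 and
   r10 remain of the 8 GPRs, so (8 - 6) * 8 == 16 bytes travel in
   registers, the function returns 16, and the remaining 64 bytes are
   placed in the parameter save area.  */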
10738 /* A C expression that indicates when an argument must be passed by
10739 reference. If nonzero for an argument, a copy of that argument is
10740 made in memory and a pointer to the argument is passed instead of
10741 the argument itself. The pointer is passed in whatever way is
10742 appropriate for passing a pointer to that type.
10744 Under V.4, aggregates and long double are passed by reference.
10746 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
10747 reference unless the AltiVec vector extension ABI is in force.
10749 As an extension to all ABIs, variable sized types are passed by
10750 reference. */
10752 static bool
10753 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
10754 machine_mode mode, const_tree type,
10755 bool named ATTRIBUTE_UNUSED)
10757 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
10759 if (TARGET_DEBUG_ARG)
10760 fprintf (stderr, "function_arg_pass_by_reference: V4 long double\n");
10761 return 1;
10764 if (!type)
10765 return 0;
10767 #if HAVE_UPC_PTS_STRUCT_REP
10768 if (DEFAULT_ABI == ABI_V4 && POINTER_TYPE_P (type)
10769 && upc_shared_type_p (TREE_TYPE (type)))
10771 if (TARGET_DEBUG_ARG)
10772 fprintf (stderr,
10773 "function_arg_pass_by_reference: V4 UPC ptr to shared\n");
10774 return 1;
10776 #endif
10778 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
10780 if (TARGET_DEBUG_ARG)
10781 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
10782 return 1;
10785 if (int_size_in_bytes (type) < 0)
10787 if (TARGET_DEBUG_ARG)
10788 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
10789 return 1;
10792 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10793 modes only exist for GCC vector types if -maltivec. */
10794 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
10796 if (TARGET_DEBUG_ARG)
10797 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
10798 return 1;
10801 /* Pass synthetic vectors in memory. */
10802 if (TREE_CODE (type) == VECTOR_TYPE
10803 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10805 static bool warned_for_pass_big_vectors = false;
10806 if (TARGET_DEBUG_ARG)
10807 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
10808 if (!warned_for_pass_big_vectors)
10810 warning (0, "GCC vector passed by reference: "
10811 "non-standard ABI extension with no compatibility guarantee");
10812 warned_for_pass_big_vectors = true;
10814 return 1;
10817 return 0;
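/* Illustrative examples, not in the original, under the 32-bit SVR4
   (V.4) ABI (hypothetical names):

     struct example_pt { int x, y; };    -- aggregate: by reference
     long double when TARGET_IEEEQUAD (TFmode)
                                         -- by reference
     any type whose int_size_in_bytes is negative
                                         -- by reference

   while plain scalars such as int and double go by value.  */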
10820 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
10821 already processed. Return true if the parameter must be passed
10822 (fully or partially) on the stack. */
10824 static bool
10825 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
10827 machine_mode mode;
10828 int unsignedp;
10829 rtx entry_parm;
10831 /* Catch errors. */
10832 if (type == NULL || type == error_mark_node)
10833 return true;
10835 /* Handle types with no storage requirement. */
10836 if (TYPE_MODE (type) == VOIDmode)
10837 return false;
10839 /* Handle complex types. Both halves have type TREE_TYPE (type), so
the check (and the cumulative-args advance inside it) is done once
for each half. */
10840 if (TREE_CODE (type) == COMPLEX_TYPE)
10841 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
10842 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
10844 /* Handle transparent aggregates. */
10845 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
10846 && TYPE_TRANSPARENT_AGGR (type))
10847 type = TREE_TYPE (first_field (type));
10849 /* See if this arg was passed by invisible reference. */
10850 if (pass_by_reference (get_cumulative_args (args_so_far),
10851 TYPE_MODE (type), type, true))
10852 type = build_pointer_type (type);
10854 /* Find mode as it is passed by the ABI. */
10855 unsignedp = TYPE_UNSIGNED (type);
10856 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
10858 /* If we must pass in stack, we need a stack. */
10859 if (rs6000_must_pass_in_stack (mode, type))
10860 return true;
10862 /* If there is no incoming register, we need a stack. */
10863 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
10864 if (entry_parm == NULL)
10865 return true;
10867 /* Likewise if we need to pass both in registers and on the stack. */
10868 if (GET_CODE (entry_parm) == PARALLEL
10869 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
10870 return true;
10872 /* Also true if we're partially in registers and partially not. */
10873 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
10874 return true;
10876 /* Update info on where next arg arrives in registers. */
10877 rs6000_function_arg_advance (args_so_far, mode, type, true);
10878 return false;
10881 /* Return true if FUN has no prototype, has a variable argument
10882 list, or passes any parameter in memory. */
10884 static bool
10885 rs6000_function_parms_need_stack (tree fun, bool incoming)
10887 tree fntype, result;
10888 CUMULATIVE_ARGS args_so_far_v;
10889 cumulative_args_t args_so_far;
10891 if (!fun)
10892 /* Must be a libcall, all of which only use reg parms. */
10893 return false;
10895 fntype = fun;
10896 if (!TYPE_P (fun))
10897 fntype = TREE_TYPE (fun);
10899 /* Varargs functions need the parameter save area. */
10900 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
10901 return true;
10903 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
10904 args_so_far = pack_cumulative_args (&args_so_far_v);
10906 /* When incoming, we will have been passed the function decl.
10907 It is necessary to use the decl to handle K&R style functions,
10908 where TYPE_ARG_TYPES may not be available. */
10909 if (incoming)
10911 gcc_assert (DECL_P (fun));
10912 result = DECL_RESULT (fun);
10914 else
10915 result = TREE_TYPE (fntype);
10917 if (result && aggregate_value_p (result, fntype))
10919 if (!TYPE_P (result))
10920 result = TREE_TYPE (result);
10921 result = build_pointer_type (result);
10922 rs6000_parm_needs_stack (args_so_far, result);
10925 if (incoming)
10927 tree parm;
10929 for (parm = DECL_ARGUMENTS (fun);
10930 parm && parm != void_list_node;
10931 parm = TREE_CHAIN (parm))
10932 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
10933 return true;
10935 else
10937 function_args_iterator args_iter;
10938 tree arg_type;
10940 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
10941 if (rs6000_parm_needs_stack (args_so_far, arg_type))
10942 return true;
10945 return false;
10948 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
10949 usually a constant depending on the ABI. However, in the ELFv2 ABI
10950 the register parameter area is optional when calling a function that
10951 has a prototype in scope, has no variable argument list, and passes
10952 all parameters in registers. */
10954 int
10955 rs6000_reg_parm_stack_space (tree fun, bool incoming)
10957 int reg_parm_stack_space;
10959 switch (DEFAULT_ABI)
10961 default:
10962 reg_parm_stack_space = 0;
10963 break;
10965 case ABI_AIX:
10966 case ABI_DARWIN:
10967 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10968 break;
10970 case ABI_ELFv2:
10971 /* ??? Recomputing this every time is a bit expensive. Is there
10972 a place to cache this information? */
10973 if (rs6000_function_parms_need_stack (fun, incoming))
10974 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10975 else
10976 reg_parm_stack_space = 0;
10977 break;
10980 return reg_parm_stack_space;
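/* Worked example (informative only): under the 64-bit AIX and ELFv2 ABIs
   the parameter save area covers the eight argument GPRs r3..r10, i.e.
   8 registers * 8 bytes = 64 bytes; the 32-bit flavor is 8 * 4 = 32.
   ELFv2 additionally omits the area when rs6000_function_parms_need_stack
   proves the callee takes everything in registers.  */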
10983 static void
10984 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
10986 int i;
10987 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
10989 if (nregs == 0)
10990 return;
10992 for (i = 0; i < nregs; i++)
10994 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
10995 if (reload_completed)
10997 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
10998 tem = NULL_RTX;
10999 else
11000 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
11001 i * GET_MODE_SIZE (reg_mode));
11003 else
11004 tem = replace_equiv_address (tem, XEXP (tem, 0));
11006 gcc_assert (tem);
11008 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
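/* Informal example of the copy emitted above: with TARGET_64BIT,
   regno == 3 and nregs == 2 the loop stores r3 and r4 to consecutive
   doublewords of X, roughly

       std r3, 0(x)
       std r4, 8(x)

   with the destination address legitimized differently before and
   after reload, as the reload_completed branch shows.  */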
11012 /* Perform any actions needed for a function that is receiving a
11013 variable number of arguments.
11015 CUM is as above.
11017 MODE and TYPE are the mode and type of the current parameter.
11019 PRETEND_SIZE is a variable that should be set to the amount of stack
11020 that must be pushed by the prolog to pretend that our caller pushed
11021 it.
11023 Normally, this macro will push all remaining incoming registers on the
11024 stack and set PRETEND_SIZE to the length of the registers pushed. */
11026 static void
11027 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
11028 tree type, int *pretend_size ATTRIBUTE_UNUSED,
11029 int no_rtl)
11031 CUMULATIVE_ARGS next_cum;
11032 int reg_size = TARGET_32BIT ? 4 : 8;
11033 rtx save_area = NULL_RTX, mem;
11034 int first_reg_offset;
11035 alias_set_type set;
11037 /* Skip the last named argument. */
11038 next_cum = *get_cumulative_args (cum);
11039 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
11041 if (DEFAULT_ABI == ABI_V4)
11043 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
11045 if (! no_rtl)
11047 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
11048 HOST_WIDE_INT offset = 0;
11050 /* Try to optimize the size of the varargs save area.
11051 The ABI requires that ap.reg_save_area is doubleword
11052 aligned, but we don't need to allocate space for all
11053 the bytes, only those to which we actually will save
11054 anything. */
11055 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
11056 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
11057 if (TARGET_HARD_FLOAT && TARGET_FPRS
11058 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11059 && cfun->va_list_fpr_size)
11061 if (gpr_reg_num)
11062 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
11063 * UNITS_PER_FP_WORD;
11064 if (cfun->va_list_fpr_size
11065 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11066 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
11067 else
11068 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11069 * UNITS_PER_FP_WORD;
11071 if (gpr_reg_num)
11073 offset = -((first_reg_offset * reg_size) & ~7);
11074 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
11076 gpr_reg_num = cfun->va_list_gpr_size;
11077 if (reg_size == 4 && (first_reg_offset & 1))
11078 gpr_reg_num++;
11080 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
11082 else if (fpr_size)
11083 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
11084 * UNITS_PER_FP_WORD
11085 - (int) (GP_ARG_NUM_REG * reg_size);
11087 if (gpr_size + fpr_size)
11089 rtx reg_save_area
11090 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
11091 gcc_assert (GET_CODE (reg_save_area) == MEM);
11092 reg_save_area = XEXP (reg_save_area, 0);
11093 if (GET_CODE (reg_save_area) == PLUS)
11095 gcc_assert (XEXP (reg_save_area, 0)
11096 == virtual_stack_vars_rtx);
11097 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
11098 offset += INTVAL (XEXP (reg_save_area, 1));
11100 else
11101 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
11104 cfun->machine->varargs_save_offset = offset;
11105 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
11108 else
11110 first_reg_offset = next_cum.words;
11111 save_area = virtual_incoming_args_rtx;
11113 if (targetm.calls.must_pass_in_stack (mode, type))
11114 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11117 set = get_varargs_alias_set ();
11118 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11119 && cfun->va_list_gpr_size)
11121 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11123 if (va_list_gpr_counter_field)
11124 /* V4 va_list_gpr_size counts number of registers needed. */
11125 n_gpr = cfun->va_list_gpr_size;
11126 else
11127 /* char * va_list instead counts number of bytes needed. */
11128 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11130 if (nregs > n_gpr)
11131 nregs = n_gpr;
11133 mem = gen_rtx_MEM (BLKmode,
11134 plus_constant (Pmode, save_area,
11135 first_reg_offset * reg_size));
11136 MEM_NOTRAP_P (mem) = 1;
11137 set_mem_alias_set (mem, set);
11138 set_mem_align (mem, BITS_PER_WORD);
11140 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11141 nregs);
11144 /* Save FP registers if needed. */
11145 if (DEFAULT_ABI == ABI_V4
11146 && TARGET_HARD_FLOAT && TARGET_FPRS
11147 && ! no_rtl
11148 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11149 && cfun->va_list_fpr_size)
11151 int fregno = next_cum.fregno, nregs;
11152 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11153 rtx lab = gen_label_rtx ();
11154 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11155 * UNITS_PER_FP_WORD);
11157 emit_jump_insn
11158 (gen_rtx_SET (VOIDmode,
11159 pc_rtx,
11160 gen_rtx_IF_THEN_ELSE (VOIDmode,
11161 gen_rtx_NE (VOIDmode, cr1,
11162 const0_rtx),
11163 gen_rtx_LABEL_REF (VOIDmode, lab),
11164 pc_rtx)));
11166 for (nregs = 0;
11167 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11168 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11170 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11171 ? DFmode : SFmode,
11172 plus_constant (Pmode, save_area, off));
11173 MEM_NOTRAP_P (mem) = 1;
11174 set_mem_alias_set (mem, set);
11175 set_mem_align (mem, GET_MODE_ALIGNMENT (
11176 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11177 ? DFmode : SFmode));
11178 emit_move_insn (mem, gen_rtx_REG (
11179 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11180 ? DFmode : SFmode, fregno));
11183 emit_label (lab);
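/* The sizing arithmetic above rounds byte counts up to doubleword
   multiples with the usual mask idiom; a standalone equivalent
   (illustrative only, not used by the compiler):

       static unsigned int
       round_up_8 (unsigned int bytes)
       {
         return (bytes + 7) & ~7U;    // 1..8 -> 8, 9..16 -> 16, ...
       }

   e.g. nine remaining 4-byte GPR slots (36 bytes) reserve 40 bytes.  */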
11187 /* Create the va_list data type. */
11189 static tree
11190 rs6000_build_builtin_va_list (void)
11192 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11194 /* For AIX, prefer 'char *' because that's what the system
11195 header files like. */
11196 if (DEFAULT_ABI != ABI_V4)
11197 return build_pointer_type (char_type_node);
11199 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11200 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11201 get_identifier ("__va_list_tag"), record);
11203 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11204 unsigned_char_type_node);
11205 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11206 unsigned_char_type_node);
11207 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11208 every user file. */
11209 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11210 get_identifier ("reserved"), short_unsigned_type_node);
11211 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11212 get_identifier ("overflow_arg_area"),
11213 ptr_type_node);
11214 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11215 get_identifier ("reg_save_area"),
11216 ptr_type_node);
11218 va_list_gpr_counter_field = f_gpr;
11219 va_list_fpr_counter_field = f_fpr;
11221 DECL_FIELD_CONTEXT (f_gpr) = record;
11222 DECL_FIELD_CONTEXT (f_fpr) = record;
11223 DECL_FIELD_CONTEXT (f_res) = record;
11224 DECL_FIELD_CONTEXT (f_ovf) = record;
11225 DECL_FIELD_CONTEXT (f_sav) = record;
11227 TYPE_STUB_DECL (record) = type_decl;
11228 TYPE_NAME (record) = type_decl;
11229 TYPE_FIELDS (record) = f_gpr;
11230 DECL_CHAIN (f_gpr) = f_fpr;
11231 DECL_CHAIN (f_fpr) = f_res;
11232 DECL_CHAIN (f_res) = f_ovf;
11233 DECL_CHAIN (f_ovf) = f_sav;
11235 layout_type (record);
11237 /* The correct type is an array type of one element. */
11238 return build_array_type (record, build_index_type (size_zero_node));
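/* For reference, the record built above corresponds to the familiar
   SVR4 PowerPC va_list, approximately:

       typedef struct __va_list_tag {
         unsigned char gpr;           // index of next saved GPR (0..8)
         unsigned char fpr;           // index of next saved FPR (0..8)
         unsigned short reserved;     // named only to quiet -Wpadded
         void *overflow_arg_area;     // arguments that spilled to stack
         void *reg_save_area;         // block saved by the prologue
       } va_list[1];

   The array-of-one wrapper gives va_list the usual C by-reference
   behavior when passed to functions.  */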
11241 /* Implement va_start. */
11243 static void
11244 rs6000_va_start (tree valist, rtx nextarg)
11246 HOST_WIDE_INT words, n_gpr, n_fpr;
11247 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11248 tree gpr, fpr, ovf, sav, t;
11250 /* Only SVR4 needs something special. */
11251 if (DEFAULT_ABI != ABI_V4)
11253 std_expand_builtin_va_start (valist, nextarg);
11254 return;
11257 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11258 f_fpr = DECL_CHAIN (f_gpr);
11259 f_res = DECL_CHAIN (f_fpr);
11260 f_ovf = DECL_CHAIN (f_res);
11261 f_sav = DECL_CHAIN (f_ovf);
11263 valist = build_simple_mem_ref (valist);
11264 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11265 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11266 f_fpr, NULL_TREE);
11267 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11268 f_ovf, NULL_TREE);
11269 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11270 f_sav, NULL_TREE);
11272 /* Count number of gp and fp argument registers used. */
11273 words = crtl->args.info.words;
11274 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11275 GP_ARG_NUM_REG);
11276 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11277 FP_ARG_NUM_REG);
11279 if (TARGET_DEBUG_ARG)
11280 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC ", n_gpr = "
11281 HOST_WIDE_INT_PRINT_DEC ", n_fpr = " HOST_WIDE_INT_PRINT_DEC "\n",
11282 words, n_gpr, n_fpr);
11284 if (cfun->va_list_gpr_size)
11286 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11287 build_int_cst (NULL_TREE, n_gpr));
11288 TREE_SIDE_EFFECTS (t) = 1;
11289 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11292 if (cfun->va_list_fpr_size)
11294 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11295 build_int_cst (NULL_TREE, n_fpr));
11296 TREE_SIDE_EFFECTS (t) = 1;
11297 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11299 #ifdef HAVE_AS_GNU_ATTRIBUTE
11300 if (call_ABI_of_interest (cfun->decl))
11301 rs6000_passes_float = true;
11302 #endif
11305 /* Find the overflow area. */
11306 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11307 if (words != 0)
11308 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
11309 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11310 TREE_SIDE_EFFECTS (t) = 1;
11311 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11313 /* If there were no va_arg invocations, don't set up the register
11314 save area. */
11315 if (!cfun->va_list_gpr_size
11316 && !cfun->va_list_fpr_size
11317 && n_gpr < GP_ARG_NUM_REG
11318 && n_fpr < FP_ARG_V4_MAX_REG)
11319 return;
11321 /* Find the register save area. */
11322 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11323 if (cfun->machine->varargs_save_offset)
11324 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11325 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11326 TREE_SIDE_EFFECTS (t) = 1;
11327 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
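/* Informal example: for

       int f (int first, ...)

   on V4, va_start leaves gpr = 1 (r3 already holds FIRST), fpr = 0,
   overflow_arg_area pointing at the first stack-passed argument, and
   reg_save_area at the block spilled by setup_incoming_varargs above;
   the TARGET_DEBUG_ARG dump prints exactly these counts.  */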
11330 /* Implement va_arg. */
11332 static tree
11333 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11334 gimple_seq *post_p)
11336 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11337 tree gpr, fpr, ovf, sav, reg, t, u;
11338 int size, rsize, n_reg, sav_ofs, sav_scale;
11339 tree lab_false, lab_over, addr;
11340 int align;
11341 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11342 int regalign = 0;
11343 gimple stmt;
11345 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11347 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11348 return build_va_arg_indirect_ref (t);
11351 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11352 earlier version of gcc, with the property that it always applied alignment
11353 adjustments to the va-args (even for zero-sized types). The cheapest way
11354 to deal with this is to replicate the effect of the part of
11355 std_gimplify_va_arg_expr that carries out the align adjust for the
11356 relevant case.
11357 We don't need to check for pass-by-reference because of the test above.
11358 We can return a simplified answer, since we know there's no offset to add. */
11360 if (((TARGET_MACHO
11361 && rs6000_darwin64_abi)
11362 || DEFAULT_ABI == ABI_ELFv2
11363 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11364 && integer_zerop (TYPE_SIZE (type)))
11366 unsigned HOST_WIDE_INT align, boundary;
11367 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11368 align = PARM_BOUNDARY / BITS_PER_UNIT;
11369 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11370 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11371 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11372 boundary /= BITS_PER_UNIT;
11373 if (boundary > align)
11375 tree t;
11376 /* This updates arg ptr by the amount that would be necessary
11377 to align the zero-sized (but not zero-alignment) item. */
11378 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11379 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11380 gimplify_and_add (t, pre_p);
11382 t = fold_convert (sizetype, valist_tmp);
11383 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11384 fold_convert (TREE_TYPE (valist),
11385 fold_build2 (BIT_AND_EXPR, sizetype, t,
11386 size_int (-boundary))));
11387 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11388 gimplify_and_add (t, pre_p);
11390 /* Since it is zero-sized there's no increment for the item itself. */
11391 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11392 return build_va_arg_indirect_ref (valist_tmp);
11395 if (DEFAULT_ABI != ABI_V4)
11397 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11399 tree elem_type = TREE_TYPE (type);
11400 machine_mode elem_mode = TYPE_MODE (elem_type);
11401 int elem_size = GET_MODE_SIZE (elem_mode);
11403 if (elem_size < UNITS_PER_WORD)
11405 tree real_part, imag_part;
11406 gimple_seq post = NULL;
11408 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11409 &post);
11410 /* Copy the value into a temporary, lest the formal temporary
11411 be reused out from under us. */
11412 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11413 gimple_seq_add_seq (pre_p, post);
11415 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11416 post_p);
11418 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11422 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
11425 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11426 f_fpr = DECL_CHAIN (f_gpr);
11427 f_res = DECL_CHAIN (f_fpr);
11428 f_ovf = DECL_CHAIN (f_res);
11429 f_sav = DECL_CHAIN (f_ovf);
11431 valist = build_va_arg_indirect_ref (valist);
11432 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11433 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11434 f_fpr, NULL_TREE);
11435 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11436 f_ovf, NULL_TREE);
11437 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11438 f_sav, NULL_TREE);
11440 size = int_size_in_bytes (type);
11441 rsize = (size + 3) / 4;
11442 align = 1;
11444 if (TARGET_HARD_FLOAT && TARGET_FPRS
11445 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
11446 || (TARGET_DOUBLE_FLOAT
11447 && (TYPE_MODE (type) == DFmode
11448 || TYPE_MODE (type) == TFmode
11449 || TYPE_MODE (type) == SDmode
11450 || TYPE_MODE (type) == DDmode
11451 || TYPE_MODE (type) == TDmode))))
11453 /* FP args go in FP registers, if present. */
11454 reg = fpr;
11455 n_reg = (size + 7) / 8;
11456 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
11457 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
11458 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
11459 align = 8;
11461 else
11463 /* Otherwise into GP registers. */
11464 reg = gpr;
11465 n_reg = rsize;
11466 sav_ofs = 0;
11467 sav_scale = 4;
11468 if (n_reg == 2)
11469 align = 8;
11472 /* Pull the value out of the saved registers.... */
11474 lab_over = NULL;
11475 addr = create_tmp_var (ptr_type_node, "addr");
11477 /* AltiVec vectors never go in registers when -mabi=altivec. */
11478 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11479 align = 16;
11480 else
11482 lab_false = create_artificial_label (input_location);
11483 lab_over = create_artificial_label (input_location);
11485 /* Long long and SPE vectors are aligned in the registers.
11486 As is any other 2-GPR item, such as complex int, due to a
11487 historical mistake. */
11488 u = reg;
11489 if (n_reg == 2 && reg == gpr)
11491 regalign = 1;
11492 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11493 build_int_cst (TREE_TYPE (reg), n_reg - 1));
11494 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
11495 unshare_expr (reg), u);
11497 /* _Decimal128 is passed in even/odd fpr pairs; the stored
11498 reg number is 0 for f1, so we want to make it odd. */
11499 else if (reg == fpr && TYPE_MODE (type) == TDmode)
11501 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11502 build_int_cst (TREE_TYPE (reg), 1));
11503 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
11506 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
11507 t = build2 (GE_EXPR, boolean_type_node, u, t);
11508 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11509 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11510 gimplify_and_add (t, pre_p);
11512 t = sav;
11513 if (sav_ofs)
11514 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11516 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11517 build_int_cst (TREE_TYPE (reg), n_reg));
11518 u = fold_convert (sizetype, u);
11519 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
11520 t = fold_build_pointer_plus (t, u);
11522 /* _Decimal32 varargs are located in the second word of the 64-bit
11523 FP register for 32-bit binaries. */
11524 if (TARGET_32BIT
11525 && TARGET_HARD_FLOAT && TARGET_FPRS
11526 && TYPE_MODE (type) == SDmode)
11527 t = fold_build_pointer_plus_hwi (t, size);
11529 gimplify_assign (addr, t, pre_p);
11531 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11533 stmt = gimple_build_label (lab_false);
11534 gimple_seq_add_stmt (pre_p, stmt);
11536 if ((n_reg == 2 && !regalign) || n_reg > 2)
11538 /* Ensure that we don't find any more args in regs.
11539 Alignment has already taken care of the special cases. */
11540 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
11544 /* ... otherwise out of the overflow area. */
11546 /* Care for on-stack alignment if needed. */
11547 t = ovf;
11548 if (align != 1)
11550 t = fold_build_pointer_plus_hwi (t, align - 1);
11551 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
11552 build_int_cst (TREE_TYPE (t), -align));
11554 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11556 gimplify_assign (unshare_expr (addr), t, pre_p);
11558 t = fold_build_pointer_plus_hwi (t, size);
11559 gimplify_assign (unshare_expr (ovf), t, pre_p);
11561 if (lab_over)
11563 stmt = gimple_build_label (lab_over);
11564 gimple_seq_add_stmt (pre_p, stmt);
11567 if (STRICT_ALIGNMENT
11568 && (TYPE_ALIGN (type)
11569 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
11571 /* The value (of type complex double, for example) may not be
11572 aligned in memory in the saved registers, so copy via a
11573 temporary. (This is the same code as used for SPARC.) */
11574 tree tmp = create_tmp_var (type, "va_arg_tmp");
11575 tree dest_addr = build_fold_addr_expr (tmp);
11577 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
11578 3, dest_addr, addr, size_int (rsize * 4));
11580 gimplify_and_add (copy, pre_p);
11581 addr = dest_addr;
11584 addr = fold_convert (ptrtype, addr);
11585 return build_va_arg_indirect_ref (addr);
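/* The GIMPLE built above for a V4 va_arg of type double amounts to the
   following pseudo-C (informal sketch; align8 rounds up to 8):

       if (ap->fpr >= 8) goto overflow;     // only f1..f8 hold varargs
       addr = ap->reg_save_area + 8*4       // sav_ofs: skip the 8 GPRs
              + ap->fpr * 8;                // sav_scale
       ap->fpr++;
       goto done;
     overflow:
       addr = align8 (ap->overflow_arg_area);
       ap->overflow_arg_area = addr + 8;
     done:
       result = *(double *) addr;

   The register test compares against 8 - n_reg + 1 so that items
   needing several registers fall back to the stack once they no
   longer fit entirely.  */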
11588 /* Builtins. */
11590 static void
11591 def_builtin (const char *name, tree type, enum rs6000_builtins code)
11593 tree t;
11594 unsigned classify = rs6000_builtin_info[(int)code].attr;
11595 const char *attr_string = "";
11597 gcc_assert (name != NULL);
11598 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT - 1));
11600 if (rs6000_builtin_decls[(int)code])
11601 fatal_error (input_location,
11602 "internal error: builtin function %s already processed", name);
11604 rs6000_builtin_decls[(int)code] = t =
11605 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
11607 /* Set any special attributes. */
11608 if ((classify & RS6000_BTC_CONST) != 0)
11610 /* const function, function only depends on the inputs. */
11611 TREE_READONLY (t) = 1;
11612 TREE_NOTHROW (t) = 1;
11613 attr_string = ", const";
11615 else if ((classify & RS6000_BTC_PURE) != 0)
11617 /* pure function, function can read global memory, but does not set any
11618 external state. */
11619 DECL_PURE_P (t) = 1;
11620 TREE_NOTHROW (t) = 1;
11621 attr_string = ", pure";
11623 else if ((classify & RS6000_BTC_FP) != 0)
11625 /* Function is a math function. If rounding mode is on, then treat the
11626 function as not reading global memory, but it can have arbitrary side
11627 effects. If it is off, then assume the function is a const function.
11628 This mimics the ATTR_MATHFN_FPROUNDING attribute in
11629 builtin-attribute.def that is used for the math functions. */
11630 TREE_NOTHROW (t) = 1;
11631 if (flag_rounding_math)
11633 DECL_PURE_P (t) = 1;
11634 DECL_IS_NOVOPS (t) = 1;
11635 attr_string = ", fp, pure";
11637 else
11639 TREE_READONLY (t) = 1;
11640 attr_string = ", fp, const";
11643 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
11644 gcc_unreachable ();
11646 if (TARGET_DEBUG_BUILTIN)
11647 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
11648 (int)code, name, attr_string);
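/* Usage sketch (the exact call sites live in the builtin init code
   later in this file; names here are representative):

       def_builtin ("__builtin_altivec_vaddubm",
                    v16qi_ftype_v16qi_v16qi, ALTIVEC_BUILTIN_VADDUBM);

   after which the RS6000_BTC_* bits in rs6000_builtin_info decide
   whether the new decl is marked const, pure, or neither.  */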
11651 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
11653 #undef RS6000_BUILTIN_1
11654 #undef RS6000_BUILTIN_2
11655 #undef RS6000_BUILTIN_3
11656 #undef RS6000_BUILTIN_A
11657 #undef RS6000_BUILTIN_D
11658 #undef RS6000_BUILTIN_E
11659 #undef RS6000_BUILTIN_H
11660 #undef RS6000_BUILTIN_P
11661 #undef RS6000_BUILTIN_Q
11662 #undef RS6000_BUILTIN_S
11663 #undef RS6000_BUILTIN_X
11665 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11666 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11667 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
11668 { MASK, ICODE, NAME, ENUM },
11670 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11671 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11672 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11673 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11674 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11675 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11676 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11677 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11679 static const struct builtin_description bdesc_3arg[] =
11681 #include "rs6000-builtin.def"
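/* Illustrative sketch of the X-macro technique used for every bdesc_*
   table in this file (generic C, not tied to rs6000-builtin.def): one
   master list is expanded repeatedly, each time defining only the
   entry kind of interest:

       #define LIST(ONE, TWO)  ONE (foo) TWO (bar) ONE (baz)
       #define AS_ENUM(N)      N##_code,
       #define IGNORE(N)
       enum codes { LIST (AS_ENUM, IGNORE) };   // foo_code, baz_code

   Below, the same trick repeats: all RS6000_BUILTIN_* macros are
   #undef'd, exactly one is redefined to emit a table row, and the
   .def file is included again.  */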
11684 /* DST operations: void foo (void *, const int, const char). */
11686 #undef RS6000_BUILTIN_1
11687 #undef RS6000_BUILTIN_2
11688 #undef RS6000_BUILTIN_3
11689 #undef RS6000_BUILTIN_A
11690 #undef RS6000_BUILTIN_D
11691 #undef RS6000_BUILTIN_E
11692 #undef RS6000_BUILTIN_H
11693 #undef RS6000_BUILTIN_P
11694 #undef RS6000_BUILTIN_Q
11695 #undef RS6000_BUILTIN_S
11696 #undef RS6000_BUILTIN_X
11698 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11699 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11700 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11701 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11702 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
11703 { MASK, ICODE, NAME, ENUM },
11705 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11706 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11707 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11708 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11709 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11710 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11712 static const struct builtin_description bdesc_dst[] =
11714 #include "rs6000-builtin.def"
11717 /* Simple binary operations: VECc = foo (VECa, VECb). */
11719 #undef RS6000_BUILTIN_1
11720 #undef RS6000_BUILTIN_2
11721 #undef RS6000_BUILTIN_3
11722 #undef RS6000_BUILTIN_A
11723 #undef RS6000_BUILTIN_D
11724 #undef RS6000_BUILTIN_E
11725 #undef RS6000_BUILTIN_H
11726 #undef RS6000_BUILTIN_P
11727 #undef RS6000_BUILTIN_Q
11728 #undef RS6000_BUILTIN_S
11729 #undef RS6000_BUILTIN_X
11731 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11732 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
11733 { MASK, ICODE, NAME, ENUM },
11735 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11736 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11737 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11738 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11739 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11740 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11741 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11742 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11743 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11745 static const struct builtin_description bdesc_2arg[] =
11747 #include "rs6000-builtin.def"
11750 #undef RS6000_BUILTIN_1
11751 #undef RS6000_BUILTIN_2
11752 #undef RS6000_BUILTIN_3
11753 #undef RS6000_BUILTIN_A
11754 #undef RS6000_BUILTIN_D
11755 #undef RS6000_BUILTIN_E
11756 #undef RS6000_BUILTIN_H
11757 #undef RS6000_BUILTIN_P
11758 #undef RS6000_BUILTIN_Q
11759 #undef RS6000_BUILTIN_S
11760 #undef RS6000_BUILTIN_X
11762 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11763 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11764 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11765 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11766 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11767 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11768 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11769 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
11770 { MASK, ICODE, NAME, ENUM },
11772 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11773 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11774 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11776 /* AltiVec predicates. */
11778 static const struct builtin_description bdesc_altivec_preds[] =
11780 #include "rs6000-builtin.def"
11783 /* SPE predicates. */
11784 #undef RS6000_BUILTIN_1
11785 #undef RS6000_BUILTIN_2
11786 #undef RS6000_BUILTIN_3
11787 #undef RS6000_BUILTIN_A
11788 #undef RS6000_BUILTIN_D
11789 #undef RS6000_BUILTIN_E
11790 #undef RS6000_BUILTIN_H
11791 #undef RS6000_BUILTIN_P
11792 #undef RS6000_BUILTIN_Q
11793 #undef RS6000_BUILTIN_S
11794 #undef RS6000_BUILTIN_X
11796 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11797 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11798 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11799 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11800 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11801 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11802 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11803 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11804 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11805 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
11806 { MASK, ICODE, NAME, ENUM },
11808 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11810 static const struct builtin_description bdesc_spe_predicates[] =
11812 #include "rs6000-builtin.def"
11815 /* SPE evsel predicates. */
11816 #undef RS6000_BUILTIN_1
11817 #undef RS6000_BUILTIN_2
11818 #undef RS6000_BUILTIN_3
11819 #undef RS6000_BUILTIN_A
11820 #undef RS6000_BUILTIN_D
11821 #undef RS6000_BUILTIN_E
11822 #undef RS6000_BUILTIN_H
11823 #undef RS6000_BUILTIN_P
11824 #undef RS6000_BUILTIN_Q
11825 #undef RS6000_BUILTIN_S
11826 #undef RS6000_BUILTIN_X
11828 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11829 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11830 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11831 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11832 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11833 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
11834 { MASK, ICODE, NAME, ENUM },
11836 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11837 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11838 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11839 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11840 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11842 static const struct builtin_description bdesc_spe_evsel[] =
11844 #include "rs6000-builtin.def"
11847 /* PAIRED predicates. */
11848 #undef RS6000_BUILTIN_1
11849 #undef RS6000_BUILTIN_2
11850 #undef RS6000_BUILTIN_3
11851 #undef RS6000_BUILTIN_A
11852 #undef RS6000_BUILTIN_D
11853 #undef RS6000_BUILTIN_E
11854 #undef RS6000_BUILTIN_H
11855 #undef RS6000_BUILTIN_P
11856 #undef RS6000_BUILTIN_Q
11857 #undef RS6000_BUILTIN_S
11858 #undef RS6000_BUILTIN_X
11860 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11861 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11862 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11863 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11864 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11865 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11866 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11867 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11868 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
11869 { MASK, ICODE, NAME, ENUM },
11871 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11872 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11874 static const struct builtin_description bdesc_paired_preds[] =
11876 #include "rs6000-builtin.def"
11879 /* ABS* operations. */
11881 #undef RS6000_BUILTIN_1
11882 #undef RS6000_BUILTIN_2
11883 #undef RS6000_BUILTIN_3
11884 #undef RS6000_BUILTIN_A
11885 #undef RS6000_BUILTIN_D
11886 #undef RS6000_BUILTIN_E
11887 #undef RS6000_BUILTIN_H
11888 #undef RS6000_BUILTIN_P
11889 #undef RS6000_BUILTIN_Q
11890 #undef RS6000_BUILTIN_S
11891 #undef RS6000_BUILTIN_X
11893 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11894 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11895 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11896 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
11897 { MASK, ICODE, NAME, ENUM },
11899 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11900 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11901 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11902 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11903 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11904 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11905 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11907 static const struct builtin_description bdesc_abs[] =
11909 #include "rs6000-builtin.def"
11912 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
11913 foo (VECa). */
11915 #undef RS6000_BUILTIN_1
11916 #undef RS6000_BUILTIN_2
11917 #undef RS6000_BUILTIN_3
11918 #undef RS6000_BUILTIN_A
11919 #undef RS6000_BUILTIN_D
11920 #undef RS6000_BUILTIN_E
11921 #undef RS6000_BUILTIN_H
11922 #undef RS6000_BUILTIN_P
11923 #undef RS6000_BUILTIN_Q
11924 #undef RS6000_BUILTIN_S
11925 #undef RS6000_BUILTIN_X
11927 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
11928 { MASK, ICODE, NAME, ENUM },
11930 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11931 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11932 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11933 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11934 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11935 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11936 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11937 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11938 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11939 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11941 static const struct builtin_description bdesc_1arg[] =
11943 #include "rs6000-builtin.def"
11946 /* HTM builtins. */
11947 #undef RS6000_BUILTIN_1
11948 #undef RS6000_BUILTIN_2
11949 #undef RS6000_BUILTIN_3
11950 #undef RS6000_BUILTIN_A
11951 #undef RS6000_BUILTIN_D
11952 #undef RS6000_BUILTIN_E
11953 #undef RS6000_BUILTIN_H
11954 #undef RS6000_BUILTIN_P
11955 #undef RS6000_BUILTIN_Q
11956 #undef RS6000_BUILTIN_S
11957 #undef RS6000_BUILTIN_X
11959 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11960 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11961 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11962 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11963 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11964 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11965 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
11966 { MASK, ICODE, NAME, ENUM },
11968 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11969 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11970 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11971 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11973 static const struct builtin_description bdesc_htm[] =
11975 #include "rs6000-builtin.def"
11978 #undef RS6000_BUILTIN_1
11979 #undef RS6000_BUILTIN_2
11980 #undef RS6000_BUILTIN_3
11981 #undef RS6000_BUILTIN_A
11982 #undef RS6000_BUILTIN_D
11983 #undef RS6000_BUILTIN_E
11984 #undef RS6000_BUILTIN_H
11985 #undef RS6000_BUILTIN_P
11986 #undef RS6000_BUILTIN_Q
11987 #undef RS6000_BUILTIN_S
11989 /* Return true if a builtin function is overloaded. */
11990 bool
11991 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
11993 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
11996 /* Expand an expression EXP that calls a builtin without arguments. */
11997 static rtx
11998 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
12000 rtx pat;
12001 machine_mode tmode = insn_data[icode].operand[0].mode;
12003 if (icode == CODE_FOR_nothing)
12004 /* Builtin not supported on this processor. */
12005 return 0;
12007 if (target == 0
12008 || GET_MODE (target) != tmode
12009 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12010 target = gen_reg_rtx (tmode);
12012 pat = GEN_FCN (icode) (target);
12013 if (! pat)
12014 return 0;
12015 emit_insn (pat);
12017 return target;
12021 static rtx
12022 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
12024 rtx pat;
12025 tree arg0 = CALL_EXPR_ARG (exp, 0);
12026 tree arg1 = CALL_EXPR_ARG (exp, 1);
12027 rtx op0 = expand_normal (arg0);
12028 rtx op1 = expand_normal (arg1);
12029 machine_mode mode0 = insn_data[icode].operand[0].mode;
12030 machine_mode mode1 = insn_data[icode].operand[1].mode;
12032 if (icode == CODE_FOR_nothing)
12033 /* Builtin not supported on this processor. */
12034 return 0;
12036 /* If we got invalid arguments bail out before generating bad rtl. */
12037 if (arg0 == error_mark_node || arg1 == error_mark_node)
12038 return const0_rtx;
12040 if (GET_CODE (op0) != CONST_INT
12041 || INTVAL (op0) > 255
12042 || INTVAL (op0) < 0)
12044 error ("argument 1 must be an 8-bit field value");
12045 return const0_rtx;
12048 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12049 op0 = copy_to_mode_reg (mode0, op0);
12051 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12052 op1 = copy_to_mode_reg (mode1, op1);
12054 pat = GEN_FCN (icode) (op0, op1);
12055 if (! pat)
12056 return const0_rtx;
12057 emit_insn (pat);
12059 return NULL_RTX;
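/* User-level view (informative): the builtin guarded above is

       __builtin_mtfsf (0xff, some_double);

   and because the FM field is encoded directly in the instruction,
   argument 1 must be a literal in [0, 255]; anything else hits the
   error path instead of generating bad RTL.  */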
12063 static rtx
12064 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
12066 rtx pat;
12067 tree arg0 = CALL_EXPR_ARG (exp, 0);
12068 rtx op0 = expand_normal (arg0);
12069 machine_mode tmode = insn_data[icode].operand[0].mode;
12070 machine_mode mode0 = insn_data[icode].operand[1].mode;
12072 if (icode == CODE_FOR_nothing)
12073 /* Builtin not supported on this processor. */
12074 return 0;
12076 /* If we got invalid arguments bail out before generating bad rtl. */
12077 if (arg0 == error_mark_node)
12078 return const0_rtx;
12080 if (icode == CODE_FOR_altivec_vspltisb
12081 || icode == CODE_FOR_altivec_vspltish
12082 || icode == CODE_FOR_altivec_vspltisw
12083 || icode == CODE_FOR_spe_evsplatfi
12084 || icode == CODE_FOR_spe_evsplati)
12086 /* Only allow 5-bit *signed* literals. */
12087 if (GET_CODE (op0) != CONST_INT
12088 || INTVAL (op0) > 15
12089 || INTVAL (op0) < -16)
12091 error ("argument 1 must be a 5-bit signed literal");
12092 return const0_rtx;
12096 if (target == 0
12097 || GET_MODE (target) != tmode
12098 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12099 target = gen_reg_rtx (tmode);
12101 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12102 op0 = copy_to_mode_reg (mode0, op0);
12104 pat = GEN_FCN (icode) (target, op0);
12105 if (! pat)
12106 return 0;
12107 emit_insn (pat);
12109 return target;
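/* Example of the literal check above (informative): vec_splat_s8 and
   friends expand through altivec_vspltis[bhw], whose immediate is a
   5-bit signed field, so

       vector signed char v = vec_splat_s8 (15);    // ok: -16..15
       vector signed char w = vec_splat_s8 (99);    // diagnosed above

   the out-of-range call is rejected rather than silently truncated.  */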
12112 static rtx
12113 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12115 rtx pat, scratch1, scratch2;
12116 tree arg0 = CALL_EXPR_ARG (exp, 0);
12117 rtx op0 = expand_normal (arg0);
12118 machine_mode tmode = insn_data[icode].operand[0].mode;
12119 machine_mode mode0 = insn_data[icode].operand[1].mode;
12121 /* If we have invalid arguments, bail out before generating bad rtl. */
12122 if (arg0 == error_mark_node)
12123 return const0_rtx;
12125 if (target == 0
12126 || GET_MODE (target) != tmode
12127 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12128 target = gen_reg_rtx (tmode);
12130 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12131 op0 = copy_to_mode_reg (mode0, op0);
12133 scratch1 = gen_reg_rtx (mode0);
12134 scratch2 = gen_reg_rtx (mode0);
12136 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12137 if (! pat)
12138 return 0;
12139 emit_insn (pat);
12141 return target;
12144 static rtx
12145 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12147 rtx pat;
12148 tree arg0 = CALL_EXPR_ARG (exp, 0);
12149 tree arg1 = CALL_EXPR_ARG (exp, 1);
12150 rtx op0 = expand_normal (arg0);
12151 rtx op1 = expand_normal (arg1);
12152 machine_mode tmode = insn_data[icode].operand[0].mode;
12153 machine_mode mode0 = insn_data[icode].operand[1].mode;
12154 machine_mode mode1 = insn_data[icode].operand[2].mode;
12156 if (icode == CODE_FOR_nothing)
12157 /* Builtin not supported on this processor. */
12158 return 0;
12160 /* If we got invalid arguments bail out before generating bad rtl. */
12161 if (arg0 == error_mark_node || arg1 == error_mark_node)
12162 return const0_rtx;
12164 if (icode == CODE_FOR_altivec_vcfux
12165 || icode == CODE_FOR_altivec_vcfsx
12166 || icode == CODE_FOR_altivec_vctsxs
12167 || icode == CODE_FOR_altivec_vctuxs
12168 || icode == CODE_FOR_altivec_vspltb
12169 || icode == CODE_FOR_altivec_vsplth
12170 || icode == CODE_FOR_altivec_vspltw
12171 || icode == CODE_FOR_spe_evaddiw
12172 || icode == CODE_FOR_spe_evldd
12173 || icode == CODE_FOR_spe_evldh
12174 || icode == CODE_FOR_spe_evldw
12175 || icode == CODE_FOR_spe_evlhhesplat
12176 || icode == CODE_FOR_spe_evlhhossplat
12177 || icode == CODE_FOR_spe_evlhhousplat
12178 || icode == CODE_FOR_spe_evlwhe
12179 || icode == CODE_FOR_spe_evlwhos
12180 || icode == CODE_FOR_spe_evlwhou
12181 || icode == CODE_FOR_spe_evlwhsplat
12182 || icode == CODE_FOR_spe_evlwwsplat
12183 || icode == CODE_FOR_spe_evrlwi
12184 || icode == CODE_FOR_spe_evslwi
12185 || icode == CODE_FOR_spe_evsrwis
12186 || icode == CODE_FOR_spe_evsubifw
12187 || icode == CODE_FOR_spe_evsrwiu)
12189 /* Only allow 5-bit unsigned literals. */
12190 STRIP_NOPS (arg1);
12191 if (TREE_CODE (arg1) != INTEGER_CST
12192 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12194 error ("argument 2 must be a 5-bit unsigned literal");
12195 return const0_rtx;
12199 if (target == 0
12200 || GET_MODE (target) != tmode
12201 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12202 target = gen_reg_rtx (tmode);
12204 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12205 op0 = copy_to_mode_reg (mode0, op0);
12206 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12207 op1 = copy_to_mode_reg (mode1, op1);
12209 pat = GEN_FCN (icode) (target, op0, op1);
12210 if (! pat)
12211 return 0;
12212 emit_insn (pat);
12214 return target;
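/* Likewise for the binary forms (informative): the splat, shift, and
   load-offset immediates listed above occupy 5-bit unsigned fields,
   so e.g.

       vector signed char b = vec_splat (v, 3);     // ok: 0..31

   while a non-literal or out-of-range second argument is diagnosed
   here instead of being truncated into the instruction.  */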
12217 static rtx
12218 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12220 rtx pat, scratch;
12221 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12222 tree arg0 = CALL_EXPR_ARG (exp, 1);
12223 tree arg1 = CALL_EXPR_ARG (exp, 2);
12224 rtx op0 = expand_normal (arg0);
12225 rtx op1 = expand_normal (arg1);
12226 machine_mode tmode = SImode;
12227 machine_mode mode0 = insn_data[icode].operand[1].mode;
12228 machine_mode mode1 = insn_data[icode].operand[2].mode;
12229 int cr6_form_int;
12231 if (TREE_CODE (cr6_form) != INTEGER_CST)
12233 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12234 return const0_rtx;
12236 else
12237 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12239 gcc_assert (mode0 == mode1);
12241 /* If we have invalid arguments, bail out before generating bad rtl. */
12242 if (arg0 == error_mark_node || arg1 == error_mark_node)
12243 return const0_rtx;
12245 if (target == 0
12246 || GET_MODE (target) != tmode
12247 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12248 target = gen_reg_rtx (tmode);
12250 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12251 op0 = copy_to_mode_reg (mode0, op0);
12252 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12253 op1 = copy_to_mode_reg (mode1, op1);
12255 scratch = gen_reg_rtx (mode0);
12257 pat = GEN_FCN (icode) (scratch, op0, op1);
12258 if (! pat)
12259 return 0;
12260 emit_insn (pat);
12262 /* The vec_any* and vec_all* predicates use the same opcodes for two
12263 different operations, but the bits in CR6 will be different
12264 depending on what information we want. So we have to play tricks
12265 with CR6 to get the right bits out.
12267 If you think this is disgusting, look at the specs for the
12268 AltiVec predicates. */
12270 switch (cr6_form_int)
12272 case 0:
12273 emit_insn (gen_cr6_test_for_zero (target));
12274 break;
12275 case 1:
12276 emit_insn (gen_cr6_test_for_zero_reverse (target));
12277 break;
12278 case 2:
12279 emit_insn (gen_cr6_test_for_lt (target));
12280 break;
12281 case 3:
12282 emit_insn (gen_cr6_test_for_lt_reverse (target));
12283 break;
12284 default:
12285 error ("argument 1 of __builtin_altivec_predicate is out of range");
12286 break;
12289 return target;
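/* Concrete mapping (informative): altivec.h encodes the desired CR6
   test in argument 0, e.g.

       vec_all_eq (a, b)   ->  cr6_form 2, gen_cr6_test_for_lt
       vec_any_eq (a, b)   ->  cr6_form 1, gen_cr6_test_for_zero_reverse

   which is how one vcmp*. opcode serves both the vec_all_* and
   vec_any_* families.  */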
12292 static rtx
12293 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12295 rtx pat, addr;
12296 tree arg0 = CALL_EXPR_ARG (exp, 0);
12297 tree arg1 = CALL_EXPR_ARG (exp, 1);
12298 machine_mode tmode = insn_data[icode].operand[0].mode;
12299 machine_mode mode0 = Pmode;
12300 machine_mode mode1 = Pmode;
12301 rtx op0 = expand_normal (arg0);
12302 rtx op1 = expand_normal (arg1);
12304 if (icode == CODE_FOR_nothing)
12305 /* Builtin not supported on this processor. */
12306 return 0;
12308 /* If we got invalid arguments bail out before generating bad rtl. */
12309 if (arg0 == error_mark_node || arg1 == error_mark_node)
12310 return const0_rtx;
12312 if (target == 0
12313 || GET_MODE (target) != tmode
12314 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12315 target = gen_reg_rtx (tmode);
12317 op1 = copy_to_mode_reg (mode1, op1);
12319 if (op0 == const0_rtx)
12321 addr = gen_rtx_MEM (tmode, op1);
12323 else
12325 op0 = copy_to_mode_reg (mode0, op0);
12326 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12329 pat = GEN_FCN (icode) (target, addr);
12331 if (! pat)
12332 return 0;
12333 emit_insn (pat);
12335 return target;
12338 /* Return a constant vector for use as a little-endian permute control vector
12339 to reverse the order of elements of the given vector mode. */
12340 static rtx
12341 swap_selector_for_mode (machine_mode mode)
12343 /* These are little endian vectors, so their elements are reversed
12344 from what you would normally expect for a permute control vector. */
12345 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12346 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12347 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12348 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12349 unsigned int *swaparray, i;
12350 rtx perm[16];
12352 switch (mode)
12354 case V2DFmode:
12355 case V2DImode:
12356 swaparray = swap2;
12357 break;
12358 case V4SFmode:
12359 case V4SImode:
12360 swaparray = swap4;
12361 break;
12362 case V8HImode:
12363 swaparray = swap8;
12364 break;
12365 case V16QImode:
12366 swaparray = swap16;
12367 break;
12368 default:
12369 gcc_unreachable ();
12372 for (i = 0; i < 16; ++i)
12373 perm[i] = GEN_INT (swaparray[i]);
12375 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
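/* One way to read the tables above (informative): moving a little-endian
   vector between memory and the big-endian-numbered register image
   reverses all 16 bytes; composing that full reversal with the
   within-element byte swap encoded in swapN leaves whole elements
   reversed while each element's bytes stay in order.  That is also why
   swap16 is the identity: 1-byte elements need no inner swap, so the
   load/store reversal alone reorders them.  */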
12378 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12379 with -maltivec=be specified. Issue the load followed by an element-reversing
12380 permute. */
12381 void
12382 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12384 rtx tmp = gen_reg_rtx (mode);
12385 rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
12386 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12387 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12388 rtx sel = swap_selector_for_mode (mode);
12389 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12391 gcc_assert (REG_P (op0));
12392 emit_insn (par);
12393 emit_insn (gen_rtx_SET (VOIDmode, op0, vperm));
12396 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12397 with -maltivec=be specified. Issue the store preceded by an element-reversing
12398 permute. */
12399 void
12400 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12402 rtx tmp = gen_reg_rtx (mode);
12403 rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
12404 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12405 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12406 rtx sel = swap_selector_for_mode (mode);
12407 rtx vperm;
12409 gcc_assert (REG_P (op1));
12410 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12411 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12412 emit_insn (par);
12415 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12416 specified. Issue the store preceded by an element-reversing permute. */
12417 void
12418 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12420 machine_mode inner_mode = GET_MODE_INNER (mode);
12421 rtx tmp = gen_reg_rtx (mode);
12422 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12423 rtx sel = swap_selector_for_mode (mode);
12424 rtx vperm;
12426 gcc_assert (REG_P (op1));
12427 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12428 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12429 emit_insn (gen_rtx_SET (VOIDmode, op0, stvx));
12432 static rtx
12433 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
12435 rtx pat, addr;
12436 tree arg0 = CALL_EXPR_ARG (exp, 0);
12437 tree arg1 = CALL_EXPR_ARG (exp, 1);
12438 machine_mode tmode = insn_data[icode].operand[0].mode;
12439 machine_mode mode0 = Pmode;
12440 machine_mode mode1 = Pmode;
12441 rtx op0 = expand_normal (arg0);
12442 rtx op1 = expand_normal (arg1);
12444 if (icode == CODE_FOR_nothing)
12445 /* Builtin not supported on this processor. */
12446 return 0;
12448 /* If we got invalid arguments bail out before generating bad rtl. */
12449 if (arg0 == error_mark_node || arg1 == error_mark_node)
12450 return const0_rtx;
12452 if (target == 0
12453 || GET_MODE (target) != tmode
12454 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12455 target = gen_reg_rtx (tmode);
12457 op1 = copy_to_mode_reg (mode1, op1);
12459 if (op0 == const0_rtx)
12461 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
12463 else
12465 op0 = copy_to_mode_reg (mode0, op0);
12466 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
12469 pat = GEN_FCN (icode) (target, addr);
12471 if (! pat)
12472 return 0;
12473 emit_insn (pat);
12475 return target;
12478 static rtx
12479 spe_expand_stv_builtin (enum insn_code icode, tree exp)
12481 tree arg0 = CALL_EXPR_ARG (exp, 0);
12482 tree arg1 = CALL_EXPR_ARG (exp, 1);
12483 tree arg2 = CALL_EXPR_ARG (exp, 2);
12484 rtx op0 = expand_normal (arg0);
12485 rtx op1 = expand_normal (arg1);
12486 rtx op2 = expand_normal (arg2);
12487 rtx pat;
12488 machine_mode mode0 = insn_data[icode].operand[0].mode;
12489 machine_mode mode1 = insn_data[icode].operand[1].mode;
12490 machine_mode mode2 = insn_data[icode].operand[2].mode;
12492 /* Invalid arguments. Bail before doing anything stoopid! */
12493 if (arg0 == error_mark_node
12494 || arg1 == error_mark_node
12495 || arg2 == error_mark_node)
12496 return const0_rtx;
12498 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
12499 op0 = copy_to_mode_reg (mode2, op0);
12500 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
12501 op1 = copy_to_mode_reg (mode0, op1);
12502 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
12503 op2 = copy_to_mode_reg (mode1, op2);
12505 pat = GEN_FCN (icode) (op1, op2, op0);
12506 if (pat)
12507 emit_insn (pat);
12508 return NULL_RTX;
12511 static rtx
12512 paired_expand_stv_builtin (enum insn_code icode, tree exp)
12514 tree arg0 = CALL_EXPR_ARG (exp, 0);
12515 tree arg1 = CALL_EXPR_ARG (exp, 1);
12516 tree arg2 = CALL_EXPR_ARG (exp, 2);
12517 rtx op0 = expand_normal (arg0);
12518 rtx op1 = expand_normal (arg1);
12519 rtx op2 = expand_normal (arg2);
12520 rtx pat, addr;
12521 machine_mode tmode = insn_data[icode].operand[0].mode;
12522 machine_mode mode1 = Pmode;
12523 machine_mode mode2 = Pmode;
12525 /* Invalid arguments. Bail before doing anything stoopid! */
12526 if (arg0 == error_mark_node
12527 || arg1 == error_mark_node
12528 || arg2 == error_mark_node)
12529 return const0_rtx;
12531 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
12532 op0 = copy_to_mode_reg (tmode, op0);
12534 op2 = copy_to_mode_reg (mode2, op2);
12536 if (op1 == const0_rtx)
12538 addr = gen_rtx_MEM (tmode, op2);
12540 else
12542 op1 = copy_to_mode_reg (mode1, op1);
12543 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12546 pat = GEN_FCN (icode) (addr, op0);
12547 if (pat)
12548 emit_insn (pat);
12549 return NULL_RTX;
12552 static rtx
12553 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
12555 tree arg0 = CALL_EXPR_ARG (exp, 0);
12556 tree arg1 = CALL_EXPR_ARG (exp, 1);
12557 tree arg2 = CALL_EXPR_ARG (exp, 2);
12558 rtx op0 = expand_normal (arg0);
12559 rtx op1 = expand_normal (arg1);
12560 rtx op2 = expand_normal (arg2);
12561 rtx pat, addr;
12562 machine_mode tmode = insn_data[icode].operand[0].mode;
12563 machine_mode smode = insn_data[icode].operand[1].mode;
12564 machine_mode mode1 = Pmode;
12565 machine_mode mode2 = Pmode;
12567 /* Invalid arguments. Bail before doing anything stoopid! */
12568 if (arg0 == error_mark_node
12569 || arg1 == error_mark_node
12570 || arg2 == error_mark_node)
12571 return const0_rtx;
12573 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
12574 op0 = copy_to_mode_reg (smode, op0);
12576 op2 = copy_to_mode_reg (mode2, op2);
12578 if (op1 == const0_rtx)
12580 addr = gen_rtx_MEM (tmode, op2);
12582 else
12584 op1 = copy_to_mode_reg (mode1, op1);
12585 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12588 pat = GEN_FCN (icode) (addr, op0);
12589 if (pat)
12590 emit_insn (pat);
12591 return NULL_RTX;
12594 /* Return the appropriate SPR number associated with the given builtin. */
12595 static inline HOST_WIDE_INT
12596 htm_spr_num (enum rs6000_builtins code)
12598 if (code == HTM_BUILTIN_GET_TFHAR
12599 || code == HTM_BUILTIN_SET_TFHAR)
12600 return TFHAR_SPR;
12601 else if (code == HTM_BUILTIN_GET_TFIAR
12602 || code == HTM_BUILTIN_SET_TFIAR)
12603 return TFIAR_SPR;
12604 else if (code == HTM_BUILTIN_GET_TEXASR
12605 || code == HTM_BUILTIN_SET_TEXASR)
12606 return TEXASR_SPR;
12607 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
12608 || code == HTM_BUILTIN_SET_TEXASRU);
12609 return TEXASRU_SPR;
12612 /* Return the appropriate SPR regno associated with the given builtin. */
12613 static inline HOST_WIDE_INT
12614 htm_spr_regno (enum rs6000_builtins code)
12616 if (code == HTM_BUILTIN_GET_TFHAR
12617 || code == HTM_BUILTIN_SET_TFHAR)
12618 return TFHAR_REGNO;
12619 else if (code == HTM_BUILTIN_GET_TFIAR
12620 || code == HTM_BUILTIN_SET_TFIAR)
12621 return TFIAR_REGNO;
12622 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
12623 || code == HTM_BUILTIN_SET_TEXASR
12624 || code == HTM_BUILTIN_GET_TEXASRU
12625 || code == HTM_BUILTIN_SET_TEXASRU);
12626 return TEXASR_REGNO;
12629 /* Return the correct ICODE value depending on whether we are
12630 setting or reading the HTM SPRs. */
12631 static inline enum insn_code
12632 rs6000_htm_spr_icode (bool nonvoid)
12634 if (nonvoid)
12635 return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
12636 else
12637 return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
12640 /* Expand the HTM builtin in EXP and store the result in TARGET.
12641 Store true in *EXPANDEDP if we found a builtin to expand. */
12642 static rtx
12643 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
12645 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12646 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
12647 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12648 const struct builtin_description *d;
12649 size_t i;
12651 *expandedp = false;
12653 /* Expand the HTM builtins. */
12654 d = bdesc_htm;
12655 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
12656 if (d->code == fcode)
12658 rtx op[MAX_HTM_OPERANDS], pat;
12659 int nopnds = 0;
12660 tree arg;
12661 call_expr_arg_iterator iter;
12662 unsigned attr = rs6000_builtin_info[fcode].attr;
12663 enum insn_code icode = d->icode;
12665 if (attr & RS6000_BTC_SPR)
12666 icode = rs6000_htm_spr_icode (nonvoid);
12668 if (nonvoid)
12670 machine_mode tmode = insn_data[icode].operand[0].mode;
12671 if (!target
12672 || GET_MODE (target) != tmode
12673 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
12674 target = gen_reg_rtx (tmode);
12675 op[nopnds++] = target;
12678 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
12680 const struct insn_operand_data *insn_op;
12682 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
12683 return NULL_RTX;
12685 insn_op = &insn_data[icode].operand[nopnds];
12687 op[nopnds] = expand_normal (arg);
12689 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
12691 if (!strcmp (insn_op->constraint, "n"))
12693 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
12694 if (!CONST_INT_P (op[nopnds]))
12695 error ("argument %d must be an unsigned literal", arg_num);
12696 else
12697 error ("argument %d is an unsigned literal that is "
12698 "out of range", arg_num);
12699 return const0_rtx;
12701 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
12704 nopnds++;
12707 /* Handle the builtins for extended mnemonics. These accept
12708 no arguments, but map to builtins that take arguments. */
12709 switch (fcode)
12711 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
12712 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
12713 op[nopnds++] = GEN_INT (1);
12714 #ifdef ENABLE_CHECKING
12715 attr |= RS6000_BTC_UNARY;
12716 #endif
12717 break;
12718 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
12719 op[nopnds++] = GEN_INT (0);
12720 #ifdef ENABLE_CHECKING
12721 attr |= RS6000_BTC_UNARY;
12722 #endif
12723 break;
12724 default:
12725 break;
12728 /* If this builtin accesses SPRs, then pass in the appropriate
12729 SPR number and SPR regno as the last two operands. */
12730 if (attr & RS6000_BTC_SPR)
12732 op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
12733 op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
12736 #ifdef ENABLE_CHECKING
12737 int expected_nopnds = 0;
12738 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
12739 expected_nopnds = 1;
12740 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
12741 expected_nopnds = 2;
12742 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
12743 expected_nopnds = 3;
12744 if (!(attr & RS6000_BTC_VOID))
12745 expected_nopnds += 1;
12746 if (attr & RS6000_BTC_SPR)
12747 expected_nopnds += 2;
12749 gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
12750 #endif
12752 switch (nopnds)
12754 case 1:
12755 pat = GEN_FCN (icode) (op[0]);
12756 break;
12757 case 2:
12758 pat = GEN_FCN (icode) (op[0], op[1]);
12759 break;
12760 case 3:
12761 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
12762 break;
12763 case 4:
12764 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
12765 break;
12766 default:
12767 gcc_unreachable ();
12769 if (!pat)
12770 return NULL_RTX;
12771 emit_insn (pat);
12773 *expandedp = true;
12774 if (nonvoid)
12775 return target;
12776 return const0_rtx;
12779 return NULL_RTX;
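/* A rough sketch of the operand array this builds for an SPR reader
   such as __builtin_get_texasr on a 64-bit target (nonvoid case):

     op[0] = target;                                   result register
     op[1] = gen_rtx_CONST_INT (Pmode, TEXASR_SPR);    SPR number
     op[2] = gen_rtx_REG (Pmode, TEXASR_REGNO);        SPR hard reg

   which is then expanded through CODE_FOR_htm_mfspr_di as selected by
   rs6000_htm_spr_icode above.  */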
12782 static rtx
12783 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
12785 rtx pat;
12786 tree arg0 = CALL_EXPR_ARG (exp, 0);
12787 tree arg1 = CALL_EXPR_ARG (exp, 1);
12788 tree arg2 = CALL_EXPR_ARG (exp, 2);
12789 rtx op0 = expand_normal (arg0);
12790 rtx op1 = expand_normal (arg1);
12791 rtx op2 = expand_normal (arg2);
12792 machine_mode tmode = insn_data[icode].operand[0].mode;
12793 machine_mode mode0 = insn_data[icode].operand[1].mode;
12794 machine_mode mode1 = insn_data[icode].operand[2].mode;
12795 machine_mode mode2 = insn_data[icode].operand[3].mode;
12797 if (icode == CODE_FOR_nothing)
12798 /* Builtin not supported on this processor. */
12799 return 0;
12801 /* If we got invalid arguments bail out before generating bad rtl. */
12802 if (arg0 == error_mark_node
12803 || arg1 == error_mark_node
12804 || arg2 == error_mark_node)
12805 return const0_rtx;
12807 /* Check and prepare argument depending on the instruction code.
12809 Note that a switch statement instead of the sequence of tests
12810 would be incorrect as many of the CODE_FOR values could be
12811 CODE_FOR_nothing and that would yield multiple alternatives
12812 with identical values. We'd never reach here at runtime in
12813 this case. */
12814 if (icode == CODE_FOR_altivec_vsldoi_v4sf
12815 || icode == CODE_FOR_altivec_vsldoi_v4si
12816 || icode == CODE_FOR_altivec_vsldoi_v8hi
12817 || icode == CODE_FOR_altivec_vsldoi_v16qi)
12819 /* Only allow 4-bit unsigned literals. */
12820 STRIP_NOPS (arg2);
12821 if (TREE_CODE (arg2) != INTEGER_CST
12822 || TREE_INT_CST_LOW (arg2) & ~0xf)
12824 error ("argument 3 must be a 4-bit unsigned literal");
12825 return const0_rtx;
12828 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
12829 || icode == CODE_FOR_vsx_xxpermdi_v2di
12830 || icode == CODE_FOR_vsx_xxsldwi_v16qi
12831 || icode == CODE_FOR_vsx_xxsldwi_v8hi
12832 || icode == CODE_FOR_vsx_xxsldwi_v4si
12833 || icode == CODE_FOR_vsx_xxsldwi_v4sf
12834 || icode == CODE_FOR_vsx_xxsldwi_v2di
12835 || icode == CODE_FOR_vsx_xxsldwi_v2df)
12837 /* Only allow 2-bit unsigned literals. */
12838 STRIP_NOPS (arg2);
12839 if (TREE_CODE (arg2) != INTEGER_CST
12840 || TREE_INT_CST_LOW (arg2) & ~0x3)
12842 error ("argument 3 must be a 2-bit unsigned literal");
12843 return const0_rtx;
12846 else if (icode == CODE_FOR_vsx_set_v2df
12847 || icode == CODE_FOR_vsx_set_v2di
12848 || icode == CODE_FOR_bcdadd
12849 || icode == CODE_FOR_bcdadd_lt
12850 || icode == CODE_FOR_bcdadd_eq
12851 || icode == CODE_FOR_bcdadd_gt
12852 || icode == CODE_FOR_bcdsub
12853 || icode == CODE_FOR_bcdsub_lt
12854 || icode == CODE_FOR_bcdsub_eq
12855 || icode == CODE_FOR_bcdsub_gt)
12857 /* Only allow 1-bit unsigned literals. */
12858 STRIP_NOPS (arg2);
12859 if (TREE_CODE (arg2) != INTEGER_CST
12860 || TREE_INT_CST_LOW (arg2) & ~0x1)
12862 error ("argument 3 must be a 1-bit unsigned literal");
12863 return const0_rtx;
12866 else if (icode == CODE_FOR_dfp_ddedpd_dd
12867 || icode == CODE_FOR_dfp_ddedpd_td)
12869 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
12870 STRIP_NOPS (arg0);
12871 if (TREE_CODE (arg0) != INTEGER_CST
12872 || TREE_INT_CST_LOW (arg0) & ~0x3)
12874 error ("argument 1 must be 0 or 2");
12875 return const0_rtx;
12878 else if (icode == CODE_FOR_dfp_denbcd_dd
12879 || icode == CODE_FOR_dfp_denbcd_td)
12881 /* Only allow 1-bit unsigned literals. */
12882 STRIP_NOPS (arg0);
12883 if (TREE_CODE (arg0) != INTEGER_CST
12884 || TREE_INT_CST_LOW (arg0) & ~0x1)
12886 error ("argument 1 must be a 1-bit unsigned literal");
12887 return const0_rtx;
12890 else if (icode == CODE_FOR_dfp_dscli_dd
12891 || icode == CODE_FOR_dfp_dscli_td
12892 || icode == CODE_FOR_dfp_dscri_dd
12893 || icode == CODE_FOR_dfp_dscri_td)
12895 /* Only allow 6-bit unsigned literals. */
12896 STRIP_NOPS (arg1);
12897 if (TREE_CODE (arg1) != INTEGER_CST
12898 || TREE_INT_CST_LOW (arg1) & ~0x3f)
12900 error ("argument 2 must be a 6-bit unsigned literal");
12901 return const0_rtx;
12904 else if (icode == CODE_FOR_crypto_vshasigmaw
12905 || icode == CODE_FOR_crypto_vshasigmad)
12907 /* Check whether the 2nd and 3rd arguments are integer constants and in
12908 range and prepare arguments. */
12909 STRIP_NOPS (arg1);
12910 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
12912 error ("argument 2 must be 0 or 1");
12913 return const0_rtx;
12916 STRIP_NOPS (arg2);
12917 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
12919 error ("argument 3 must be in the range 0..15");
12920 return const0_rtx;
12924 if (target == 0
12925 || GET_MODE (target) != tmode
12926 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12927 target = gen_reg_rtx (tmode);
12929 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12930 op0 = copy_to_mode_reg (mode0, op0);
12931 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12932 op1 = copy_to_mode_reg (mode1, op1);
12933 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12934 op2 = copy_to_mode_reg (mode2, op2);
12936 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
12937 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
12938 else
12939 pat = GEN_FCN (icode) (target, op0, op1, op2);
12940 if (! pat)
12941 return 0;
12942 emit_insn (pat);
12944 return target;
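/* For example, the vsldoi checks above reject a source-level call
   such as vec_sld (a, b, 17) with "argument 3 must be a 4-bit
   unsigned literal" at expansion time, instead of emitting rtl with
   an out-of-range immediate field.  */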
12947 /* Expand the lvx builtins. */
12948 static rtx
12949 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
12951 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12952 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12953 tree arg0;
12954 machine_mode tmode, mode0;
12955 rtx pat, op0;
12956 enum insn_code icode;
12958 switch (fcode)
12960 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
12961 icode = CODE_FOR_vector_altivec_load_v16qi;
12962 break;
12963 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
12964 icode = CODE_FOR_vector_altivec_load_v8hi;
12965 break;
12966 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
12967 icode = CODE_FOR_vector_altivec_load_v4si;
12968 break;
12969 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
12970 icode = CODE_FOR_vector_altivec_load_v4sf;
12971 break;
12972 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
12973 icode = CODE_FOR_vector_altivec_load_v2df;
12974 break;
12975 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
12976 icode = CODE_FOR_vector_altivec_load_v2di;
break;
12977 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
12978 icode = CODE_FOR_vector_altivec_load_v1ti;
12979 break;
12980 default:
12981 *expandedp = false;
12982 return NULL_RTX;
12985 *expandedp = true;
12987 arg0 = CALL_EXPR_ARG (exp, 0);
12988 op0 = expand_normal (arg0);
12989 tmode = insn_data[icode].operand[0].mode;
12990 mode0 = insn_data[icode].operand[1].mode;
12992 if (target == 0
12993 || GET_MODE (target) != tmode
12994 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12995 target = gen_reg_rtx (tmode);
12997 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12998 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13000 pat = GEN_FCN (icode) (target, op0);
13001 if (! pat)
13002 return 0;
13003 emit_insn (pat);
13004 return target;
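/* Sketch of the resulting expansion for, e.g., the 4si variant: the
   pointer argument is forced into a Pmode register, wrapped in a MEM,
   and emitted through CODE_FOR_vector_altivec_load_v4si, roughly

     (set (reg:V4SI target) (mem:V4SI (reg:P op0)))  */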
13007 /* Expand the stvx builtins. */
13008 static rtx
13009 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13010 bool *expandedp)
13012 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13013 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13014 tree arg0, arg1;
13015 machine_mode mode0, mode1;
13016 rtx pat, op0, op1;
13017 enum insn_code icode;
13019 switch (fcode)
13021 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
13022 icode = CODE_FOR_vector_altivec_store_v16qi;
13023 break;
13024 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
13025 icode = CODE_FOR_vector_altivec_store_v8hi;
13026 break;
13027 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
13028 icode = CODE_FOR_vector_altivec_store_v4si;
13029 break;
13030 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
13031 icode = CODE_FOR_vector_altivec_store_v4sf;
13032 break;
13033 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
13034 icode = CODE_FOR_vector_altivec_store_v2df;
13035 break;
13036 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
13037 icode = CODE_FOR_vector_altivec_store_v2di;
break;
13038 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
13039 icode = CODE_FOR_vector_altivec_store_v1ti;
13040 break;
13041 default:
13042 *expandedp = false;
13043 return NULL_RTX;
13046 arg0 = CALL_EXPR_ARG (exp, 0);
13047 arg1 = CALL_EXPR_ARG (exp, 1);
13048 op0 = expand_normal (arg0);
13049 op1 = expand_normal (arg1);
13050 mode0 = insn_data[icode].operand[0].mode;
13051 mode1 = insn_data[icode].operand[1].mode;
13053 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13054 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13055 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13056 op1 = copy_to_mode_reg (mode1, op1);
13058 pat = GEN_FCN (icode) (op0, op1);
13059 if (pat)
13060 emit_insn (pat);
13062 *expandedp = true;
13063 return NULL_RTX;
13066 /* Expand the dst builtins. */
13067 static rtx
13068 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13069 bool *expandedp)
13071 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13072 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13073 tree arg0, arg1, arg2;
13074 machine_mode mode0, mode1;
13075 rtx pat, op0, op1, op2;
13076 const struct builtin_description *d;
13077 size_t i;
13079 *expandedp = false;
13081 /* Handle DST variants. */
13082 d = bdesc_dst;
13083 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
13084 if (d->code == fcode)
13086 arg0 = CALL_EXPR_ARG (exp, 0);
13087 arg1 = CALL_EXPR_ARG (exp, 1);
13088 arg2 = CALL_EXPR_ARG (exp, 2);
13089 op0 = expand_normal (arg0);
13090 op1 = expand_normal (arg1);
13091 op2 = expand_normal (arg2);
13092 mode0 = insn_data[d->icode].operand[0].mode;
13093 mode1 = insn_data[d->icode].operand[1].mode;
13095 /* Invalid arguments, bail out before generating bad rtl. */
13096 if (arg0 == error_mark_node
13097 || arg1 == error_mark_node
13098 || arg2 == error_mark_node)
13099 return const0_rtx;
13101 *expandedp = true;
13102 STRIP_NOPS (arg2);
13103 if (TREE_CODE (arg2) != INTEGER_CST
13104 || TREE_INT_CST_LOW (arg2) & ~0x3)
13106 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
13107 return const0_rtx;
13110 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13111 op0 = copy_to_mode_reg (Pmode, op0);
13112 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13113 op1 = copy_to_mode_reg (mode1, op1);
13115 pat = GEN_FCN (d->icode) (op0, op1, op2);
13116 if (pat != 0)
13117 emit_insn (pat);
13119 return NULL_RTX;
13122 return NULL_RTX;
13125 /* Expand vec_init builtin. */
13126 static rtx
13127 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
13129 machine_mode tmode = TYPE_MODE (type);
13130 machine_mode inner_mode = GET_MODE_INNER (tmode);
13131 int i, n_elt = GET_MODE_NUNITS (tmode);
13133 gcc_assert (VECTOR_MODE_P (tmode));
13134 gcc_assert (n_elt == call_expr_nargs (exp));
13136 if (!target || !register_operand (target, tmode))
13137 target = gen_reg_rtx (tmode);
13139 /* If we have a vector comprised of a single element, such as V1TImode, do
13140 the initialization directly. */
13141 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
13143 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
13144 emit_move_insn (target, gen_lowpart (tmode, x));
13146 else
13148 rtvec v = rtvec_alloc (n_elt);
13150 for (i = 0; i < n_elt; ++i)
13152 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
13153 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13156 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
13159 return target;
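/* E.g. a V4SI vec_init with four scalar arguments builds a
   four-element PARALLEL of the lowparted operands and hands it to
   rs6000_expand_vector_init; a V1TI init takes the single-element
   shortcut above and is just one move.  */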
13162 /* Return the integer constant in ARG. Constrain it to be in the range
13163 of the subparts of VEC_TYPE; issue an error if not. */
13165 static int
13166 get_element_number (tree vec_type, tree arg)
13168 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13170 if (!tree_fits_uhwi_p (arg)
13171 || (elt = tree_to_uhwi (arg), elt > max))
13173 error ("selector must be an integer constant in the range 0..%wi", max);
13174 return 0;
13177 return elt;
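/* E.g. for a V4SI vector type TYPE_VECTOR_SUBPARTS is 4, so a
   vec_extract selector outside 0..3 is diagnosed here as
   "selector must be an integer constant in the range 0..3".  */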
13180 /* Expand vec_set builtin. */
13181 static rtx
13182 altivec_expand_vec_set_builtin (tree exp)
13184 machine_mode tmode, mode1;
13185 tree arg0, arg1, arg2;
13186 int elt;
13187 rtx op0, op1;
13189 arg0 = CALL_EXPR_ARG (exp, 0);
13190 arg1 = CALL_EXPR_ARG (exp, 1);
13191 arg2 = CALL_EXPR_ARG (exp, 2);
13193 tmode = TYPE_MODE (TREE_TYPE (arg0));
13194 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13195 gcc_assert (VECTOR_MODE_P (tmode));
13197 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
13198 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
13199 elt = get_element_number (TREE_TYPE (arg0), arg2);
13201 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
13202 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
13204 op0 = force_reg (tmode, op0);
13205 op1 = force_reg (mode1, op1);
13207 rs6000_expand_vector_set (op0, op1, elt);
13209 return op0;
13212 /* Expand vec_ext builtin. */
13213 static rtx
13214 altivec_expand_vec_ext_builtin (tree exp, rtx target)
13216 machine_mode tmode, mode0;
13217 tree arg0, arg1;
13218 int elt;
13219 rtx op0;
13221 arg0 = CALL_EXPR_ARG (exp, 0);
13222 arg1 = CALL_EXPR_ARG (exp, 1);
13224 op0 = expand_normal (arg0);
13225 elt = get_element_number (TREE_TYPE (arg0), arg1);
13227 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13228 mode0 = TYPE_MODE (TREE_TYPE (arg0));
13229 gcc_assert (VECTOR_MODE_P (mode0));
13231 op0 = force_reg (mode0, op0);
13233 if (optimize || !target || !register_operand (target, tmode))
13234 target = gen_reg_rtx (tmode);
13236 rs6000_expand_vector_extract (target, op0, elt);
13238 return target;
13241 /* Expand the builtin in EXP and store the result in TARGET. Store
13242 true in *EXPANDEDP if we found a builtin to expand. */
13243 static rtx
13244 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
13246 const struct builtin_description *d;
13247 size_t i;
13248 enum insn_code icode;
13249 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13250 tree arg0;
13251 rtx op0, pat;
13252 machine_mode tmode, mode0;
13253 enum rs6000_builtins fcode
13254 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13256 if (rs6000_overloaded_builtin_p (fcode))
13258 *expandedp = true;
13259 error ("unresolved overload for Altivec builtin %qF", fndecl);
13261 /* Given it is invalid, just generate a normal call. */
13262 return expand_call (exp, target, false);
13265 target = altivec_expand_ld_builtin (exp, target, expandedp);
13266 if (*expandedp)
13267 return target;
13269 target = altivec_expand_st_builtin (exp, target, expandedp);
13270 if (*expandedp)
13271 return target;
13273 target = altivec_expand_dst_builtin (exp, target, expandedp);
13274 if (*expandedp)
13275 return target;
13277 *expandedp = true;
13279 switch (fcode)
13281 case ALTIVEC_BUILTIN_STVX_V2DF:
13282 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13283 case ALTIVEC_BUILTIN_STVX_V2DI:
13284 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13285 case ALTIVEC_BUILTIN_STVX_V4SF:
13286 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13287 case ALTIVEC_BUILTIN_STVX:
13288 case ALTIVEC_BUILTIN_STVX_V4SI:
13289 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13290 case ALTIVEC_BUILTIN_STVX_V8HI:
13291 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13292 case ALTIVEC_BUILTIN_STVX_V16QI:
13293 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13294 case ALTIVEC_BUILTIN_STVEBX:
13295 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13296 case ALTIVEC_BUILTIN_STVEHX:
13297 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13298 case ALTIVEC_BUILTIN_STVEWX:
13299 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13300 case ALTIVEC_BUILTIN_STVXL_V2DF:
13301 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13302 case ALTIVEC_BUILTIN_STVXL_V2DI:
13303 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13304 case ALTIVEC_BUILTIN_STVXL_V4SF:
13305 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13306 case ALTIVEC_BUILTIN_STVXL:
13307 case ALTIVEC_BUILTIN_STVXL_V4SI:
13308 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13309 case ALTIVEC_BUILTIN_STVXL_V8HI:
13310 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13311 case ALTIVEC_BUILTIN_STVXL_V16QI:
13312 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13314 case ALTIVEC_BUILTIN_STVLX:
13315 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13316 case ALTIVEC_BUILTIN_STVLXL:
13317 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13318 case ALTIVEC_BUILTIN_STVRX:
13319 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13320 case ALTIVEC_BUILTIN_STVRXL:
13321 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13323 case VSX_BUILTIN_STXVD2X_V1TI:
13324 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13325 case VSX_BUILTIN_STXVD2X_V2DF:
13326 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13327 case VSX_BUILTIN_STXVD2X_V2DI:
13328 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13329 case VSX_BUILTIN_STXVW4X_V4SF:
13330 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13331 case VSX_BUILTIN_STXVW4X_V4SI:
13332 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13333 case VSX_BUILTIN_STXVW4X_V8HI:
13334 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13335 case VSX_BUILTIN_STXVW4X_V16QI:
13336 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13338 case ALTIVEC_BUILTIN_MFVSCR:
13339 icode = CODE_FOR_altivec_mfvscr;
13340 tmode = insn_data[icode].operand[0].mode;
13342 if (target == 0
13343 || GET_MODE (target) != tmode
13344 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13345 target = gen_reg_rtx (tmode);
13347 pat = GEN_FCN (icode) (target);
13348 if (! pat)
13349 return 0;
13350 emit_insn (pat);
13351 return target;
13353 case ALTIVEC_BUILTIN_MTVSCR:
13354 icode = CODE_FOR_altivec_mtvscr;
13355 arg0 = CALL_EXPR_ARG (exp, 0);
13356 op0 = expand_normal (arg0);
13357 mode0 = insn_data[icode].operand[0].mode;
13359 /* If we got invalid arguments bail out before generating bad rtl. */
13360 if (arg0 == error_mark_node)
13361 return const0_rtx;
13363 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13364 op0 = copy_to_mode_reg (mode0, op0);
13366 pat = GEN_FCN (icode) (op0);
13367 if (pat)
13368 emit_insn (pat);
13369 return NULL_RTX;
13371 case ALTIVEC_BUILTIN_DSSALL:
13372 emit_insn (gen_altivec_dssall ());
13373 return NULL_RTX;
13375 case ALTIVEC_BUILTIN_DSS:
13376 icode = CODE_FOR_altivec_dss;
13377 arg0 = CALL_EXPR_ARG (exp, 0);
13378 STRIP_NOPS (arg0);
13379 op0 = expand_normal (arg0);
13380 mode0 = insn_data[icode].operand[0].mode;
13382 /* If we got invalid arguments bail out before generating bad rtl. */
13383 if (arg0 == error_mark_node)
13384 return const0_rtx;
13386 if (TREE_CODE (arg0) != INTEGER_CST
13387 || TREE_INT_CST_LOW (arg0) & ~0x3)
13389 error ("argument to dss must be a 2-bit unsigned literal");
13390 return const0_rtx;
13393 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13394 op0 = copy_to_mode_reg (mode0, op0);
13396 emit_insn (gen_altivec_dss (op0));
13397 return NULL_RTX;
13399 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
13400 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
13401 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
13402 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
13403 case VSX_BUILTIN_VEC_INIT_V2DF:
13404 case VSX_BUILTIN_VEC_INIT_V2DI:
13405 case VSX_BUILTIN_VEC_INIT_V1TI:
13406 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
13408 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
13409 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
13410 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
13411 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
13412 case VSX_BUILTIN_VEC_SET_V2DF:
13413 case VSX_BUILTIN_VEC_SET_V2DI:
13414 case VSX_BUILTIN_VEC_SET_V1TI:
13415 return altivec_expand_vec_set_builtin (exp);
13417 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
13418 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
13419 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
13420 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
13421 case VSX_BUILTIN_VEC_EXT_V2DF:
13422 case VSX_BUILTIN_VEC_EXT_V2DI:
13423 case VSX_BUILTIN_VEC_EXT_V1TI:
13424 return altivec_expand_vec_ext_builtin (exp, target);
13426 default:
13427 break;
13428 /* Fall through. */
13431 /* Expand abs* operations. */
13432 d = bdesc_abs;
13433 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
13434 if (d->code == fcode)
13435 return altivec_expand_abs_builtin (d->icode, exp, target);
13437 /* Expand the AltiVec predicates. */
13438 d = bdesc_altivec_preds;
13439 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
13440 if (d->code == fcode)
13441 return altivec_expand_predicate_builtin (d->icode, exp, target);
13443 /* LV* are funky. We initialized them differently. */
13444 switch (fcode)
13446 case ALTIVEC_BUILTIN_LVSL:
13447 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
13448 exp, target, false);
13449 case ALTIVEC_BUILTIN_LVSR:
13450 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
13451 exp, target, false);
13452 case ALTIVEC_BUILTIN_LVEBX:
13453 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
13454 exp, target, false);
13455 case ALTIVEC_BUILTIN_LVEHX:
13456 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
13457 exp, target, false);
13458 case ALTIVEC_BUILTIN_LVEWX:
13459 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
13460 exp, target, false);
13461 case ALTIVEC_BUILTIN_LVXL_V2DF:
13462 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
13463 exp, target, false);
13464 case ALTIVEC_BUILTIN_LVXL_V2DI:
13465 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
13466 exp, target, false);
13467 case ALTIVEC_BUILTIN_LVXL_V4SF:
13468 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
13469 exp, target, false);
13470 case ALTIVEC_BUILTIN_LVXL:
13471 case ALTIVEC_BUILTIN_LVXL_V4SI:
13472 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
13473 exp, target, false);
13474 case ALTIVEC_BUILTIN_LVXL_V8HI:
13475 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
13476 exp, target, false);
13477 case ALTIVEC_BUILTIN_LVXL_V16QI:
13478 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
13479 exp, target, false);
13480 case ALTIVEC_BUILTIN_LVX_V2DF:
13481 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
13482 exp, target, false);
13483 case ALTIVEC_BUILTIN_LVX_V2DI:
13484 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
13485 exp, target, false);
13486 case ALTIVEC_BUILTIN_LVX_V4SF:
13487 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
13488 exp, target, false);
13489 case ALTIVEC_BUILTIN_LVX:
13490 case ALTIVEC_BUILTIN_LVX_V4SI:
13491 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
13492 exp, target, false);
13493 case ALTIVEC_BUILTIN_LVX_V8HI:
13494 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
13495 exp, target, false);
13496 case ALTIVEC_BUILTIN_LVX_V16QI:
13497 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
13498 exp, target, false);
13499 case ALTIVEC_BUILTIN_LVLX:
13500 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
13501 exp, target, true);
13502 case ALTIVEC_BUILTIN_LVLXL:
13503 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
13504 exp, target, true);
13505 case ALTIVEC_BUILTIN_LVRX:
13506 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
13507 exp, target, true);
13508 case ALTIVEC_BUILTIN_LVRXL:
13509 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
13510 exp, target, true);
13511 case VSX_BUILTIN_LXVD2X_V1TI:
13512 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
13513 exp, target, false);
13514 case VSX_BUILTIN_LXVD2X_V2DF:
13515 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
13516 exp, target, false);
13517 case VSX_BUILTIN_LXVD2X_V2DI:
13518 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
13519 exp, target, false);
13520 case VSX_BUILTIN_LXVW4X_V4SF:
13521 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
13522 exp, target, false);
13523 case VSX_BUILTIN_LXVW4X_V4SI:
13524 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
13525 exp, target, false);
13526 case VSX_BUILTIN_LXVW4X_V8HI:
13527 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
13528 exp, target, false);
13529 case VSX_BUILTIN_LXVW4X_V16QI:
13530 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
13531 exp, target, false);
13532 break;
13533 default:
13534 break;
13535 /* Fall through. */
13538 *expandedp = false;
13539 return NULL_RTX;
13542 /* Expand the builtin in EXP and store the result in TARGET. Store
13543 true in *EXPANDEDP if we found a builtin to expand. */
13544 static rtx
13545 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
13547 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13548 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13549 const struct builtin_description *d;
13550 size_t i;
13552 *expandedp = true;
13554 switch (fcode)
13556 case PAIRED_BUILTIN_STX:
13557 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
13558 case PAIRED_BUILTIN_LX:
13559 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
13560 default:
13561 break;
13562 /* Fall through. */
13565 /* Expand the paired predicates. */
13566 d = bdesc_paired_preds;
13567 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
13568 if (d->code == fcode)
13569 return paired_expand_predicate_builtin (d->icode, exp, target);
13571 *expandedp = false;
13572 return NULL_RTX;
13575 /* Binops that need to be initialized manually, but can be expanded
13576 automagically by rs6000_expand_binop_builtin. */
13577 static const struct builtin_description bdesc_2arg_spe[] =
13579 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
13580 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
13581 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
13582 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
13583 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
13584 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
13585 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
13586 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
13587 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
13588 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
13589 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
13590 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
13591 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
13592 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
13593 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
13594 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
13595 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
13596 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
13597 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
13598 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
13599 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
13600 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
13603 /* Expand the builtin in EXP and store the result in TARGET. Store
13604 true in *EXPANDEDP if we found a builtin to expand.
13606 This expands the SPE builtins that are not simple unary and binary
13607 operations. */
13608 static rtx
13609 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
13611 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13612 tree arg1, arg0;
13613 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13614 enum insn_code icode;
13615 machine_mode tmode, mode0;
13616 rtx pat, op0;
13617 const struct builtin_description *d;
13618 size_t i;
13620 *expandedp = true;
13622 /* Syntax check for a 5-bit unsigned immediate. */
13623 switch (fcode)
13625 case SPE_BUILTIN_EVSTDD:
13626 case SPE_BUILTIN_EVSTDH:
13627 case SPE_BUILTIN_EVSTDW:
13628 case SPE_BUILTIN_EVSTWHE:
13629 case SPE_BUILTIN_EVSTWHO:
13630 case SPE_BUILTIN_EVSTWWE:
13631 case SPE_BUILTIN_EVSTWWO:
13632 arg1 = CALL_EXPR_ARG (exp, 2);
13633 if (TREE_CODE (arg1) != INTEGER_CST
13634 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13636 error ("argument 2 must be a 5-bit unsigned literal");
13637 return const0_rtx;
13639 break;
13640 default:
13641 break;
13644 /* The evsplat*i instructions are not quite generic. */
13645 switch (fcode)
13647 case SPE_BUILTIN_EVSPLATFI:
13648 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
13649 exp, target);
13650 case SPE_BUILTIN_EVSPLATI:
13651 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
13652 exp, target);
13653 default:
13654 break;
13657 d = bdesc_2arg_spe;
13658 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
13659 if (d->code == fcode)
13660 return rs6000_expand_binop_builtin (d->icode, exp, target);
13662 d = bdesc_spe_predicates;
13663 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
13664 if (d->code == fcode)
13665 return spe_expand_predicate_builtin (d->icode, exp, target);
13667 d = bdesc_spe_evsel;
13668 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
13669 if (d->code == fcode)
13670 return spe_expand_evsel_builtin (d->icode, exp, target);
13672 switch (fcode)
13674 case SPE_BUILTIN_EVSTDDX:
13675 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
13676 case SPE_BUILTIN_EVSTDHX:
13677 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
13678 case SPE_BUILTIN_EVSTDWX:
13679 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
13680 case SPE_BUILTIN_EVSTWHEX:
13681 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
13682 case SPE_BUILTIN_EVSTWHOX:
13683 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
13684 case SPE_BUILTIN_EVSTWWEX:
13685 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
13686 case SPE_BUILTIN_EVSTWWOX:
13687 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
13688 case SPE_BUILTIN_EVSTDD:
13689 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
13690 case SPE_BUILTIN_EVSTDH:
13691 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
13692 case SPE_BUILTIN_EVSTDW:
13693 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
13694 case SPE_BUILTIN_EVSTWHE:
13695 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
13696 case SPE_BUILTIN_EVSTWHO:
13697 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
13698 case SPE_BUILTIN_EVSTWWE:
13699 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
13700 case SPE_BUILTIN_EVSTWWO:
13701 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
13702 case SPE_BUILTIN_MFSPEFSCR:
13703 icode = CODE_FOR_spe_mfspefscr;
13704 tmode = insn_data[icode].operand[0].mode;
13706 if (target == 0
13707 || GET_MODE (target) != tmode
13708 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13709 target = gen_reg_rtx (tmode);
13711 pat = GEN_FCN (icode) (target);
13712 if (! pat)
13713 return 0;
13714 emit_insn (pat);
13715 return target;
13716 case SPE_BUILTIN_MTSPEFSCR:
13717 icode = CODE_FOR_spe_mtspefscr;
13718 arg0 = CALL_EXPR_ARG (exp, 0);
13719 op0 = expand_normal (arg0);
13720 mode0 = insn_data[icode].operand[0].mode;
13722 if (arg0 == error_mark_node)
13723 return const0_rtx;
13725 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13726 op0 = copy_to_mode_reg (mode0, op0);
13728 pat = GEN_FCN (icode) (op0);
13729 if (pat)
13730 emit_insn (pat);
13731 return NULL_RTX;
13732 default:
13733 break;
13736 *expandedp = false;
13737 return NULL_RTX;
13740 static rtx
13741 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13743 rtx pat, scratch, tmp;
13744 tree form = CALL_EXPR_ARG (exp, 0);
13745 tree arg0 = CALL_EXPR_ARG (exp, 1);
13746 tree arg1 = CALL_EXPR_ARG (exp, 2);
13747 rtx op0 = expand_normal (arg0);
13748 rtx op1 = expand_normal (arg1);
13749 machine_mode mode0 = insn_data[icode].operand[1].mode;
13750 machine_mode mode1 = insn_data[icode].operand[2].mode;
13751 int form_int;
13752 enum rtx_code code;
13754 if (TREE_CODE (form) != INTEGER_CST)
13756 error ("argument 1 of __builtin_paired_predicate must be a constant");
13757 return const0_rtx;
13759 else
13760 form_int = TREE_INT_CST_LOW (form);
13762 gcc_assert (mode0 == mode1);
13764 if (arg0 == error_mark_node || arg1 == error_mark_node)
13765 return const0_rtx;
13767 if (target == 0
13768 || GET_MODE (target) != SImode
13769 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
13770 target = gen_reg_rtx (SImode);
13771 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13772 op0 = copy_to_mode_reg (mode0, op0);
13773 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13774 op1 = copy_to_mode_reg (mode1, op1);
13776 scratch = gen_reg_rtx (CCFPmode);
13778 pat = GEN_FCN (icode) (scratch, op0, op1);
13779 if (!pat)
13780 return const0_rtx;
13782 emit_insn (pat);
13784 switch (form_int)
13786 /* LT bit. */
13787 case 0:
13788 code = LT;
13789 break;
13790 /* GT bit. */
13791 case 1:
13792 code = GT;
13793 break;
13794 /* EQ bit. */
13795 case 2:
13796 code = EQ;
13797 break;
13798 /* UN bit. */
13799 case 3:
13800 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13801 return target;
13802 default:
13803 error ("argument 1 of __builtin_paired_predicate is out of range");
13804 return const0_rtx;
13807 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13808 emit_move_insn (target, tmp);
13809 return target;
13812 static rtx
13813 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13815 rtx pat, scratch, tmp;
13816 tree form = CALL_EXPR_ARG (exp, 0);
13817 tree arg0 = CALL_EXPR_ARG (exp, 1);
13818 tree arg1 = CALL_EXPR_ARG (exp, 2);
13819 rtx op0 = expand_normal (arg0);
13820 rtx op1 = expand_normal (arg1);
13821 machine_mode mode0 = insn_data[icode].operand[1].mode;
13822 machine_mode mode1 = insn_data[icode].operand[2].mode;
13823 int form_int;
13824 enum rtx_code code;
13826 if (TREE_CODE (form) != INTEGER_CST)
13828 error ("argument 1 of __builtin_spe_predicate must be a constant");
13829 return const0_rtx;
13831 else
13832 form_int = TREE_INT_CST_LOW (form);
13834 gcc_assert (mode0 == mode1);
13836 if (arg0 == error_mark_node || arg1 == error_mark_node)
13837 return const0_rtx;
13839 if (target == 0
13840 || GET_MODE (target) != SImode
13841 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
13842 target = gen_reg_rtx (SImode);
13844 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13845 op0 = copy_to_mode_reg (mode0, op0);
13846 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13847 op1 = copy_to_mode_reg (mode1, op1);
13849 scratch = gen_reg_rtx (CCmode);
13851 pat = GEN_FCN (icode) (scratch, op0, op1);
13852 if (! pat)
13853 return const0_rtx;
13854 emit_insn (pat);
13856 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
13857 _lower_. We use one compare, but look in different bits of the
13858 CR for each variant.
13860 There are 2 elements in each SPE simd type (upper/lower). The CR
13861 bits are set as follows:
13863 BIT0 | BIT 1 | BIT 2 | BIT 3
13864 U | L | (U | L) | (U & L)
13866 So, for an "all" relationship, BIT 3 would be set.
13867 For an "any" relationship, BIT 2 would be set. Etc.
13869 Following traditional nomenclature, these bits map to:
13871 BIT0 | BIT 1 | BIT 2 | BIT 3
13872 LT | GT | EQ | OV
13874 Later, we will generate rtl to look in the LT/EQ/EQ/OV bits.
13877 switch (form_int)
13879 /* All variant. OV bit. */
13880 case 0:
13881 /* We need to get to the OV bit, which is the ORDERED bit. We
13882 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
13883 that's ugly and will make validate_condition_mode die.
13884 So let's just use another pattern. */
13885 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13886 return target;
13887 /* Any variant. EQ bit. */
13888 case 1:
13889 code = EQ;
13890 break;
13891 /* Upper variant. LT bit. */
13892 case 2:
13893 code = LT;
13894 break;
13895 /* Lower variant. GT bit. */
13896 case 3:
13897 code = GT;
13898 break;
13899 default:
13900 error ("argument 1 of __builtin_spe_predicate is out of range");
13901 return const0_rtx;
13904 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13905 emit_move_insn (target, tmp);
13907 return target;
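/* For illustration: with FORM 1 (the "any" variant) this emits one
   compare into a CCmode scratch and then, per the CR-bit mapping
   documented above, copies the EQ bit of that CR field into TARGET;
   FORM 0 ("all") instead reads the OV bit via
   gen_move_from_CR_ov_bit.  */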
13910 /* The evsel builtins look like this:
13912 e = __builtin_spe_evsel_OP (a, b, c, d);
13914 and work like this:
13916 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
13917 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
13920 static rtx
13921 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
13923 rtx pat, scratch;
13924 tree arg0 = CALL_EXPR_ARG (exp, 0);
13925 tree arg1 = CALL_EXPR_ARG (exp, 1);
13926 tree arg2 = CALL_EXPR_ARG (exp, 2);
13927 tree arg3 = CALL_EXPR_ARG (exp, 3);
13928 rtx op0 = expand_normal (arg0);
13929 rtx op1 = expand_normal (arg1);
13930 rtx op2 = expand_normal (arg2);
13931 rtx op3 = expand_normal (arg3);
13932 machine_mode mode0 = insn_data[icode].operand[1].mode;
13933 machine_mode mode1 = insn_data[icode].operand[2].mode;
13935 gcc_assert (mode0 == mode1);
13937 if (arg0 == error_mark_node || arg1 == error_mark_node
13938 || arg2 == error_mark_node || arg3 == error_mark_node)
13939 return const0_rtx;
13941 if (target == 0
13942 || GET_MODE (target) != mode0
13943 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
13944 target = gen_reg_rtx (mode0);
13946 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13947 op0 = copy_to_mode_reg (mode0, op0);
13948 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13949 op1 = copy_to_mode_reg (mode0, op1);
13950 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13951 op2 = copy_to_mode_reg (mode0, op2);
13952 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
13953 op3 = copy_to_mode_reg (mode0, op3);
13955 /* Generate the compare. */
13956 scratch = gen_reg_rtx (CCmode);
13957 pat = GEN_FCN (icode) (scratch, op0, op1);
13958 if (! pat)
13959 return const0_rtx;
13960 emit_insn (pat);
13962 if (mode0 == V2SImode)
13963 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
13964 else
13965 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
13967 return target;
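/* Worked example of the semantics documented above: with
   a = {1, 5}, b = {2, 3} and a signed greater-than OP,

     e = __builtin_spe_evsel_OP (a, b, c, d);

   yields e[upper] = d[upper] (1 > 2 is false) and
   e[lower] = c[lower] (5 > 3 is true).  */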
13970 /* Raise an error message for a builtin function that is called without the
13971 appropriate target options being set. */
13973 static void
13974 rs6000_invalid_builtin (enum rs6000_builtins fncode)
13976 size_t uns_fncode = (size_t)fncode;
13977 const char *name = rs6000_builtin_info[uns_fncode].name;
13978 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
13980 gcc_assert (name != NULL);
13981 if ((fnmask & RS6000_BTM_CELL) != 0)
13982 error ("builtin function %s is only valid for the cell processor", name);
13983 else if ((fnmask & RS6000_BTM_VSX) != 0)
13984 error ("builtin function %s requires the -mvsx option", name);
13985 else if ((fnmask & RS6000_BTM_HTM) != 0)
13986 error ("builtin function %s requires the -mhtm option", name);
13987 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
13988 error ("builtin function %s requires the -maltivec option", name);
13989 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
13990 error ("builtin function %s requires the -mpaired option", name);
13991 else if ((fnmask & RS6000_BTM_SPE) != 0)
13992 error ("builtin function %s requires the -mspe option", name);
13993 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13994 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13995 error ("builtin function %s requires the -mhard-dfp and"
13996 " -mpower8-vector options", name);
13997 else if ((fnmask & RS6000_BTM_DFP) != 0)
13998 error ("builtin function %s requires the -mhard-dfp option", name);
13999 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
14000 error ("builtin function %s requires the -mpower8-vector option", name);
14001 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14002 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14003 error ("builtin function %s requires the -mhard-float and"
14004 " -mlong-double-128 options", name);
14005 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
14006 error ("builtin function %s requires the -mhard-float option", name);
14007 else
14008 error ("builtin function %s is not supported with the current options",
14009 name);
14012 /* Expand an expression EXP that calls a built-in function,
14013 with result going to TARGET if that's convenient
14014 (and in mode MODE if that's convenient).
14015 SUBTARGET may be used as the target for computing one of EXP's operands.
14016 IGNORE is nonzero if the value is to be ignored. */
14018 static rtx
14019 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14020 machine_mode mode ATTRIBUTE_UNUSED,
14021 int ignore ATTRIBUTE_UNUSED)
14023 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14024 enum rs6000_builtins fcode
14025 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
14026 size_t uns_fcode = (size_t)fcode;
14027 const struct builtin_description *d;
14028 size_t i;
14029 rtx ret;
14030 bool success;
14031 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
14032 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
14034 if (TARGET_DEBUG_BUILTIN)
14036 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
14037 const char *name1 = rs6000_builtin_info[uns_fcode].name;
14038 const char *name2 = ((icode != CODE_FOR_nothing)
14039 ? get_insn_name ((int)icode)
14040 : "nothing");
14041 const char *name3;
14043 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
14045 default: name3 = "unknown"; break;
14046 case RS6000_BTC_SPECIAL: name3 = "special"; break;
14047 case RS6000_BTC_UNARY: name3 = "unary"; break;
14048 case RS6000_BTC_BINARY: name3 = "binary"; break;
14049 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
14050 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
14051 case RS6000_BTC_ABS: name3 = "abs"; break;
14052 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
14053 case RS6000_BTC_DST: name3 = "dst"; break;
14057 fprintf (stderr,
14058 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
14059 (name1) ? name1 : "---", fcode,
14060 (name2) ? name2 : "---", (int)icode,
14061 name3,
14062 func_valid_p ? "" : ", not valid");
14065 if (!func_valid_p)
14067 rs6000_invalid_builtin (fcode);
14069 /* Given it is invalid, just generate a normal call. */
14070 return expand_call (exp, target, ignore);
14073 switch (fcode)
14075 case RS6000_BUILTIN_RECIP:
14076 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
14078 case RS6000_BUILTIN_RECIPF:
14079 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
14081 case RS6000_BUILTIN_RSQRTF:
14082 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
14084 case RS6000_BUILTIN_RSQRT:
14085 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
14087 case POWER7_BUILTIN_BPERMD:
14088 return rs6000_expand_binop_builtin (((TARGET_64BIT)
14089 ? CODE_FOR_bpermd_di
14090 : CODE_FOR_bpermd_si), exp, target);
14092 case RS6000_BUILTIN_GET_TB:
14093 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
14094 target);
14096 case RS6000_BUILTIN_MFTB:
14097 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
14098 ? CODE_FOR_rs6000_mftb_di
14099 : CODE_FOR_rs6000_mftb_si),
14100 target);
14102 case RS6000_BUILTIN_MFFS:
14103 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
14105 case RS6000_BUILTIN_MTFSF:
14106 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
14108 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
14109 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
14111 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
14112 : (int) CODE_FOR_altivec_lvsl_direct);
14113 machine_mode tmode = insn_data[icode].operand[0].mode;
14114 machine_mode mode = insn_data[icode].operand[1].mode;
14115 tree arg;
14116 rtx op, addr, pat;
14118 gcc_assert (TARGET_ALTIVEC);
14120 arg = CALL_EXPR_ARG (exp, 0);
14121 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
14122 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
14123 addr = memory_address (mode, op);
14124 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
14125 op = addr;
14126 else
14128 /* For the load case we need to negate the address. */
14129 op = gen_reg_rtx (GET_MODE (addr));
14130 emit_insn (gen_rtx_SET (VOIDmode, op,
14131 gen_rtx_NEG (GET_MODE (addr), addr)));
14133 op = gen_rtx_MEM (mode, op);
14135 if (target == 0
14136 || GET_MODE (target) != tmode
14137 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14138 target = gen_reg_rtx (tmode);
14140 pat = GEN_FCN (icode) (target, op);
14141 if (!pat)
14142 return 0;
14143 emit_insn (pat);
14145 return target;
14148 case ALTIVEC_BUILTIN_VCFUX:
14149 case ALTIVEC_BUILTIN_VCFSX:
14150 case ALTIVEC_BUILTIN_VCTUXS:
14151 case ALTIVEC_BUILTIN_VCTSXS:
14152 /* FIXME: There's got to be a nicer way to handle this case than
14153 constructing a new CALL_EXPR. */
14154 if (call_expr_nargs (exp) == 1)
14156 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
14157 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
14159 break;
14161 default:
14162 break;
14165 if (TARGET_ALTIVEC)
14167 ret = altivec_expand_builtin (exp, target, &success);
14169 if (success)
14170 return ret;
14172 if (TARGET_SPE)
14174 ret = spe_expand_builtin (exp, target, &success);
14176 if (success)
14177 return ret;
14179 if (TARGET_PAIRED_FLOAT)
14181 ret = paired_expand_builtin (exp, target, &success);
14183 if (success)
14184 return ret;
14186 if (TARGET_HTM)
14188 ret = htm_expand_builtin (exp, target, &success);
14190 if (success)
14191 return ret;
14194 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
14195 gcc_assert (attr == RS6000_BTC_UNARY
14196 || attr == RS6000_BTC_BINARY
14197 || attr == RS6000_BTC_TERNARY);
14199 /* Handle simple unary operations. */
14200 d = bdesc_1arg;
14201 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14202 if (d->code == fcode)
14203 return rs6000_expand_unop_builtin (d->icode, exp, target);
14205 /* Handle simple binary operations. */
14206 d = bdesc_2arg;
14207 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14208 if (d->code == fcode)
14209 return rs6000_expand_binop_builtin (d->icode, exp, target);
14211 /* Handle simple ternary operations. */
14212 d = bdesc_3arg;
14213 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14214 if (d->code == fcode)
14215 return rs6000_expand_ternop_builtin (d->icode, exp, target);
14217 gcc_unreachable ();
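/* Summary of the dispatch order above: target-wide special cases
   first, then the AltiVec, SPE, paired and HTM expanders (each
   reporting success through its bool out-parameter), and finally the
   generic unary/binary/ternary tables; any builtin not matched by one
   of these trips the gcc_unreachable.  */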
14220 static void
14221 rs6000_init_builtins (void)
14223 tree tdecl;
14224 tree ftype;
14225 machine_mode mode;
14227 if (TARGET_DEBUG_BUILTIN)
14228 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
14229 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
14230 (TARGET_SPE) ? ", spe" : "",
14231 (TARGET_ALTIVEC) ? ", altivec" : "",
14232 (TARGET_VSX) ? ", vsx" : "");
14234 V2SI_type_node = build_vector_type (intSI_type_node, 2);
14235 V2SF_type_node = build_vector_type (float_type_node, 2);
14236 V2DI_type_node = build_vector_type (intDI_type_node, 2);
14237 V2DF_type_node = build_vector_type (double_type_node, 2);
14238 V4HI_type_node = build_vector_type (intHI_type_node, 4);
14239 V4SI_type_node = build_vector_type (intSI_type_node, 4);
14240 V4SF_type_node = build_vector_type (float_type_node, 4);
14241 V8HI_type_node = build_vector_type (intHI_type_node, 8);
14242 V16QI_type_node = build_vector_type (intQI_type_node, 16);
14244 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
14245 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
14246 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
14247 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
14249 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
14250 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
14251 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
14252 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
14254 /* We use V1TI mode as a special container to hold __int128_t items that
14255 must live in VSX registers. */
14256 if (intTI_type_node)
14258 V1TI_type_node = build_vector_type (intTI_type_node, 1);
14259 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
14262 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
14263 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
14264 'vector unsigned short'. */
14266 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14267 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14268 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14269 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14270 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14272 long_integer_type_internal_node = long_integer_type_node;
14273 long_unsigned_type_internal_node = long_unsigned_type_node;
14274 long_long_integer_type_internal_node = long_long_integer_type_node;
14275 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14276 intQI_type_internal_node = intQI_type_node;
14277 uintQI_type_internal_node = unsigned_intQI_type_node;
14278 intHI_type_internal_node = intHI_type_node;
14279 uintHI_type_internal_node = unsigned_intHI_type_node;
14280 intSI_type_internal_node = intSI_type_node;
14281 uintSI_type_internal_node = unsigned_intSI_type_node;
14282 intDI_type_internal_node = intDI_type_node;
14283 uintDI_type_internal_node = unsigned_intDI_type_node;
14284 intTI_type_internal_node = intTI_type_node;
14285 uintTI_type_internal_node = unsigned_intTI_type_node;
14286 float_type_internal_node = float_type_node;
14287 double_type_internal_node = double_type_node;
14288 long_double_type_internal_node = long_double_type_node;
14289 dfloat64_type_internal_node = dfloat64_type_node;
14290 dfloat128_type_internal_node = dfloat128_type_node;
14291 void_type_internal_node = void_type_node;
14293 /* Initialize the modes for builtin_function_type, mapping a machine mode to
14294 tree type node. */
14295 builtin_mode_to_type[QImode][0] = integer_type_node;
14296 builtin_mode_to_type[HImode][0] = integer_type_node;
14297 builtin_mode_to_type[SImode][0] = intSI_type_node;
14298 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14299 builtin_mode_to_type[DImode][0] = intDI_type_node;
14300 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14301 builtin_mode_to_type[TImode][0] = intTI_type_node;
14302 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14303 builtin_mode_to_type[SFmode][0] = float_type_node;
14304 builtin_mode_to_type[DFmode][0] = double_type_node;
14305 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14306 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14307 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14308 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14309 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14310 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14311 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14312 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14313 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14314 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14315 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14316 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14317 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14318 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14319 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14320 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14321 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14322 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;

  tdecl = add_builtin_type ("__bool char", bool_char_type_node);
  TYPE_NAME (bool_char_type_node) = tdecl;

  tdecl = add_builtin_type ("__bool short", bool_short_type_node);
  TYPE_NAME (bool_short_type_node) = tdecl;

  tdecl = add_builtin_type ("__bool int", bool_int_type_node);
  TYPE_NAME (bool_int_type_node) = tdecl;

  tdecl = add_builtin_type ("__pixel", pixel_type_node);
  TYPE_NAME (pixel_type_node) = tdecl;

  bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
  bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
  bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
  bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
  pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);

  tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
  TYPE_NAME (unsigned_V16QI_type_node) = tdecl;

  tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
  TYPE_NAME (V16QI_type_node) = tdecl;

  tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
  TYPE_NAME (bool_V16QI_type_node) = tdecl;

  tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
  TYPE_NAME (unsigned_V8HI_type_node) = tdecl;

  tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
  TYPE_NAME (V8HI_type_node) = tdecl;

  tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
  TYPE_NAME (bool_V8HI_type_node) = tdecl;

  tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
  TYPE_NAME (unsigned_V4SI_type_node) = tdecl;

  tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
  TYPE_NAME (V4SI_type_node) = tdecl;

  tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
  TYPE_NAME (bool_V4SI_type_node) = tdecl;

  tdecl = add_builtin_type ("__vector float", V4SF_type_node);
  TYPE_NAME (V4SF_type_node) = tdecl;

  tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
  TYPE_NAME (pixel_V8HI_type_node) = tdecl;

  tdecl = add_builtin_type ("__vector double", V2DF_type_node);
  TYPE_NAME (V2DF_type_node) = tdecl;

  if (TARGET_POWERPC64)
    {
      tdecl = add_builtin_type ("__vector long", V2DI_type_node);
      TYPE_NAME (V2DI_type_node) = tdecl;

      tdecl = add_builtin_type ("__vector unsigned long",
				unsigned_V2DI_type_node);
      TYPE_NAME (unsigned_V2DI_type_node) = tdecl;

      tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
      TYPE_NAME (bool_V2DI_type_node) = tdecl;
    }
  else
    {
      tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
      TYPE_NAME (V2DI_type_node) = tdecl;

      tdecl = add_builtin_type ("__vector unsigned long long",
				unsigned_V2DI_type_node);
      TYPE_NAME (unsigned_V2DI_type_node) = tdecl;

      tdecl = add_builtin_type ("__vector __bool long long",
				bool_V2DI_type_node);
      TYPE_NAME (bool_V2DI_type_node) = tdecl;
    }

  if (V1TI_type_node)
    {
      tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
      TYPE_NAME (V1TI_type_node) = tdecl;

      tdecl = add_builtin_type ("__vector unsigned __int128",
				unsigned_V1TI_type_node);
      TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
    }

  /* Paired and SPE builtins are only available if you build a compiler with
     the appropriate options, so only create those builtins with the
     appropriate compiler option.  Create Altivec and VSX builtins on machines
     with at least the general purpose extensions (970 and newer) to allow the
     use of the target attribute.  */
  if (TARGET_PAIRED_FLOAT)
    paired_init_builtins ();
  if (TARGET_SPE)
    spe_init_builtins ();
  if (TARGET_EXTRA_BUILTINS)
    altivec_init_builtins ();
  if (TARGET_HTM)
    htm_init_builtins ();

  if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
    rs6000_common_init_builtins ();

  ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
				 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
  def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);

  ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
				 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
  def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);

  ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
				 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
  def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);

  ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
				 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
  def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
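
  /* From user code these appear as target intrinsics; a minimal sketch of
     their use (illustrative only, on hardware providing the reciprocal
     estimate instructions):

	double q = __builtin_recipdiv (x, y);	// approximate x divided by y
	double r = __builtin_rsqrt (x);		// approximate reciprocal sqrt

     Both are estimates refined by Newton-Raphson steps elsewhere in this
     file, not IEEE-exact operations.  */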

  mode = (TARGET_64BIT) ? DImode : SImode;
  ftype = builtin_function_type (mode, mode, mode, VOIDmode,
				 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
  def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);

  ftype = build_function_type_list (unsigned_intDI_type_node,
				    NULL_TREE);
  def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);

  if (TARGET_64BIT)
    ftype = build_function_type_list (unsigned_intDI_type_node,
				      NULL_TREE);
  else
    ftype = build_function_type_list (unsigned_intSI_type_node,
				      NULL_TREE);
  def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);

  ftype = build_function_type_list (double_type_node, NULL_TREE);
  def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);

  ftype = build_function_type_list (void_type_node,
				    intSI_type_node, double_type_node,
				    NULL_TREE);
  def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
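
  /* A sketch of how the time base builtins are typically used
     (illustrative only):

	unsigned long long t0 = __builtin_ppc_get_timebase ();
	// ... work being timed ...
	unsigned long long t1 = __builtin_ppc_get_timebase ();
	// elapsed time base ticks: t1 - t0

     On 64-bit targets __builtin_ppc_mftb returns the same 64-bit value;
     on 32-bit targets it returns only the low word, which is why its
     return type is chosen above based on TARGET_64BIT.  */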

#if TARGET_XCOFF
  /* AIX libm provides clog as __clog.  */
  if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
    set_user_assembler_name (tdecl, "__clog");
#endif

#ifdef SUBTARGET_INIT_BUILTINS
  SUBTARGET_INIT_BUILTINS;
#endif
}

/* Returns the rs6000 builtin decl for CODE.  */

static tree
rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  HOST_WIDE_INT fnmask;

  if (code >= RS6000_BUILTIN_COUNT)
    return error_mark_node;

  fnmask = rs6000_builtin_info[code].mask;
  if ((fnmask & rs6000_builtin_mask) != fnmask)
    {
      rs6000_invalid_builtin ((enum rs6000_builtins) code);
      return error_mark_node;
    }

  return rs6000_builtin_decls[code];
}
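
/* Each builtin's mask records which ISA features it needs; a builtin is
   usable only if every required feature bit is also set in
   rs6000_builtin_mask.  Worked example (hypothetical bit values):

     fnmask              = RS6000_BTM_ALTIVEC | RS6000_BTM_VSX
     rs6000_builtin_mask = RS6000_BTM_ALTIVEC

   Then (fnmask & rs6000_builtin_mask) == RS6000_BTM_ALTIVEC != fnmask,
   so the builtin is rejected and diagnosed via rs6000_invalid_builtin.  */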

static void
spe_init_builtins (void)
{
  tree puint_type_node = build_pointer_type (unsigned_type_node);
  tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
  const struct builtin_description *d;
  size_t i;

  tree v2si_ftype_4_v2si
    = build_function_type_list (opaque_V2SI_type_node,
				opaque_V2SI_type_node,
				opaque_V2SI_type_node,
				opaque_V2SI_type_node,
				opaque_V2SI_type_node,
				NULL_TREE);

  tree v2sf_ftype_4_v2sf
    = build_function_type_list (opaque_V2SF_type_node,
				opaque_V2SF_type_node,
				opaque_V2SF_type_node,
				opaque_V2SF_type_node,
				opaque_V2SF_type_node,
				NULL_TREE);

  tree int_ftype_int_v2si_v2si
    = build_function_type_list (integer_type_node,
				integer_type_node,
				opaque_V2SI_type_node,
				opaque_V2SI_type_node,
				NULL_TREE);

  tree int_ftype_int_v2sf_v2sf
    = build_function_type_list (integer_type_node,
				integer_type_node,
				opaque_V2SF_type_node,
				opaque_V2SF_type_node,
				NULL_TREE);

  tree void_ftype_v2si_puint_int
    = build_function_type_list (void_type_node,
				opaque_V2SI_type_node,
				puint_type_node,
				integer_type_node,
				NULL_TREE);

  tree void_ftype_v2si_puint_char
    = build_function_type_list (void_type_node,
				opaque_V2SI_type_node,
				puint_type_node,
				char_type_node,
				NULL_TREE);

  tree void_ftype_v2si_pv2si_int
    = build_function_type_list (void_type_node,
				opaque_V2SI_type_node,
				opaque_p_V2SI_type_node,
				integer_type_node,
				NULL_TREE);

  tree void_ftype_v2si_pv2si_char
    = build_function_type_list (void_type_node,
				opaque_V2SI_type_node,
				opaque_p_V2SI_type_node,
				char_type_node,
				NULL_TREE);

  tree void_ftype_int
    = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);

  tree int_ftype_void
    = build_function_type_list (integer_type_node, NULL_TREE);

  tree v2si_ftype_pv2si_int
    = build_function_type_list (opaque_V2SI_type_node,
				opaque_p_V2SI_type_node,
				integer_type_node,
				NULL_TREE);

  tree v2si_ftype_puint_int
    = build_function_type_list (opaque_V2SI_type_node,
				puint_type_node,
				integer_type_node,
				NULL_TREE);

  tree v2si_ftype_pushort_int
    = build_function_type_list (opaque_V2SI_type_node,
				pushort_type_node,
				integer_type_node,
				NULL_TREE);

  tree v2si_ftype_signed_char
    = build_function_type_list (opaque_V2SI_type_node,
				signed_char_type_node,
				NULL_TREE);

  add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);

  /* Initialize irregular SPE builtins.  */

  def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
  def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
  def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
  def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
  def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
  def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
  def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
  def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
  def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
  def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
  def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
  def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
  def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
  def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
  def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
  def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
  def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
  def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);

  /* Loads.  */
  def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
  def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
  def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
  def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
  def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
  def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
  def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
  def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
  def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
  def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
  def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
  def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
  def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
  def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
  def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
  def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
  def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
  def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
  def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
  def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
  def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
  def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);

  /* Predicates.  */
  d = bdesc_spe_predicates;
  for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
    {
      tree type;

      switch (insn_data[d->icode].operand[1].mode)
	{
	case V2SImode:
	  type = int_ftype_int_v2si_v2si;
	  break;
	case V2SFmode:
	  type = int_ftype_int_v2sf_v2sf;
	  break;
	default:
	  gcc_unreachable ();
	}

      def_builtin (d->name, type, d->code);
    }

  /* Evsel predicates.  */
  d = bdesc_spe_evsel;
  for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
    {
      tree type;

      switch (insn_data[d->icode].operand[1].mode)
	{
	case V2SImode:
	  type = v2si_ftype_4_v2si;
	  break;
	case V2SFmode:
	  type = v2sf_ftype_4_v2sf;
	  break;
	default:
	  gcc_unreachable ();
	}

      def_builtin (d->name, type, d->code);
    }
}
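
/* The bdesc_* tables driving these loops pair a builtin's ISA mask,
   instruction pattern, user-visible name, and enum code.  A sketch of one
   entry (the builtin name below is hypothetical; see rs6000-builtin.def
   for the real tables):

     { RS6000_BTM_SPE,		     // mask: ISA features required
       CODE_FOR_nothing,	     // icode: matching insn pattern
       "__builtin_spe_example",	     // hypothetical user-visible name
       SPE_BUILTIN_EVSPLATI },	     // enum rs6000_builtins code

   so adding a builtin is largely a matter of adding a table entry.  */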

static void
paired_init_builtins (void)
{
  const struct builtin_description *d;
  size_t i;

  tree int_ftype_int_v2sf_v2sf
    = build_function_type_list (integer_type_node,
				integer_type_node,
				V2SF_type_node,
				V2SF_type_node,
				NULL_TREE);
  tree pcfloat_type_node =
    build_pointer_type (build_qualified_type
			(float_type_node, TYPE_QUAL_CONST));

  tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
							   long_integer_type_node,
							   pcfloat_type_node,
							   NULL_TREE);
  tree void_ftype_v2sf_long_pcfloat =
    build_function_type_list (void_type_node,
			      V2SF_type_node,
			      long_integer_type_node,
			      pcfloat_type_node,
			      NULL_TREE);

  def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
	       PAIRED_BUILTIN_LX);

  def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
	       PAIRED_BUILTIN_STX);

  /* Predicates.  */
  d = bdesc_paired_preds;
  for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
    {
      tree type;

      if (TARGET_DEBUG_BUILTIN)
	fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
		 (int) i, get_insn_name (d->icode), (int) d->icode,
		 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));

      switch (insn_data[d->icode].operand[1].mode)
	{
	case V2SFmode:
	  type = int_ftype_int_v2sf_v2sf;
	  break;
	default:
	  gcc_unreachable ();
	}

      def_builtin (d->name, type, d->code);
    }
}

static void
altivec_init_builtins (void)
{
  const struct builtin_description *d;
  size_t i;
  tree ftype;
  tree decl;

  tree pvoid_type_node = build_pointer_type (void_type_node);

  tree pcvoid_type_node
    = build_pointer_type (build_qualified_type (void_type_node,
						TYPE_QUAL_CONST));

  tree int_ftype_opaque
    = build_function_type_list (integer_type_node,
				opaque_V4SI_type_node, NULL_TREE);
  tree opaque_ftype_opaque
    = build_function_type_list (integer_type_node, NULL_TREE);
  tree opaque_ftype_opaque_int
    = build_function_type_list (opaque_V4SI_type_node,
				opaque_V4SI_type_node, integer_type_node,
				NULL_TREE);
  tree opaque_ftype_opaque_opaque_int
    = build_function_type_list (opaque_V4SI_type_node,
				opaque_V4SI_type_node, opaque_V4SI_type_node,
				integer_type_node, NULL_TREE);
  tree int_ftype_int_opaque_opaque
    = build_function_type_list (integer_type_node,
				integer_type_node, opaque_V4SI_type_node,
				opaque_V4SI_type_node, NULL_TREE);
  tree int_ftype_int_v4si_v4si
    = build_function_type_list (integer_type_node,
				integer_type_node, V4SI_type_node,
				V4SI_type_node, NULL_TREE);
  tree int_ftype_int_v2di_v2di
    = build_function_type_list (integer_type_node,
				integer_type_node, V2DI_type_node,
				V2DI_type_node, NULL_TREE);
  tree void_ftype_v4si
    = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
  tree v8hi_ftype_void
    = build_function_type_list (V8HI_type_node, NULL_TREE);
  tree void_ftype_void
    = build_function_type_list (void_type_node, NULL_TREE);
  tree void_ftype_int
    = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);

  tree opaque_ftype_long_pcvoid
    = build_function_type_list (opaque_V4SI_type_node,
				long_integer_type_node, pcvoid_type_node,
				NULL_TREE);
  tree v16qi_ftype_long_pcvoid
    = build_function_type_list (V16QI_type_node,
				long_integer_type_node, pcvoid_type_node,
				NULL_TREE);
  tree v8hi_ftype_long_pcvoid
    = build_function_type_list (V8HI_type_node,
				long_integer_type_node, pcvoid_type_node,
				NULL_TREE);
  tree v4si_ftype_long_pcvoid
    = build_function_type_list (V4SI_type_node,
				long_integer_type_node, pcvoid_type_node,
				NULL_TREE);
  tree v4sf_ftype_long_pcvoid
    = build_function_type_list (V4SF_type_node,
				long_integer_type_node, pcvoid_type_node,
				NULL_TREE);
  tree v2df_ftype_long_pcvoid
    = build_function_type_list (V2DF_type_node,
				long_integer_type_node, pcvoid_type_node,
				NULL_TREE);
  tree v2di_ftype_long_pcvoid
    = build_function_type_list (V2DI_type_node,
				long_integer_type_node, pcvoid_type_node,
				NULL_TREE);

  tree void_ftype_opaque_long_pvoid
    = build_function_type_list (void_type_node,
				opaque_V4SI_type_node, long_integer_type_node,
				pvoid_type_node, NULL_TREE);
  tree void_ftype_v4si_long_pvoid
    = build_function_type_list (void_type_node,
				V4SI_type_node, long_integer_type_node,
				pvoid_type_node, NULL_TREE);
  tree void_ftype_v16qi_long_pvoid
    = build_function_type_list (void_type_node,
				V16QI_type_node, long_integer_type_node,
				pvoid_type_node, NULL_TREE);
  tree void_ftype_v8hi_long_pvoid
    = build_function_type_list (void_type_node,
				V8HI_type_node, long_integer_type_node,
				pvoid_type_node, NULL_TREE);
  tree void_ftype_v4sf_long_pvoid
    = build_function_type_list (void_type_node,
				V4SF_type_node, long_integer_type_node,
				pvoid_type_node, NULL_TREE);
  tree void_ftype_v2df_long_pvoid
    = build_function_type_list (void_type_node,
				V2DF_type_node, long_integer_type_node,
				pvoid_type_node, NULL_TREE);
  tree void_ftype_v2di_long_pvoid
    = build_function_type_list (void_type_node,
				V2DI_type_node, long_integer_type_node,
				pvoid_type_node, NULL_TREE);
  tree int_ftype_int_v8hi_v8hi
    = build_function_type_list (integer_type_node,
				integer_type_node, V8HI_type_node,
				V8HI_type_node, NULL_TREE);
  tree int_ftype_int_v16qi_v16qi
    = build_function_type_list (integer_type_node,
				integer_type_node, V16QI_type_node,
				V16QI_type_node, NULL_TREE);
  tree int_ftype_int_v4sf_v4sf
    = build_function_type_list (integer_type_node,
				integer_type_node, V4SF_type_node,
				V4SF_type_node, NULL_TREE);
  tree int_ftype_int_v2df_v2df
    = build_function_type_list (integer_type_node,
				integer_type_node, V2DF_type_node,
				V2DF_type_node, NULL_TREE);
  tree v2di_ftype_v2di
    = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
  tree v4si_ftype_v4si
    = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
  tree v8hi_ftype_v8hi
    = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
  tree v16qi_ftype_v16qi
    = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
  tree v4sf_ftype_v4sf
    = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
  tree v2df_ftype_v2df
    = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
  tree void_ftype_pcvoid_int_int
    = build_function_type_list (void_type_node,
				pcvoid_type_node, integer_type_node,
				integer_type_node, NULL_TREE);

  def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
  def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
  def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
  def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
  def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
  def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
  def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
  def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
  def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
  def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
  def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
	       ALTIVEC_BUILTIN_LVXL_V2DF);
  def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
	       ALTIVEC_BUILTIN_LVXL_V2DI);
  def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
	       ALTIVEC_BUILTIN_LVXL_V4SF);
  def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
	       ALTIVEC_BUILTIN_LVXL_V4SI);
  def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
	       ALTIVEC_BUILTIN_LVXL_V8HI);
  def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
	       ALTIVEC_BUILTIN_LVXL_V16QI);
  def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
  def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
	       ALTIVEC_BUILTIN_LVX_V2DF);
  def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
	       ALTIVEC_BUILTIN_LVX_V2DI);
  def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
	       ALTIVEC_BUILTIN_LVX_V4SF);
  def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
	       ALTIVEC_BUILTIN_LVX_V4SI);
  def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
	       ALTIVEC_BUILTIN_LVX_V8HI);
  def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
	       ALTIVEC_BUILTIN_LVX_V16QI);
  def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
  def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
	       ALTIVEC_BUILTIN_STVX_V2DF);
  def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
	       ALTIVEC_BUILTIN_STVX_V2DI);
  def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
	       ALTIVEC_BUILTIN_STVX_V4SF);
  def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
	       ALTIVEC_BUILTIN_STVX_V4SI);
  def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
	       ALTIVEC_BUILTIN_STVX_V8HI);
  def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
	       ALTIVEC_BUILTIN_STVX_V16QI);
  def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
  def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
  def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
	       ALTIVEC_BUILTIN_STVXL_V2DF);
  def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
	       ALTIVEC_BUILTIN_STVXL_V2DI);
  def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
	       ALTIVEC_BUILTIN_STVXL_V4SF);
  def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
	       ALTIVEC_BUILTIN_STVXL_V4SI);
  def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
	       ALTIVEC_BUILTIN_STVXL_V8HI);
  def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
	       ALTIVEC_BUILTIN_STVXL_V16QI);
  def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
  def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
  def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
  def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
  def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
  def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
  def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
  def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
  def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
  def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
  def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
  def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
  def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
  def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
  def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
  def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);

  def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
	       VSX_BUILTIN_LXVD2X_V2DF);
  def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
	       VSX_BUILTIN_LXVD2X_V2DI);
  def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
	       VSX_BUILTIN_LXVW4X_V4SF);
  def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
	       VSX_BUILTIN_LXVW4X_V4SI);
  def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
	       VSX_BUILTIN_LXVW4X_V8HI);
  def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
	       VSX_BUILTIN_LXVW4X_V16QI);
  def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
	       VSX_BUILTIN_STXVD2X_V2DF);
  def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
	       VSX_BUILTIN_STXVD2X_V2DI);
  def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
	       VSX_BUILTIN_STXVW4X_V4SF);
  def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
	       VSX_BUILTIN_STXVW4X_V4SI);
  def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
	       VSX_BUILTIN_STXVW4X_V8HI);
  def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
	       VSX_BUILTIN_STXVW4X_V16QI);
  def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
	       VSX_BUILTIN_VEC_LD);
  def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
	       VSX_BUILTIN_VEC_ST);
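
  /* These two overloaded entry points back the vec_vsx_ld/vec_vsx_st
     interfaces from <altivec.h>.  Illustrative use (assuming VSX is
     enabled; the variable names are placeholders):

	vector double vd = vec_vsx_ld (0, double_ptr);
	vec_vsx_st (vd, 0, double_ptr);

     The front ends later resolve the opaque type to one of the concrete
     LXVD2X/STXVD2X or LXVW4X/STXVW4X variants defined just above.  */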

  def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
  def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
  def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);

  def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
  def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
  def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
  def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
  def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
  def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
  def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
  def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
  def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
  def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
  def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
  def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);

  /* Cell builtins.  */
  def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
  def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
  def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
  def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);

  def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
  def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
  def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
  def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);

  def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
  def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
  def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
  def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);

  def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
  def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
  def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
  def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);

  /* Add the DST variants.  */
  d = bdesc_dst;
  for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
    def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);

  /* Initialize the predicates.  */
  d = bdesc_altivec_preds;
  for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
    {
      machine_mode mode1;
      tree type;

      if (rs6000_overloaded_builtin_p (d->code))
	mode1 = VOIDmode;
      else
	mode1 = insn_data[d->icode].operand[1].mode;

      switch (mode1)
	{
	case VOIDmode:
	  type = int_ftype_int_opaque_opaque;
	  break;
	case V2DImode:
	  type = int_ftype_int_v2di_v2di;
	  break;
	case V4SImode:
	  type = int_ftype_int_v4si_v4si;
	  break;
	case V8HImode:
	  type = int_ftype_int_v8hi_v8hi;
	  break;
	case V16QImode:
	  type = int_ftype_int_v16qi_v16qi;
	  break;
	case V4SFmode:
	  type = int_ftype_int_v4sf_v4sf;
	  break;
	case V2DFmode:
	  type = int_ftype_int_v2df_v2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      def_builtin (d->name, type, d->code);
    }

  /* Initialize the abs* operators.  */
  d = bdesc_abs;
  for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
    {
      machine_mode mode0;
      tree type;

      mode0 = insn_data[d->icode].operand[0].mode;

      switch (mode0)
	{
	case V2DImode:
	  type = v2di_ftype_v2di;
	  break;
	case V4SImode:
	  type = v4si_ftype_v4si;
	  break;
	case V8HImode:
	  type = v8hi_ftype_v8hi;
	  break;
	case V16QImode:
	  type = v16qi_ftype_v16qi;
	  break;
	case V4SFmode:
	  type = v4sf_ftype_v4sf;
	  break;
	case V2DFmode:
	  type = v2df_ftype_v2df;
	  break;
	default:
	  gcc_unreachable ();
	}

      def_builtin (d->name, type, d->code);
    }

  /* Initialize target builtin that implements
     targetm.vectorize.builtin_mask_for_load.  */

  decl = add_builtin_function ("__builtin_altivec_mask_for_load",
			       v16qi_ftype_long_pcvoid,
			       ALTIVEC_BUILTIN_MASK_FOR_LOAD,
			       BUILT_IN_MD, NULL, NULL_TREE);
  TREE_READONLY (decl) = 1;
  /* Record the decl.  Will be used by rs6000_builtin_mask_for_load.  */
  altivec_builtin_mask_for_load = decl;
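
  /* The vectorizer calls this hook when it must realign a misaligned
     vector load; the builtin expands to an lvsl-style permute control
     vector.  Conceptually (pseudo-code, not the exact sequence emitted):

	mask = __builtin_altivec_mask_for_load (addr);	// based on addr & 15
	lo   = load from (addr & ~15);
	hi   = load from ((addr & ~15) + 16);
	val  = permute (lo, hi, mask);

     TREE_READONLY is set because the mask depends only on the address.  */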

  /* Access to the vec_init patterns.  */
  ftype = build_function_type_list (V4SI_type_node, integer_type_node,
				    integer_type_node, integer_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);

  ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node,
				    short_integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);

  ftype = build_function_type_list (V16QI_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, char_type_node,
				    char_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_init_v16qi", ftype,
	       ALTIVEC_BUILTIN_VEC_INIT_V16QI);

  ftype = build_function_type_list (V4SF_type_node, float_type_node,
				    float_type_node, float_type_node,
				    float_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);

  /* VSX builtins.  */
  ftype = build_function_type_list (V2DF_type_node, double_type_node,
				    double_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);

  ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
				    intDI_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
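
  /* The vec_init builtins give the front ends a way to spell vector
     construction from scalars; e.g. a generic vector initializer such as

	vector int v = (vector int) { a, b, c, d };

     can be lowered through __builtin_vec_init_v4si (a, b, c, d)
     (illustrative; the middle end usually emits the vec_init RTL
     pattern directly).  */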

  /* Access to the vec_set patterns.  */
  ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
				    intSI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);

  ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
				    intHI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);

  ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
				    intQI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);

  ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
				    float_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);

  ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
				    double_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);

  ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
				    intDI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);

  /* Access to the vec_extract patterns.  */
  ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);

  ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);

  ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);

  ftype = build_function_type_list (float_type_node, V4SF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);

  ftype = build_function_type_list (double_type_node, V2DF_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);

  ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
				    integer_type_node, NULL_TREE);
  def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
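
  /* Likewise, vec_set/vec_ext correspond to element assignment and
     element read; illustratively (v is a hypothetical V4SI value):

	v = __builtin_vec_set_v4si (v, 42, 3);	   // v[3] = 42
	int e = __builtin_vec_ext_v4si (v, 3);	   // e = v[3]

     with the final integer operand selecting the element number.  */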

  if (V1TI_type_node)
    {
      tree v1ti_ftype_long_pcvoid
	= build_function_type_list (V1TI_type_node,
				    long_integer_type_node, pcvoid_type_node,
				    NULL_TREE);
      tree void_ftype_v1ti_long_pvoid
	= build_function_type_list (void_type_node,
				    V1TI_type_node, long_integer_type_node,
				    pvoid_type_node, NULL_TREE);
      def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
		   VSX_BUILTIN_LXVD2X_V1TI);
      def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
		   VSX_BUILTIN_STXVD2X_V1TI);
      ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
					NULL_TREE, NULL_TREE);
      def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
      ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
					intTI_type_node,
					integer_type_node, NULL_TREE);
      def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
      ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
					integer_type_node, NULL_TREE);
      def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
    }
}

static void
htm_init_builtins (void)
{
  HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
  const struct builtin_description *d;
  size_t i;

  d = bdesc_htm;
  for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
    {
      tree op[MAX_HTM_OPERANDS], type;
      HOST_WIDE_INT mask = d->mask;
      unsigned attr = rs6000_builtin_info[d->code].attr;
      bool void_func = (attr & RS6000_BTC_VOID);
      int attr_args = (attr & RS6000_BTC_TYPE_MASK);
      int nopnds = 0;
      tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
					     : unsigned_type_node;

      if ((mask & builtin_mask) != mask)
	{
	  if (TARGET_DEBUG_BUILTIN)
	    fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
	  continue;
	}

      if (d->name == 0)
	{
	  if (TARGET_DEBUG_BUILTIN)
	    fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
		     (long unsigned) i);
	  continue;
	}

      op[nopnds++] = (void_func) ? void_type_node : argtype;

      if (attr_args == RS6000_BTC_UNARY)
	op[nopnds++] = argtype;
      else if (attr_args == RS6000_BTC_BINARY)
	{
	  op[nopnds++] = argtype;
	  op[nopnds++] = argtype;
	}
      else if (attr_args == RS6000_BTC_TERNARY)
	{
	  op[nopnds++] = argtype;
	  op[nopnds++] = argtype;
	  op[nopnds++] = argtype;
	}

      switch (nopnds)
	{
	case 1:
	  type = build_function_type_list (op[0], NULL_TREE);
	  break;
	case 2:
	  type = build_function_type_list (op[0], op[1], NULL_TREE);
	  break;
	case 3:
	  type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
	  break;
	case 4:
	  type = build_function_type_list (op[0], op[1], op[2], op[3],
					   NULL_TREE);
	  break;
	default:
	  gcc_unreachable ();
	}

      def_builtin (d->name, type, d->code);
    }
}
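
/* The HTM table yields builtins such as __builtin_tbegin/__builtin_tend;
   a minimal transaction sketch (illustrative only, requires -mhtm;
   handle_failure is a hypothetical fallback):

     if (__builtin_tbegin (0))
       {
	 // ... transactional code ...
	 __builtin_tend (0);
       }
     else
       handle_failure ();

   matching the usual begin/commit/abort pattern of Power ISA HTM.  */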

/* Hash function for builtin functions with up to 3 arguments and a return
   type.  */
hashval_t
builtin_hasher::hash (builtin_hash_struct *bh)
{
  unsigned ret = 0;
  int i;

  for (i = 0; i < 4; i++)
    {
      ret = (ret * (unsigned) MAX_MACHINE_MODE) + ((unsigned) bh->mode[i]);
      ret = (ret * 2) + bh->uns_p[i];
    }

  return ret;
}

/* Compare builtin hash entries H1 and H2 for equivalence.  */
bool
builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
{
  return ((p1->mode[0] == p2->mode[0])
	  && (p1->mode[1] == p2->mode[1])
	  && (p1->mode[2] == p2->mode[2])
	  && (p1->mode[3] == p2->mode[3])
	  && (p1->uns_p[0] == p2->uns_p[0])
	  && (p1->uns_p[1] == p2->uns_p[1])
	  && (p1->uns_p[2] == p2->uns_p[2])
	  && (p1->uns_p[3] == p2->uns_p[3]));
}
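
/* The hash treats the four (mode, unsigned-p) pairs as digits of a
   mixed-radix number: each step multiplies by MAX_MACHINE_MODE and adds
   the mode, then multiplies by 2 and adds the signedness bit.  With
   MAX_MACHINE_MODE written as M, the value is (sketch):

     hash = ((((0*M + mode[0]) * 2 + uns_p[0]) * M + mode[1]) * 2 + ...

   a perfect encoding as long as no intermediate overflow occurs;
   overflow merely degrades it to an ordinary hash, which equal ()
   then disambiguates.  */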

/* Map types for builtin functions with an explicit return type and up to 3
   arguments.  Functions with fewer than 3 arguments use VOIDmode as the mode
   of the unused arguments.  */
static tree
builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
		       machine_mode mode_arg1, machine_mode mode_arg2,
		       enum rs6000_builtins builtin, const char *name)
{
  struct builtin_hash_struct h;
  struct builtin_hash_struct *h2;
  int num_args = 3;
  int i;
  tree ret_type = NULL_TREE;
  tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };

  /* Create builtin_hash_table.  */
  if (builtin_hash_table == NULL)
    builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);

  h.type = NULL_TREE;
  h.mode[0] = mode_ret;
  h.mode[1] = mode_arg0;
  h.mode[2] = mode_arg1;
  h.mode[3] = mode_arg2;
  h.uns_p[0] = 0;
  h.uns_p[1] = 0;
  h.uns_p[2] = 0;
  h.uns_p[3] = 0;

  /* If the builtin is a type that produces unsigned results or takes unsigned
     arguments, and it is returned as a decl for the vectorizer (such as
     widening multiplies, permute), make sure the arguments and return value
     are type correct.  */
  switch (builtin)
    {
      /* unsigned 1 argument functions.  */
    case CRYPTO_BUILTIN_VSBOX:
    case P8V_BUILTIN_VGBBD:
    case MISC_BUILTIN_CDTBCD:
    case MISC_BUILTIN_CBCDTD:
      h.uns_p[0] = 1;
      h.uns_p[1] = 1;
      break;

      /* unsigned 2 argument functions.  */
    case ALTIVEC_BUILTIN_VMULEUB_UNS:
    case ALTIVEC_BUILTIN_VMULEUH_UNS:
    case ALTIVEC_BUILTIN_VMULOUB_UNS:
    case ALTIVEC_BUILTIN_VMULOUH_UNS:
    case CRYPTO_BUILTIN_VCIPHER:
    case CRYPTO_BUILTIN_VCIPHERLAST:
    case CRYPTO_BUILTIN_VNCIPHER:
    case CRYPTO_BUILTIN_VNCIPHERLAST:
    case CRYPTO_BUILTIN_VPMSUMB:
    case CRYPTO_BUILTIN_VPMSUMH:
    case CRYPTO_BUILTIN_VPMSUMW:
    case CRYPTO_BUILTIN_VPMSUMD:
    case CRYPTO_BUILTIN_VPMSUM:
    case MISC_BUILTIN_ADDG6S:
    case MISC_BUILTIN_DIVWEU:
    case MISC_BUILTIN_DIVWEUO:
    case MISC_BUILTIN_DIVDEU:
    case MISC_BUILTIN_DIVDEUO:
      h.uns_p[0] = 1;
      h.uns_p[1] = 1;
      h.uns_p[2] = 1;
      break;

      /* unsigned 3 argument functions.  */
    case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
    case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
    case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
    case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
    case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
    case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
    case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
    case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
    case VSX_BUILTIN_VPERM_16QI_UNS:
    case VSX_BUILTIN_VPERM_8HI_UNS:
    case VSX_BUILTIN_VPERM_4SI_UNS:
    case VSX_BUILTIN_VPERM_2DI_UNS:
    case VSX_BUILTIN_XXSEL_16QI_UNS:
    case VSX_BUILTIN_XXSEL_8HI_UNS:
    case VSX_BUILTIN_XXSEL_4SI_UNS:
    case VSX_BUILTIN_XXSEL_2DI_UNS:
    case CRYPTO_BUILTIN_VPERMXOR:
    case CRYPTO_BUILTIN_VPERMXOR_V2DI:
    case CRYPTO_BUILTIN_VPERMXOR_V4SI:
    case CRYPTO_BUILTIN_VPERMXOR_V8HI:
    case CRYPTO_BUILTIN_VPERMXOR_V16QI:
    case CRYPTO_BUILTIN_VSHASIGMAW:
    case CRYPTO_BUILTIN_VSHASIGMAD:
    case CRYPTO_BUILTIN_VSHASIGMA:
      h.uns_p[0] = 1;
      h.uns_p[1] = 1;
      h.uns_p[2] = 1;
      h.uns_p[3] = 1;
      break;

      /* signed permute functions with unsigned char mask.  */
    case ALTIVEC_BUILTIN_VPERM_16QI:
    case ALTIVEC_BUILTIN_VPERM_8HI:
    case ALTIVEC_BUILTIN_VPERM_4SI:
    case ALTIVEC_BUILTIN_VPERM_4SF:
    case ALTIVEC_BUILTIN_VPERM_2DI:
    case ALTIVEC_BUILTIN_VPERM_2DF:
    case VSX_BUILTIN_VPERM_16QI:
    case VSX_BUILTIN_VPERM_8HI:
    case VSX_BUILTIN_VPERM_4SI:
    case VSX_BUILTIN_VPERM_4SF:
    case VSX_BUILTIN_VPERM_2DI:
    case VSX_BUILTIN_VPERM_2DF:
      h.uns_p[3] = 1;
      break;

      /* unsigned args, signed return.  */
    case VSX_BUILTIN_XVCVUXDDP_UNS:
    case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
      h.uns_p[1] = 1;
      break;

      /* signed args, unsigned return.  */
    case VSX_BUILTIN_XVCVDPUXDS_UNS:
    case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
    case MISC_BUILTIN_UNPACK_TD:
    case MISC_BUILTIN_UNPACK_V1TI:
      h.uns_p[0] = 1;
      break;

      /* unsigned arguments for 128-bit pack instructions.  */
    case MISC_BUILTIN_PACK_TD:
    case MISC_BUILTIN_PACK_V1TI:
      h.uns_p[1] = 1;
      h.uns_p[2] = 1;
      break;

    default:
      break;
    }

  /* Figure out how many args are present.  */
  while (num_args > 0 && h.mode[num_args] == VOIDmode)
    num_args--;

  if (num_args == 0)
    fatal_error (input_location,
		 "internal error: builtin function %s had no type", name);

  ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
  if (!ret_type && h.uns_p[0])
    ret_type = builtin_mode_to_type[h.mode[0]][0];

  if (!ret_type)
    fatal_error (input_location,
		 "internal error: builtin function %s had an unexpected "
		 "return type %s", name, GET_MODE_NAME (h.mode[0]));

  for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
    arg_type[i] = NULL_TREE;

  for (i = 0; i < num_args; i++)
    {
      int m = (int) h.mode[i + 1];
      int uns_p = h.uns_p[i + 1];

      arg_type[i] = builtin_mode_to_type[m][uns_p];
      if (!arg_type[i] && uns_p)
	arg_type[i] = builtin_mode_to_type[m][0];

      if (!arg_type[i])
	fatal_error (input_location,
		     "internal error: builtin function %s, argument %d "
		     "had unexpected argument type %s", name, i,
		     GET_MODE_NAME (m));
    }

  builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
  if (*found == NULL)
    {
      h2 = ggc_alloc<builtin_hash_struct> ();
      *h2 = h;
      *found = h2;

      h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
					   arg_type[2], NULL_TREE);
    }

  return (*found)->type;
}
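
/* Worked example (hypothetical builtin): a 2-argument builtin whose insn
   operands are all V4SImode and which falls in the "unsigned 2 argument
   functions" group above gets uns_p[0..2] = 1, so the lookups against
   builtin_mode_to_type resolve to unsigned_V4SI_type_node and the
   resulting signature is

     vector unsigned int f (vector unsigned int, vector unsigned int);

   whereas the same modes without the unsigned markings would yield the
   signed V4SI variant.  The hash table ensures each distinct signature
   is built only once.  */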

static void
rs6000_common_init_builtins (void)
{
  const struct builtin_description *d;
  size_t i;

  tree opaque_ftype_opaque = NULL_TREE;
  tree opaque_ftype_opaque_opaque = NULL_TREE;
  tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
  tree v2si_ftype_qi = NULL_TREE;
  tree v2si_ftype_v2si_qi = NULL_TREE;
  tree v2si_ftype_int_qi = NULL_TREE;
  HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;

  if (!TARGET_PAIRED_FLOAT)
    {
      builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
      builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
    }

  /* Paired and SPE builtins are only available if you build a compiler with
     the appropriate options, so only create those builtins with the
     appropriate compiler option.  Create Altivec and VSX builtins on machines
     with at least the general purpose extensions (970 and newer) to allow the
     use of the target attribute.  */
  if (TARGET_EXTRA_BUILTINS)
    builtin_mask |= RS6000_BTM_COMMON;

  /* Add the ternary operators.  */
  d = bdesc_3arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
    {
      tree type;
      HOST_WIDE_INT mask = d->mask;

      if ((mask & builtin_mask) != mask)
	{
	  if (TARGET_DEBUG_BUILTIN)
	    fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
	  continue;
	}

      if (rs6000_overloaded_builtin_p (d->code))
	{
	  if (! (type = opaque_ftype_opaque_opaque_opaque))
	    type = opaque_ftype_opaque_opaque_opaque
	      = build_function_type_list (opaque_V4SI_type_node,
					  opaque_V4SI_type_node,
					  opaque_V4SI_type_node,
					  opaque_V4SI_type_node,
					  NULL_TREE);
	}
      else
	{
	  enum insn_code icode = d->icode;
	  if (d->name == 0)
	    {
	      if (TARGET_DEBUG_BUILTIN)
		fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
			 (long unsigned) i);

	      continue;
	    }

	  if (icode == CODE_FOR_nothing)
	    {
	      if (TARGET_DEBUG_BUILTIN)
		fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
			 d->name);

	      continue;
	    }

	  type = builtin_function_type (insn_data[icode].operand[0].mode,
					insn_data[icode].operand[1].mode,
					insn_data[icode].operand[2].mode,
					insn_data[icode].operand[3].mode,
					d->code, d->name);
	}

      def_builtin (d->name, type, d->code);
    }

  /* Add the binary operators.  */
  d = bdesc_2arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
    {
      machine_mode mode0, mode1, mode2;
      tree type;
      HOST_WIDE_INT mask = d->mask;

      if ((mask & builtin_mask) != mask)
	{
	  if (TARGET_DEBUG_BUILTIN)
	    fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
	  continue;
	}

      if (rs6000_overloaded_builtin_p (d->code))
	{
	  if (! (type = opaque_ftype_opaque_opaque))
	    type = opaque_ftype_opaque_opaque
	      = build_function_type_list (opaque_V4SI_type_node,
					  opaque_V4SI_type_node,
					  opaque_V4SI_type_node,
					  NULL_TREE);
	}
      else
	{
	  enum insn_code icode = d->icode;
	  if (d->name == 0)
	    {
	      if (TARGET_DEBUG_BUILTIN)
		fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
			 (long unsigned) i);

	      continue;
	    }

	  if (icode == CODE_FOR_nothing)
	    {
	      if (TARGET_DEBUG_BUILTIN)
		fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
			 d->name);

	      continue;
	    }

	  mode0 = insn_data[icode].operand[0].mode;
	  mode1 = insn_data[icode].operand[1].mode;
	  mode2 = insn_data[icode].operand[2].mode;

	  if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
	    {
	      if (! (type = v2si_ftype_v2si_qi))
		type = v2si_ftype_v2si_qi
		  = build_function_type_list (opaque_V2SI_type_node,
					      opaque_V2SI_type_node,
					      char_type_node,
					      NULL_TREE);
	    }
	  else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
		   && mode2 == QImode)
	    {
	      if (! (type = v2si_ftype_int_qi))
		type = v2si_ftype_int_qi
		  = build_function_type_list (opaque_V2SI_type_node,
					      integer_type_node,
					      char_type_node,
					      NULL_TREE);
	    }
	  else
	    type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
					  d->code, d->name);
	}

      def_builtin (d->name, type, d->code);
    }

  /* Add the simple unary operators.  */
  d = bdesc_1arg;
  for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
    {
      machine_mode mode0, mode1;
      tree type;
      HOST_WIDE_INT mask = d->mask;

      if ((mask & builtin_mask) != mask)
	{
	  if (TARGET_DEBUG_BUILTIN)
	    fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
	  continue;
	}

      if (rs6000_overloaded_builtin_p (d->code))
	{
	  if (! (type = opaque_ftype_opaque))
	    type = opaque_ftype_opaque
	      = build_function_type_list (opaque_V4SI_type_node,
					  opaque_V4SI_type_node,
					  NULL_TREE);
	}
      else
	{
	  enum insn_code icode = d->icode;
	  if (d->name == 0)
	    {
	      if (TARGET_DEBUG_BUILTIN)
		fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
			 (long unsigned) i);

	      continue;
	    }

	  if (icode == CODE_FOR_nothing)
	    {
	      if (TARGET_DEBUG_BUILTIN)
		fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
			 d->name);

	      continue;
	    }

	  mode0 = insn_data[icode].operand[0].mode;
	  mode1 = insn_data[icode].operand[1].mode;

	  if (mode0 == V2SImode && mode1 == QImode)
	    {
	      if (! (type = v2si_ftype_qi))
		type = v2si_ftype_qi
		  = build_function_type_list (opaque_V2SI_type_node,
					      char_type_node,
					      NULL_TREE);
	    }
	  else
	    type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
					  d->code, d->name);
	}

      def_builtin (d->name, type, d->code);
    }
}

static void
rs6000_init_libfuncs (void)
{
  if (!TARGET_IEEEQUAD)
    /* AIX/Darwin/64-bit Linux quad floating point routines.  */
    if (!TARGET_XL_COMPAT)
      {
	set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
	set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
	set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
	set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");

	if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
	  {
	    set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
	    set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
	    set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
	    set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
	    set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
	    set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
	    set_optab_libfunc (le_optab, TFmode, "__gcc_qle");

	    set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
	    set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
	    set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
	    set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
	    set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
	    set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
	    set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
	    set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
	  }

	if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
	  set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
      }
    else
      {
	set_optab_libfunc (add_optab, TFmode, "_xlqadd");
	set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
	set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
	set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
      }
  else
    {
      /* 32-bit SVR4 quad floating point routines.  */

      set_optab_libfunc (add_optab, TFmode, "_q_add");
      set_optab_libfunc (sub_optab, TFmode, "_q_sub");
      set_optab_libfunc (neg_optab, TFmode, "_q_neg");
      set_optab_libfunc (smul_optab, TFmode, "_q_mul");
      set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
      if (TARGET_PPC_GPOPT)
	set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");

      set_optab_libfunc (eq_optab, TFmode, "_q_feq");
      set_optab_libfunc (ne_optab, TFmode, "_q_fne");
      set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
      set_optab_libfunc (ge_optab, TFmode, "_q_fge");
      set_optab_libfunc (lt_optab, TFmode, "_q_flt");
      set_optab_libfunc (le_optab, TFmode, "_q_fle");

      set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
      set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
      set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
      set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
      set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
      set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
      set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
      set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
    }
}
15845 /* Expand a block clear operation, and return 1 if successful. Return 0
15846 if we should let the compiler generate normal code.
15848 operands[0] is the destination
15849 operands[1] is the length
15850 operands[3] is the alignment */
15853 expand_block_clear (rtx operands[])
15855 rtx orig_dest = operands[0];
15856 rtx bytes_rtx = operands[1];
15857 rtx align_rtx = operands[3];
15858 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
15859 HOST_WIDE_INT align;
15860 HOST_WIDE_INT bytes;
15861 int offset;
15862 int clear_bytes;
15863 int clear_step;
15865 /* If this is not a fixed size move, just call memcpy */
15866 if (! constp)
15867 return 0;
15869 /* This must be a fixed size alignment */
15870 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15871 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15873 /* Anything to clear? */
15874 bytes = INTVAL (bytes_rtx);
15875 if (bytes <= 0)
15876 return 1;
15878 /* Use the builtin memset after a point, to avoid huge code bloat.
15879 When optimize_size, avoid any significant code bloat; calling
15880 memset is about 4 instructions, so allow for one instruction to
15881 load zero and three to do clearing. */
15882 if (TARGET_ALTIVEC && align >= 128)
15883 clear_step = 16;
15884 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
15885 clear_step = 8;
15886 else if (TARGET_SPE && align >= 64)
15887 clear_step = 8;
15888 else
15889 clear_step = 4;
15891 if (optimize_size && bytes > 3 * clear_step)
15892 return 0;
15893 if (! optimize_size && bytes > 8 * clear_step)
15894 return 0;
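  /* For example, with TARGET_ALTIVEC and 128-bit alignment, clear_step is
     16, so -Os falls back to memset for anything over 48 bytes (3 * 16)
     and other optimization levels for anything over 128 bytes (8 * 16).  */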
15896 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
15898 machine_mode mode = BLKmode;
15899 rtx dest;
15901 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
15903 clear_bytes = 16;
15904 mode = V4SImode;
15906 else if (bytes >= 8 && TARGET_SPE && align >= 64)
15908 clear_bytes = 8;
15909 mode = V2SImode;
15911 else if (bytes >= 8 && TARGET_POWERPC64
15912 && (align >= 64 || !STRICT_ALIGNMENT))
15914 clear_bytes = 8;
15915 mode = DImode;
15916 if (offset == 0 && align < 64)
15918 rtx addr;
15920 /* If the address form is reg+offset with offset not a
15921 multiple of four, reload into reg indirect form here
15922 rather than waiting for reload. This way we get one
15923 reload, not one per store. */
15924 addr = XEXP (orig_dest, 0);
15925 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15926 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15927 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15929 addr = copy_addr_to_reg (addr);
15930 orig_dest = replace_equiv_address (orig_dest, addr);
15934 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15935     { /* clear 4 bytes */
15936 clear_bytes = 4;
15937 mode = SImode;
15939 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
15940     { /* clear 2 bytes */
15941 clear_bytes = 2;
15942 mode = HImode;
15944       else /* clear 1 byte at a time */
15946 clear_bytes = 1;
15947 mode = QImode;
15950 dest = adjust_address (orig_dest, mode, offset);
15952 emit_move_insn (dest, CONST0_RTX (mode));
15955 return 1;
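/* An illustrative case of the loop above: clearing 24 bytes with 64-bit
   alignment on a 64-bit target takes three iterations, emitting three
   DImode stores of zero at offsets 0, 8, and 16.  */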
15959 /* Expand a block move operation, and return 1 if successful. Return 0
15960 if we should let the compiler generate normal code.
15962 operands[0] is the destination
15963 operands[1] is the source
15964 operands[2] is the length
15965 operands[3] is the alignment */
15967 #define MAX_MOVE_REG 4
15970 expand_block_move (rtx operands[])
15972 rtx orig_dest = operands[0];
15973 rtx orig_src = operands[1];
15974 rtx bytes_rtx = operands[2];
15975 rtx align_rtx = operands[3];
15976 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
15977 int align;
15978 int bytes;
15979 int offset;
15980 int move_bytes;
15981 rtx stores[MAX_MOVE_REG];
15982 int num_reg = 0;
15984 /* If this is not a fixed size move, just call memcpy */
15985 if (! constp)
15986 return 0;
15988 /* This must be a fixed size alignment */
15989 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15990 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15992 /* Anything to move? */
15993 bytes = INTVAL (bytes_rtx);
15994 if (bytes <= 0)
15995 return 1;
15997 if (bytes > rs6000_block_move_inline_limit)
15998 return 0;
16000 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
16002 union {
16003 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
16004 rtx (*mov) (rtx, rtx);
16005 } gen_func;
16006 machine_mode mode = BLKmode;
16007 rtx src, dest;
16009 /* Altivec first, since it will be faster than a string move
16010 when it applies, and usually not significantly larger. */
16011 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
16013 move_bytes = 16;
16014 mode = V4SImode;
16015 gen_func.mov = gen_movv4si;
16017 else if (TARGET_SPE && bytes >= 8 && align >= 64)
16019 move_bytes = 8;
16020 mode = V2SImode;
16021 gen_func.mov = gen_movv2si;
16023 else if (TARGET_STRING
16024 && bytes > 24 /* move up to 32 bytes at a time */
16025 && ! fixed_regs[5]
16026 && ! fixed_regs[6]
16027 && ! fixed_regs[7]
16028 && ! fixed_regs[8]
16029 && ! fixed_regs[9]
16030 && ! fixed_regs[10]
16031 && ! fixed_regs[11]
16032 && ! fixed_regs[12])
16034 move_bytes = (bytes > 32) ? 32 : bytes;
16035 gen_func.movmemsi = gen_movmemsi_8reg;
16037 else if (TARGET_STRING
16038 && bytes > 16 /* move up to 24 bytes at a time */
16039 && ! fixed_regs[5]
16040 && ! fixed_regs[6]
16041 && ! fixed_regs[7]
16042 && ! fixed_regs[8]
16043 && ! fixed_regs[9]
16044 && ! fixed_regs[10])
16046 move_bytes = (bytes > 24) ? 24 : bytes;
16047 gen_func.movmemsi = gen_movmemsi_6reg;
16049 else if (TARGET_STRING
16050 && bytes > 8 /* move up to 16 bytes at a time */
16051 && ! fixed_regs[5]
16052 && ! fixed_regs[6]
16053 && ! fixed_regs[7]
16054 && ! fixed_regs[8])
16056 move_bytes = (bytes > 16) ? 16 : bytes;
16057 gen_func.movmemsi = gen_movmemsi_4reg;
16059 else if (bytes >= 8 && TARGET_POWERPC64
16060 && (align >= 64 || !STRICT_ALIGNMENT))
16062 move_bytes = 8;
16063 mode = DImode;
16064 gen_func.mov = gen_movdi;
16065 if (offset == 0 && align < 64)
16067 rtx addr;
16069 /* If the address form is reg+offset with offset not a
16070 multiple of four, reload into reg indirect form here
16071 rather than waiting for reload. This way we get one
16072 reload, not one per load and/or store. */
16073 addr = XEXP (orig_dest, 0);
16074 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16075 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16076 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16078 addr = copy_addr_to_reg (addr);
16079 orig_dest = replace_equiv_address (orig_dest, addr);
16081 addr = XEXP (orig_src, 0);
16082 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16083 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16084 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16086 addr = copy_addr_to_reg (addr);
16087 orig_src = replace_equiv_address (orig_src, addr);
16091 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
16092 { /* move up to 8 bytes at a time */
16093 move_bytes = (bytes > 8) ? 8 : bytes;
16094 gen_func.movmemsi = gen_movmemsi_2reg;
16096 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16097 { /* move 4 bytes */
16098 move_bytes = 4;
16099 mode = SImode;
16100 gen_func.mov = gen_movsi;
16102 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16103 { /* move 2 bytes */
16104 move_bytes = 2;
16105 mode = HImode;
16106 gen_func.mov = gen_movhi;
16108 else if (TARGET_STRING && bytes > 1)
16109 { /* move up to 4 bytes at a time */
16110 move_bytes = (bytes > 4) ? 4 : bytes;
16111 gen_func.movmemsi = gen_movmemsi_1reg;
16113 else /* move 1 byte at a time */
16115 move_bytes = 1;
16116 mode = QImode;
16117 gen_func.mov = gen_movqi;
16120 src = adjust_address (orig_src, mode, offset);
16121 dest = adjust_address (orig_dest, mode, offset);
16123 if (mode != BLKmode)
16125 rtx tmp_reg = gen_reg_rtx (mode);
16127 emit_insn ((*gen_func.mov) (tmp_reg, src));
16128 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
16131 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
16133 int i;
16134 for (i = 0; i < num_reg; i++)
16135 emit_insn (stores[i]);
16136 num_reg = 0;
16139 if (mode == BLKmode)
16141 /* Move the address into scratch registers. The movmemsi
16142 patterns require zero offset. */
16143 if (!REG_P (XEXP (src, 0)))
16145 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
16146 src = replace_equiv_address (src, src_reg);
16148 set_mem_size (src, move_bytes);
16150 if (!REG_P (XEXP (dest, 0)))
16152 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
16153 dest = replace_equiv_address (dest, dest_reg);
16155 set_mem_size (dest, move_bytes);
16157 emit_insn ((*gen_func.movmemsi) (dest, src,
16158 GEN_INT (move_bytes & 31),
16159 align_rtx));
16163 return 1;
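/* An illustrative case: without -mstring, a 12-byte copy with 32-bit
   alignment becomes three SImode load/store pairs; the stores are
   buffered in STORES[] and emitted after the loads so that the loads can
   issue back to back.  */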
16167 /* Return a string to perform a load_multiple operation.
16168 operands[0] is the vector.
16169 operands[1] is the source address.
16170 operands[2] is the first destination register. */
16172 const char *
16173 rs6000_output_load_multiple (rtx operands[3])
16175 /* We have to handle the case where the pseudo used to contain the address
16176 is assigned to one of the output registers. */
16177 int i, j;
16178 int words = XVECLEN (operands[0], 0);
16179 rtx xop[10];
16181 if (XVECLEN (operands[0], 0) == 1)
16182 return "lwz %2,0(%1)";
16184 for (i = 0; i < words; i++)
16185 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
16187 if (i == words-1)
16189 xop[0] = GEN_INT (4 * (words-1));
16190 xop[1] = operands[1];
16191 xop[2] = operands[2];
16192 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
16193 return "";
16195 else if (i == 0)
16197 xop[0] = GEN_INT (4 * (words-1));
16198 xop[1] = operands[1];
16199 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16200 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
16201 return "";
16203 else
16205 for (j = 0; j < words; j++)
16206 if (j != i)
16208 xop[0] = GEN_INT (j * 4);
16209 xop[1] = operands[1];
16210 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
16211 output_asm_insn ("lwz %2,%0(%1)", xop);
16213 xop[0] = GEN_INT (i * 4);
16214 xop[1] = operands[1];
16215 output_asm_insn ("lwz %1,%0(%1)", xop);
16216 return "";
16220 return "lswi %2,%1,%N0";
16224 /* A validation routine: say whether CODE, a condition code, and MODE
16225 match. The other alternatives either don't make sense or should
16226 never be generated. */
16228 void
16229 validate_condition_mode (enum rtx_code code, machine_mode mode)
16231 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
16232 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
16233 && GET_MODE_CLASS (mode) == MODE_CC);
16235 /* These don't make sense. */
16236 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
16237 || mode != CCUNSmode);
16239 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
16240 || mode == CCUNSmode);
16242 gcc_assert (mode == CCFPmode
16243 || (code != ORDERED && code != UNORDERED
16244 && code != UNEQ && code != LTGT
16245 && code != UNGT && code != UNLT
16246 && code != UNGE && code != UNLE));
16248 /* These should never be generated except for
16249 flag_finite_math_only. */
16250 gcc_assert (mode != CCFPmode
16251 || flag_finite_math_only
16252 || (code != LE && code != GE
16253 && code != UNEQ && code != LTGT
16254 && code != UNGT && code != UNLT));
16256 /* These are invalid; the information is not there. */
16257 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
16261 /* Return 1 if ANDOP is a mask whose set bits are all contained in the
16262 mask required to convert the result of a rotate insn into a shift
16263 left insn of SHIFTOP bits. Both are known to be SImode CONST_INTs. */
16266 includes_lshift_p (rtx shiftop, rtx andop)
16268 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16270 shift_mask <<= INTVAL (shiftop);
16272 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
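/* For instance, with SHIFTOP = 4 the test requires the low four bits of
   ANDOP to be clear: 0xffffff00 is accepted, while 0xffffff08 is
   rejected because bit 3 would be lost by a 4-bit left shift.  */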
16275 /* Similar, but for right shift. */
16278 includes_rshift_p (rtx shiftop, rtx andop)
16280 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16282 shift_mask >>= INTVAL (shiftop);
16284 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
16287 /* Return 1 if ANDOP is a mask suitable for use with an rldic insn
16288 to perform a left shift. It must have exactly SHIFTOP least
16289 significant 0's, then one or more 1's, then zero or more 0's. */
16292 includes_rldic_lshift_p (rtx shiftop, rtx andop)
16294 if (GET_CODE (andop) == CONST_INT)
16296 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16298 c = INTVAL (andop);
16299 if (c == 0 || c == HOST_WIDE_INT_M1U)
16300 return 0;
16302 shift_mask = HOST_WIDE_INT_M1U;
16303 shift_mask <<= INTVAL (shiftop);
16305 /* Find the least significant one bit. */
16306 lsb = c & -c;
16308 /* It must coincide with the LSB of the shift mask. */
16309 if (-lsb != shift_mask)
16310 return 0;
16312 /* Invert to look for the next transition (if any). */
16313 c = ~c;
16315 /* Remove the low group of ones (originally low group of zeros). */
16316 c &= -lsb;
16318 /* Again find the lsb, and check we have all 1's above. */
16319 lsb = c & -c;
16320 return c == -lsb;
16322 else
16323 return 0;
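/* For instance, with SHIFTOP = 4, ANDOP = 0xff0 (four low 0's, eight 1's,
   then 0's) is accepted, while ANDOP = 0xfe0 is rejected because its
   lowest set bit does not coincide with the LSB of the shift mask.  */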
16326 /* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
16327 to perform a left shift. It must have SHIFTOP or more least
16328 significant 0's, with the remainder of the word 1's. */
16331 includes_rldicr_lshift_p (rtx shiftop, rtx andop)
16333 if (GET_CODE (andop) == CONST_INT)
16335 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16337 shift_mask = HOST_WIDE_INT_M1U;
16338 shift_mask <<= INTVAL (shiftop);
16339 c = INTVAL (andop);
16341 /* Find the least significant one bit. */
16342 lsb = c & -c;
16344 /* It must be covered by the shift mask.
16345 This test also rejects c == 0. */
16346 if ((lsb & shift_mask) == 0)
16347 return 0;
16349 /* Check we have all 1's above the transition, and reject all 1's. */
16350 return c == -lsb && lsb != 1;
16352 else
16353 return 0;
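/* For instance, with SHIFTOP = 4, ANDOP = 0xffffffffffffff00 (eight low
   0's, all 1's above) is accepted; an all-ones ANDOP is rejected because
   its lowest set bit lies outside the shift mask.  */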
16356 /* Return 1 if the operands will generate valid arguments to the rlwimi
16357 instruction for an insert with right shift in 64-bit mode. The mask may
16358 not start on the first bit or stop on the last bit because the wrap-around
16359 effects of the instruction do not correspond to the semantics of the RTL insn. */
16362 insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
16364 if (INTVAL (startop) > 32
16365 && INTVAL (startop) < 64
16366 && INTVAL (sizeop) > 1
16367 && INTVAL (sizeop) + INTVAL (startop) < 64
16368 && INTVAL (shiftop) > 0
16369 && INTVAL (sizeop) + INTVAL (shiftop) < 32
16370 && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
16371 return 1;
16373 return 0;
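/* For instance, STARTOP = 40, SIZEOP = 16, SHIFTOP = 8 passes every test
   above (40 + 16 < 64 and 16 + 8 < 32), so the insert can use rlwimi,
   whereas STARTOP = 32 already fails the first test.  */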
16376 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
16377 for lfq and stfq insns iff the registers are hard registers. */
16380 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
16382 /* We might have been passed a SUBREG. */
16383 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
16384 return 0;
16386 /* We might have been passed non floating point registers. */
16387 if (!FP_REGNO_P (REGNO (reg1))
16388 || !FP_REGNO_P (REGNO (reg2)))
16389 return 0;
16391 return (REGNO (reg1) == REGNO (reg2) - 1);
16394 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
16395 addr1 and addr2 must be in consecutive memory locations
16396 (addr2 == addr1 + 8). */
16399 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
16401 rtx addr1, addr2;
16402 unsigned int reg1, reg2;
16403 int offset1, offset2;
16405 /* The mems cannot be volatile. */
16406 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
16407 return 0;
16409 addr1 = XEXP (mem1, 0);
16410 addr2 = XEXP (mem2, 0);
16412 /* Extract an offset (if used) from the first addr. */
16413 if (GET_CODE (addr1) == PLUS)
16415 /* If not a REG, return zero. */
16416 if (GET_CODE (XEXP (addr1, 0)) != REG)
16417 return 0;
16418 else
16420 reg1 = REGNO (XEXP (addr1, 0));
16421 /* The offset must be constant! */
16422 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
16423 return 0;
16424 offset1 = INTVAL (XEXP (addr1, 1));
16427 else if (GET_CODE (addr1) != REG)
16428 return 0;
16429 else
16431 reg1 = REGNO (addr1);
16432 /* This was a simple (mem (reg)) expression. Offset is 0. */
16433 offset1 = 0;
16436 /* And now for the second addr. */
16437 if (GET_CODE (addr2) == PLUS)
16439 /* If not a REG, return zero. */
16440 if (GET_CODE (XEXP (addr2, 0)) != REG)
16441 return 0;
16442 else
16444 reg2 = REGNO (XEXP (addr2, 0));
16445 /* The offset must be constant. */
16446 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
16447 return 0;
16448 offset2 = INTVAL (XEXP (addr2, 1));
16451 else if (GET_CODE (addr2) != REG)
16452 return 0;
16453 else
16455 reg2 = REGNO (addr2);
16456 /* This was a simple (mem (reg)) expression. Offset is 0. */
16457 offset2 = 0;
16460 /* Both of these must have the same base register. */
16461 if (reg1 != reg2)
16462 return 0;
16464 /* The offset for the second addr must be 8 more than the first addr. */
16465 if (offset2 != offset1 + 8)
16466 return 0;
16468 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
16469 instructions. */
16470 return 1;
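/* For instance, (mem (plus (reg 9) (const_int 8))) followed by
   (mem (plus (reg 9) (const_int 16))) qualifies: both use the same base
   register and the offsets differ by exactly 8.  */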
16475 rs6000_secondary_memory_needed_rtx (machine_mode mode)
16477 static bool eliminated = false;
16478 rtx ret;
16480 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
16481 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16482 else
16484 rtx mem = cfun->machine->sdmode_stack_slot;
16485 gcc_assert (mem != NULL_RTX);
16487 if (!eliminated)
16489 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
16490 cfun->machine->sdmode_stack_slot = mem;
16491 eliminated = true;
16493 ret = mem;
16496 if (TARGET_DEBUG_ADDR)
16498 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
16499 GET_MODE_NAME (mode));
16500 if (!ret)
16501 fprintf (stderr, "\tNULL_RTX\n");
16502 else
16503 debug_rtx (ret);
16506 return ret;
16509 /* Return the mode to be used for memory when a secondary memory
16510 location is needed. For SDmode values we need to use DDmode, in
16511 all other cases we can use the same mode. */
16512 machine_mode
16513 rs6000_secondary_memory_needed_mode (machine_mode mode)
16515 if (lra_in_progress && mode == SDmode)
16516 return DDmode;
16517 return mode;
16520 static tree
16521 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
16523 /* Don't walk into types. */
16524 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
16526 *walk_subtrees = 0;
16527 return NULL_TREE;
16530 switch (TREE_CODE (*tp))
16532 case VAR_DECL:
16533 case PARM_DECL:
16534 case FIELD_DECL:
16535 case RESULT_DECL:
16536 case SSA_NAME:
16537 case REAL_CST:
16538 case MEM_REF:
16539 case VIEW_CONVERT_EXPR:
16540 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
16541 return *tp;
16542 break;
16543 default:
16544 break;
16547 return NULL_TREE;
16550 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
16551 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
16552 only work on the traditional altivec registers, note if an altivec register
16553 was chosen. */
16555 static enum rs6000_reg_type
16556 register_to_reg_type (rtx reg, bool *is_altivec)
16558 HOST_WIDE_INT regno;
16559 enum reg_class rclass;
16561 if (GET_CODE (reg) == SUBREG)
16562 reg = SUBREG_REG (reg);
16564 if (!REG_P (reg))
16565 return NO_REG_TYPE;
16567 regno = REGNO (reg);
16568 if (regno >= FIRST_PSEUDO_REGISTER)
16570 if (!lra_in_progress && !reload_in_progress && !reload_completed)
16571 return PSEUDO_REG_TYPE;
16573 regno = true_regnum (reg);
16574 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16575 return PSEUDO_REG_TYPE;
16578 gcc_assert (regno >= 0);
16580 if (is_altivec && ALTIVEC_REGNO_P (regno))
16581 *is_altivec = true;
16583 rclass = rs6000_regno_regclass[regno];
16584 return reg_class_to_reg_type[(int)rclass];
16587 /* Helper function to return the cost of adding a TOC entry address. */
16589 static inline int
16590 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
16592 int ret;
16594 if (TARGET_CMODEL != CMODEL_SMALL)
16595 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
16597 else
16598 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
16600 return ret;
16603 /* Helper function for rs6000_secondary_reload to determine whether the memory
16604 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
16605 needs reloading. Return negative if the memory is not handled by the memory
16606 helper functions and a different reload method should be tried, 0 if no
16607 additional instructions are needed, and positive to give the extra cost for
16608 the memory. */
16610 static int
16611 rs6000_secondary_reload_memory (rtx addr,
16612 enum reg_class rclass,
16613 enum machine_mode mode)
16615 int extra_cost = 0;
16616 rtx reg, and_arg, plus_arg0, plus_arg1;
16617 addr_mask_type addr_mask;
16618 const char *type = NULL;
16619 const char *fail_msg = NULL;
16621 if (GPR_REG_CLASS_P (rclass))
16622 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
16624 else if (rclass == FLOAT_REGS)
16625 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
16627 else if (rclass == ALTIVEC_REGS)
16628 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
16630 /* For the combined VSX_REGS, turn off Altivec AND -16. */
16631 else if (rclass == VSX_REGS)
16632 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
16633 & ~RELOAD_REG_AND_M16);
16635 else
16637 if (TARGET_DEBUG_ADDR)
16638 fprintf (stderr,
16639 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16640 "class is not GPR, FPR, VMX\n",
16641 GET_MODE_NAME (mode), reg_class_names[rclass]);
16643 return -1;
16646 /* If the register isn't valid in this register class, just return now. */
16647 if ((addr_mask & RELOAD_REG_VALID) == 0)
16649 if (TARGET_DEBUG_ADDR)
16650 fprintf (stderr,
16651 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16652 "not valid in class\n",
16653 GET_MODE_NAME (mode), reg_class_names[rclass]);
16655 return -1;
16658 switch (GET_CODE (addr))
16660 /* Does the register class support auto update forms for this mode? We
16661 don't need a scratch register, since the powerpc only supports
16662 PRE_INC, PRE_DEC, and PRE_MODIFY. */
16663 case PRE_INC:
16664 case PRE_DEC:
16665 reg = XEXP (addr, 0);
16666 if (!base_reg_operand (reg, GET_MODE (reg)))
16668 fail_msg = "no base register #1";
16669 extra_cost = -1;
16672 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
16674 extra_cost = 1;
16675 type = "update";
16677 break;
16679 case PRE_MODIFY:
16680 reg = XEXP (addr, 0);
16681 plus_arg1 = XEXP (addr, 1);
16682 if (!base_reg_operand (reg, GET_MODE (reg))
16683 || GET_CODE (plus_arg1) != PLUS
16684 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
16686 fail_msg = "bad PRE_MODIFY";
16687 extra_cost = -1;
16690 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
16692 extra_cost = 1;
16693 type = "update";
16695 break;
16697 /* Do we need to simulate AND -16 to clear the bottom address bits used
16698 in VMX load/stores? Only allow the AND for vector sizes. */
16699 case AND:
16700 and_arg = XEXP (addr, 0);
16701 if (GET_MODE_SIZE (mode) != 16
16702 || GET_CODE (XEXP (addr, 1)) != CONST_INT
16703 || INTVAL (XEXP (addr, 1)) != -16)
16705 fail_msg = "bad Altivec AND #1";
16706 extra_cost = -1;
16709 if (rclass != ALTIVEC_REGS)
16711 if (legitimate_indirect_address_p (and_arg, false))
16712 extra_cost = 1;
16714 else if (legitimate_indexed_address_p (and_arg, false))
16715 extra_cost = 2;
16717 else
16719 fail_msg = "bad Altivec AND #2";
16720 extra_cost = -1;
16723 type = "and";
16725 break;
16727 /* If this is an indirect address, make sure it is a base register. */
16728 case REG:
16729 case SUBREG:
16730 if (!legitimate_indirect_address_p (addr, false))
16732 extra_cost = 1;
16733 type = "move";
16735 break;
16737 /* If this is an indexed address, make sure the register class can handle
16738 indexed addresses for this mode. */
16739 case PLUS:
16740 plus_arg0 = XEXP (addr, 0);
16741 plus_arg1 = XEXP (addr, 1);
16743 /* (plus (plus (reg) (constant)) (constant)) is generated during
16744 push_reload processing, so handle it now. */
16745 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
16747 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16749 extra_cost = 1;
16750 type = "offset";
16754 /* (plus (plus (reg) (constant)) (reg)) is also generated during
16755 push_reload processing, so handle it now. */
16756 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
16758 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
16760 extra_cost = 1;
16761 type = "indexed #2";
16765 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
16767 fail_msg = "no base register #2";
16768 extra_cost = -1;
16771 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
16773 if ((addr_mask & RELOAD_REG_INDEXED) == 0
16774 || !legitimate_indexed_address_p (addr, false))
16776 extra_cost = 1;
16777 type = "indexed";
16781 /* Make sure the register class can handle offset addresses. */
16782 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
16784 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16786 extra_cost = 1;
16787 type = "offset";
16791 else
16793 fail_msg = "bad PLUS";
16794 extra_cost = -1;
16797 break;
16799 case LO_SUM:
16800 if (!legitimate_lo_sum_address_p (mode, addr, false))
16802 fail_msg = "bad LO_SUM";
16803 extra_cost = -1;
16806 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16808 extra_cost = 1;
16809 type = "lo_sum";
16811 break;
16813 /* Static addresses need to create a TOC entry. */
16814 case CONST:
16815 case SYMBOL_REF:
16816 case LABEL_REF:
16817 type = "address";
16818 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
16819 break;
16821 /* TOC references look like offsettable memory. */
16822 case UNSPEC:
16823 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
16825 fail_msg = "bad UNSPEC";
16826 extra_cost = -1;
16829 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16831 extra_cost = 1;
16832 type = "toc reference";
16834 break;
16836 default:
16838 fail_msg = "bad address";
16839 extra_cost = -1;
16843 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
16845 if (extra_cost < 0)
16846 fprintf (stderr,
16847 "rs6000_secondary_reload_memory error: mode = %s, "
16848 "class = %s, addr_mask = '%s', %s\n",
16849 GET_MODE_NAME (mode),
16850 reg_class_names[rclass],
16851 rs6000_debug_addr_mask (addr_mask, false),
16852 (fail_msg != NULL) ? fail_msg : "<bad address>");
16854 else
16855 fprintf (stderr,
16856 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16857 "addr_mask = '%s', extra cost = %d, %s\n",
16858 GET_MODE_NAME (mode),
16859 reg_class_names[rclass],
16860 rs6000_debug_addr_mask (addr_mask, false),
16861 extra_cost,
16862 (type) ? type : "<none>");
16864 debug_rtx (addr);
16867 return extra_cost;
16870 /* Helper function for rs6000_secondary_reload to return true if a move to a
16871 different register class is really a simple move. */
16873 static bool
16874 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
16875 enum rs6000_reg_type from_type,
16876 machine_mode mode)
16878 int size;
16880 /* Add support for various direct moves available. In this function, we only
16881 look at cases where we don't need any extra registers, and one or more
16882 simple move insns are issued. At present, 32-bit integers are not allowed
16883 in FPR/VSX registers. A single precision binary floating point value is not
16884 a simple move because we need to convert to the single precision memory layout.
16885 The 4-byte SDmode can be moved. */
16886 size = GET_MODE_SIZE (mode);
16887 if (TARGET_DIRECT_MOVE
16888 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
16889 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16890 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
16891 return true;
16893 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
16894 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
16895 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16896 return true;
16898 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
16899 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
16900 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16901 return true;
16903 return false;
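/* For instance, with -mcpu=power8 in 64-bit mode a DImode move between a
   GPR and a VSX register is a single mtvsrd/mfvsrd and therefore counts
   as simple, whereas an SFmode move does not because of the memory
   layout conversion noted above.  */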
16906 /* Power8 helper function for rs6000_secondary_reload: handle all of the
16907 special direct moves that involve allocating an extra register. Return
16908 true if such a move was generated, recording the helper's insn code and
16909 extra cost in SRI, and false if not. */
16911 static bool
16912 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
16913 enum rs6000_reg_type from_type,
16914 machine_mode mode,
16915 secondary_reload_info *sri,
16916 bool altivec_p)
16918 bool ret = false;
16919 enum insn_code icode = CODE_FOR_nothing;
16920 int cost = 0;
16921 int size = GET_MODE_SIZE (mode);
16923 if (TARGET_POWERPC64)
16925 if (size == 16)
16927 /* Handle moving 128-bit values from GPRs to VSX registers on
16928 power8 when running in 64-bit mode using XXPERMDI to glue the two
16929 64-bit values back together. */
16930 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16932 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16933 icode = reg_addr[mode].reload_vsx_gpr;
16936 /* Handle moving 128-bit values from VSX registers to GPRs on
16937 power8 when running in 64-bit mode using XXPERMDI to get access to the
16938 bottom 64-bit value. */
16939 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16941 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16942 icode = reg_addr[mode].reload_gpr_vsx;
16946 else if (mode == SFmode)
16948 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16950 cost = 3; /* xscvdpspn, mfvsrd, and. */
16951 icode = reg_addr[mode].reload_gpr_vsx;
16954 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16956 cost = 2; /* mtvsrz, xscvspdpn. */
16957 icode = reg_addr[mode].reload_vsx_gpr;
16962 if (TARGET_POWERPC64 && size == 16)
16964 /* Handle moving 128-bit values from GPRs to VSX registers on
16965 power8 when running in 64-bit mode using XXPERMDI to glue the two
16966 64-bit values back together. */
16967 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16969 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16970 icode = reg_addr[mode].reload_vsx_gpr;
16973 /* Handle moving 128-bit values from VSX registers to GPRs on
16974 power8 when running in 64-bit mode using XXPERMDI to get access to the
16975 bottom 64-bit value. */
16976 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16978 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16979 icode = reg_addr[mode].reload_gpr_vsx;
16983 else if (!TARGET_POWERPC64 && size == 8)
16985 /* Handle moving 64-bit values from GPRs to floating point registers on
16986 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
16987 values back together. Altivec register classes must be handled
16988 specially since a different instruction is used, and the secondary
16989 reload support requires a single instruction class in the scratch
16990 register constraint. However, right now TFmode is not allowed in
16991 Altivec registers, so the pattern will never match. */
16992 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
16994 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
16995 icode = reg_addr[mode].reload_fpr_gpr;
16999 if (icode != CODE_FOR_nothing)
17001 ret = true;
17002 if (sri)
17004 sri->icode = icode;
17005 sri->extra_cost = cost;
17009 return ret;
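/* For instance, a TImode move from GPRs to a VSX register on a 64-bit
   power8 reports an extra cost of 3 (two mtvsrd's glued by one xxpermdi)
   through reg_addr[TImode].reload_vsx_gpr.  */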
17012 /* Return whether a move between two register classes can be done either
17013 directly (simple move) or via a pattern that uses a single extra temporary
17014 (using power8's direct move in this case). */
17016 static bool
17017 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
17018 enum rs6000_reg_type from_type,
17019 machine_mode mode,
17020 secondary_reload_info *sri,
17021 bool altivec_p)
17023 /* Fall back to load/store reloads if either type is not a register. */
17024 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
17025 return false;
17027 /* If we haven't allocated registers yet, assume the move can be done for the
17028 standard register types. */
17029 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
17030 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
17031 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
17032 return true;
17034 /* Moves within the same set of registers are simple moves for non-specialized
17035 registers. */
17036 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
17037 return true;
17039 /* Check whether a simple move can be done directly. */
17040 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
17042 if (sri)
17044 sri->icode = CODE_FOR_nothing;
17045 sri->extra_cost = 0;
17047 return true;
17050 /* Now check if we can do it in a few steps. */
17051 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
17052 altivec_p);
17055 /* Inform reload about cases where moving X with a mode MODE to a register in
17056 RCLASS requires an extra scratch or immediate register. Return the class
17057 needed for the immediate register.
17059 For VSX and Altivec, we may need a register to convert sp+offset into
17060 reg+sp.
17062 For misaligned 64-bit gpr loads and stores we need a register to
17063 convert an offset address to indirect. */
17065 static reg_class_t
17066 rs6000_secondary_reload (bool in_p,
17067 rtx x,
17068 reg_class_t rclass_i,
17069 machine_mode mode,
17070 secondary_reload_info *sri)
17072 enum reg_class rclass = (enum reg_class) rclass_i;
17073 reg_class_t ret = ALL_REGS;
17074 enum insn_code icode;
17075 bool default_p = false;
17076 bool done_p = false;
17078 /* Allow subreg of memory before/during reload. */
17079 bool memory_p = (MEM_P (x)
17080 || (!reload_completed && GET_CODE (x) == SUBREG
17081 && MEM_P (SUBREG_REG (x))));
17083 sri->icode = CODE_FOR_nothing;
17084 sri->extra_cost = 0;
17085 icode = ((in_p)
17086 ? reg_addr[mode].reload_load
17087 : reg_addr[mode].reload_store);
17089 if (REG_P (x) || register_operand (x, mode))
17091 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
17092 bool altivec_p = (rclass == ALTIVEC_REGS);
17093 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
17095 if (!in_p)
17097 enum rs6000_reg_type exchange = to_type;
17098 to_type = from_type;
17099 from_type = exchange;
17102 /* Can we do a direct move of some sort? */
17103 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
17104 altivec_p))
17106 icode = (enum insn_code)sri->icode;
17107 default_p = false;
17108 done_p = true;
17109 ret = NO_REGS;
17113 /* Make sure 0.0 is not reloaded or forced into memory. */
17114 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
17116 ret = NO_REGS;
17117 default_p = false;
17118 done_p = true;
17121 /* If this is a scalar floating point value and we want to load it into the
17122 traditional Altivec registers, do it by moving through a traditional
17123 floating point register. Also make sure that non-zero constants use an FPR. */
17124 if (!done_p && reg_addr[mode].scalar_in_vmx_p
17125 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17126 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
17128 ret = FLOAT_REGS;
17129 default_p = false;
17130 done_p = true;
17133 /* Handle reload of load/stores if we have reload helper functions. */
17134 if (!done_p && icode != CODE_FOR_nothing && memory_p)
17136 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
17137 mode);
17139 if (extra_cost >= 0)
17141 done_p = true;
17142 ret = NO_REGS;
17143 if (extra_cost > 0)
17145 sri->extra_cost = extra_cost;
17146 sri->icode = icode;
17151 /* Handle unaligned loads and stores of integer registers. */
17152 if (!done_p && TARGET_POWERPC64
17153 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17154 && memory_p
17155 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
17157 rtx addr = XEXP (x, 0);
17158 rtx off = address_offset (addr);
17160 if (off != NULL_RTX)
17162 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17163 unsigned HOST_WIDE_INT offset = INTVAL (off);
17165 /* We need a secondary reload when our legitimate_address_p
17166 says the address is good (as otherwise the entire address
17167 will be reloaded), and the offset is not a multiple of
17168 four or we have an address wrap. Address wrap will only
17169 occur for LO_SUMs since legitimate_offset_address_p
17170 rejects addresses for 16-byte mems that will wrap. */
17171 if (GET_CODE (addr) == LO_SUM
17172 ? (1 /* legitimate_address_p allows any offset for lo_sum */
17173 && ((offset & 3) != 0
17174 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
17175 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
17176 && (offset & 3) != 0))
17178 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
17179 if (in_p)
17180 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
17181 : CODE_FOR_reload_di_load);
17182 else
17183 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
17184 : CODE_FOR_reload_di_store);
17185 sri->extra_cost = 2;
17186 ret = NO_REGS;
17187 done_p = true;
17189 else
17190 default_p = true;
17192 else
17193 default_p = true;
17196 if (!done_p && !TARGET_POWERPC64
17197 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17198 && memory_p
17199 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
17201 rtx addr = XEXP (x, 0);
17202 rtx off = address_offset (addr);
17204 if (off != NULL_RTX)
17206 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17207 unsigned HOST_WIDE_INT offset = INTVAL (off);
17209 /* We need a secondary reload when our legitimate_address_p
17210 says the address is good (as otherwise the entire address
17211 will be reloaded), and we have a wrap.
17213 legitimate_lo_sum_address_p allows LO_SUM addresses to
17214 have any offset so test for wrap in the low 16 bits.
17216 legitimate_offset_address_p checks for the range
17217 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
17218 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
17219 [0x7ff4,0x7fff] respectively, so test for the
17220 intersection of these ranges, [0x7ffc,0x7fff] and
17221 [0x7ff4,0x7ff7] respectively.
17223 Note that the address we see here may have been
17224 manipulated by legitimize_reload_address. */
17225 if (GET_CODE (addr) == LO_SUM
17226 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
17227 : offset - (0x8000 - extra) < UNITS_PER_WORD)
17229 if (in_p)
17230 sri->icode = CODE_FOR_reload_si_load;
17231 else
17232 sri->icode = CODE_FOR_reload_si_store;
17233 sri->extra_cost = 2;
17234 ret = NO_REGS;
17235 done_p = true;
17237 else
17238 default_p = true;
17240 else
17241 default_p = true;
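      /* Concretely, for an 8-byte access EXTRA is 4, so the unsigned test
         "offset - 0x7ffc < 4" above fires exactly for offsets
         0x7ffc..0x7fff, the range whose second word would wrap past the
         16-bit displacement limit.  */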
17244 if (!done_p)
17245 default_p = true;
17247 if (default_p)
17248 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
17250 gcc_assert (ret != ALL_REGS);
17252 if (TARGET_DEBUG_ADDR)
17254 fprintf (stderr,
17255 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
17256 "mode = %s",
17257 reg_class_names[ret],
17258 in_p ? "true" : "false",
17259 reg_class_names[rclass],
17260 GET_MODE_NAME (mode));
17262 if (reload_completed)
17263 fputs (", after reload", stderr);
17265 if (!done_p)
17266 fputs (", done_p not set", stderr);
17268 if (default_p)
17269 fputs (", default secondary reload", stderr);
17271 if (sri->icode != CODE_FOR_nothing)
17272 fprintf (stderr, ", reload func = %s, extra cost = %d",
17273 insn_data[sri->icode].name, sri->extra_cost);
17275 fputs ("\n", stderr);
17276 debug_rtx (x);
17279 return ret;
17282 /* Better tracing for rs6000_secondary_reload_inner. */
17284 static void
17285 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
17286 bool store_p)
17288 rtx set, clobber;
17290 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
17292 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
17293 store_p ? "store" : "load");
17295 if (store_p)
17296 set = gen_rtx_SET (VOIDmode, mem, reg);
17297 else
17298 set = gen_rtx_SET (VOIDmode, reg, mem);
17300 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
17301 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
17304 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
17305 ATTRIBUTE_NORETURN;
17307 static void
17308 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
17309 bool store_p)
17311 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
17312 gcc_unreachable ();
17315 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
17316 reload helper functions. These were identified in
17317 rs6000_secondary_reload_memory, and if reload decided to use the secondary
17318 reload, it calls the insns:
17319 reload_<RELOAD:mode>_<P:mptrsize>_store
17320 reload_<RELOAD:mode>_<P:mptrsize>_load
17322 which in turn calls this function, to do whatever is necessary to create
17323 valid addresses. */
17325 void
17326 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
17328 int regno = true_regnum (reg);
17329 machine_mode mode = GET_MODE (reg);
17330 addr_mask_type addr_mask;
17331 rtx addr;
17332 rtx new_addr;
17333 rtx op_reg, op0, op1;
17334 rtx and_op;
17335 rtx cc_clobber;
17336 rtvec rv;
17338 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
17339 || !base_reg_operand (scratch, GET_MODE (scratch)))
17340 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17342 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
17343 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
17345 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
17346 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
17348 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
17349 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
17351 else
17352 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17354 /* Make sure the mode is valid in this register class. */
17355 if ((addr_mask & RELOAD_REG_VALID) == 0)
17356 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17358 if (TARGET_DEBUG_ADDR)
17359 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
17361 new_addr = addr = XEXP (mem, 0);
17362 switch (GET_CODE (addr))
17364 /* Does the register class support auto update forms for this mode? If
17365 not, do the update now. We don't need a scratch register, since the
17366 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
17367 case PRE_INC:
17368 case PRE_DEC:
17369 op_reg = XEXP (addr, 0);
17370 if (!base_reg_operand (op_reg, Pmode))
17371 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17373 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
17375 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
17376 new_addr = op_reg;
17378 break;
17380 case PRE_MODIFY:
17381 op0 = XEXP (addr, 0);
17382 op1 = XEXP (addr, 1);
17383 if (!base_reg_operand (op0, Pmode)
17384 || GET_CODE (op1) != PLUS
17385 || !rtx_equal_p (op0, XEXP (op1, 0)))
17386 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17388 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
17390 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17391 new_addr = op0;
17393 break;
17395 /* Do we need to simulate AND -16 to clear the bottom address bits used
17396 in VMX load/stores? */
17397 case AND:
17398 op0 = XEXP (addr, 0);
17399 op1 = XEXP (addr, 1);
17400 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
17402 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
17403 op_reg = op0;
17405 else if (GET_CODE (op1) == PLUS)
17407 emit_insn (gen_rtx_SET (VOIDmode, scratch, op1));
17408 op_reg = scratch;
17411 else
17412 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17414 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
17415 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
17416 rv = gen_rtvec (2, gen_rtx_SET (VOIDmode, scratch, and_op), cc_clobber);
17417 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
17418 new_addr = scratch;
17420 break;
17422 /* If this is an indirect address, make sure it is a base register. */
17423 case REG:
17424 case SUBREG:
17425 if (!base_reg_operand (addr, GET_MODE (addr)))
17427 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17428 new_addr = scratch;
17430 break;
17432 /* If this is an indexed address, make sure the register class can handle
17433 indexed addresses for this mode. */
17434 case PLUS:
17435 op0 = XEXP (addr, 0);
17436 op1 = XEXP (addr, 1);
17437 if (!base_reg_operand (op0, Pmode))
17438 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17440 else if (int_reg_operand (op1, Pmode))
17442 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17444 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17445 new_addr = scratch;
17449 /* Make sure the register class can handle offset addresses. */
17450 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
17452 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17454 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17455 new_addr = scratch;
17459 else
17460 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17462 break;
17464 case LO_SUM:
17465 op0 = XEXP (addr, 0);
17466 op1 = XEXP (addr, 1);
17467 if (!base_reg_operand (op0, Pmode))
17468 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17470 else if (int_reg_operand (op1, Pmode))
17472 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17474 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17475 new_addr = scratch;
17479 /* Make sure the register class can handle offset addresses. */
17480 else if (legitimate_lo_sum_address_p (mode, addr, false))
17482 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17484 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17485 new_addr = scratch;
17489 else
17490 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17492 break;
17494 case SYMBOL_REF:
17495 case CONST:
17496 case LABEL_REF:
17497 rs6000_emit_move (scratch, addr, Pmode);
17498 new_addr = scratch;
17499 break;
17501 default:
17502 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17505 /* Adjust the address if it changed. */
17506 if (addr != new_addr)
17508 mem = replace_equiv_address_nv (mem, new_addr);
17509 if (TARGET_DEBUG_ADDR)
17510 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
17513 /* Now create the move. */
17514 if (store_p)
17515 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17516 else
17517 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17519 return;
17522 /* Convert reloads involving 64-bit gprs and misaligned offset
17523 addressing, or multiple 32-bit gprs and offsets that are too large,
17524 to use indirect addressing. */
17526 void
17527 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
17529 int regno = true_regnum (reg);
17530 enum reg_class rclass;
17531 rtx addr;
17532 rtx scratch_or_premodify = scratch;
17534 if (TARGET_DEBUG_ADDR)
17536 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
17537 store_p ? "store" : "load");
17538 fprintf (stderr, "reg:\n");
17539 debug_rtx (reg);
17540 fprintf (stderr, "mem:\n");
17541 debug_rtx (mem);
17542 fprintf (stderr, "scratch:\n");
17543 debug_rtx (scratch);
17546 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
17547 gcc_assert (GET_CODE (mem) == MEM);
17548 rclass = REGNO_REG_CLASS (regno);
17549 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
17550 addr = XEXP (mem, 0);
17552 if (GET_CODE (addr) == PRE_MODIFY)
17554 scratch_or_premodify = XEXP (addr, 0);
17555 gcc_assert (REG_P (scratch_or_premodify));
17556 addr = XEXP (addr, 1);
17558 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
17560 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
17562 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
17564 /* Now create the move. */
17565 if (store_p)
17566 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17567 else
17568 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17570 return;
17573 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
17574 this function has any SDmode references. If we are on a power7 or later, we
17575 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
17576 can load/store the value. */
17578 static void
17579 rs6000_alloc_sdmode_stack_slot (void)
17581 tree t;
17582 basic_block bb;
17583 gimple_stmt_iterator gsi;
17585 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
17586 /* We use a different approach for dealing with the secondary
17587 memory in LRA. */
17588 if (ira_use_lra_p)
17589 return;
17591 if (TARGET_NO_SDMODE_STACK)
17592 return;
17594 FOR_EACH_BB_FN (bb, cfun)
17595 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
17597 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
17598 if (ret)
17600 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17601 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17602 SDmode, 0);
17603 return;
17607 /* Check for any SDmode parameters of the function. */
17608 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
17610 if (TREE_TYPE (t) == error_mark_node)
17611 continue;
17613 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
17614 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
17616 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17617 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17618 SDmode, 0);
17619 return;
17624 static void
17625 rs6000_instantiate_decls (void)
17627 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
17628 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
17631 /* Given an rtx X being reloaded into a reg required to be
17632 in class CLASS, return the class of reg to actually use.
17633 In general this is just CLASS; but on some machines
17634 in some cases it is preferable to use a more restrictive class.
17636 On the RS/6000, we have to return NO_REGS when we want to reload a
17637 floating-point CONST_DOUBLE to force it to be copied to memory.
17639 We also don't want to reload integer values into floating-point
17640 registers if we can at all help it. In fact, this can
17641 cause reload to die, if it tries to generate a reload of CTR
17642 into a FP register and discovers it doesn't have the memory location
17643 required.
17645 ??? Would it be a good idea to have reload do the converse, that is
17646 try to reload floating modes into FP registers if possible?
17649 static enum reg_class
17650 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
17652 machine_mode mode = GET_MODE (x);
17653 bool is_constant = CONSTANT_P (x);
17655 /* Do VSX tests before handling traditional floating point registers. */
17656 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17658 if (is_constant)
17660 /* Zero is always allowed in all VSX registers. */
17661 if (x == CONST0_RTX (mode))
17662 return rclass;
17664 /* If this is a vector constant that can be formed with a few Altivec
17665 instructions, we want altivec registers. */
17666 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
17667 return ALTIVEC_REGS;
17669 /* Force constant to memory. */
17670 return NO_REGS;
17673 /* If this is a scalar floating point value, prefer the traditional
17674 floating point registers so that we can use D-form (register+offset)
17675 addressing. */
17676 if (GET_MODE_SIZE (mode) < 16)
17677 return FLOAT_REGS;
17679 /* Prefer the Altivec registers if Altivec is handling the vector
17680 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
17681 loads. */
17682 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
17683 || mode == V1TImode)
17684 return ALTIVEC_REGS;
17686 return rclass;
17689 if (is_constant || GET_CODE (x) == PLUS)
17691 if (reg_class_subset_p (GENERAL_REGS, rclass))
17692 return GENERAL_REGS;
17693 if (reg_class_subset_p (BASE_REGS, rclass))
17694 return BASE_REGS;
17695 return NO_REGS;
17698 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
17699 return GENERAL_REGS;
17701 return rclass;
17704 /* Debug version of rs6000_preferred_reload_class. */
17705 static enum reg_class
17706 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
17708 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
17710 fprintf (stderr,
17711 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
17712 "mode = %s, x:\n",
17713 reg_class_names[ret], reg_class_names[rclass],
17714 GET_MODE_NAME (GET_MODE (x)));
17715 debug_rtx (x);
17717 return ret;
17720 /* If we are copying between FP or AltiVec registers and anything else, we need
17721 a memory location. The exception is when we are targeting ppc64 and the
17722 direct move instructions between FPRs and GPRs are available. Also, under VSX, you
17723 can copy vector registers from the FP register set to the Altivec register
17724 set and vice versa. */
17726 static bool
17727 rs6000_secondary_memory_needed (enum reg_class from_class,
17728 enum reg_class to_class,
17729 machine_mode mode)
17731 enum rs6000_reg_type from_type, to_type;
17732 bool altivec_p = ((from_class == ALTIVEC_REGS)
17733 || (to_class == ALTIVEC_REGS));
17735 /* If a simple/direct move is available, we don't need secondary memory. */
17736 from_type = reg_class_to_reg_type[(int)from_class];
17737 to_type = reg_class_to_reg_type[(int)to_class];
17739 if (rs6000_secondary_reload_move (to_type, from_type, mode,
17740 (secondary_reload_info *)0, altivec_p))
17741 return false;
17743 /* If we have a floating point or vector register class, we need to use
17744 memory to transfer the data. */
17745 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
17746 return true;
17748 return false;
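/* For instance, without direct moves an SFmode copy between FLOAT_REGS
   and GENERAL_REGS must go through memory, while on a 64-bit power8 the
   direct-move path above makes the memory round trip unnecessary.  */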
17751 /* Debug version of rs6000_secondary_memory_needed. */
17752 static bool
17753 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
17754 enum reg_class to_class,
17755 machine_mode mode)
17757 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
17759 fprintf (stderr,
17760 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
17761 "to_class = %s, mode = %s\n",
17762 ret ? "true" : "false",
17763 reg_class_names[from_class],
17764 reg_class_names[to_class],
17765 GET_MODE_NAME (mode));
17767 return ret;
17770 /* Return the register class of a scratch register needed to copy IN into
17771 or out of a register in RCLASS in MODE. If it can be done directly,
17772 NO_REGS is returned. */
17774 static enum reg_class
17775 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
17776 rtx in)
17778 int regno;
17780 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
17781 #if TARGET_MACHO
17782 && MACHOPIC_INDIRECT
17783 #endif
17786 /* We cannot copy a symbolic operand directly into anything
17787 other than BASE_REGS for TARGET_ELF. So indicate that a
17788 register from BASE_REGS is needed as an intermediate
17789 register.
17791 On Darwin, pic addresses require a load from memory, which
17792 needs a base register. */
17793 if (rclass != BASE_REGS
17794 && (GET_CODE (in) == SYMBOL_REF
17795 || GET_CODE (in) == HIGH
17796 || GET_CODE (in) == LABEL_REF
17797 || GET_CODE (in) == CONST))
17798 return BASE_REGS;
17801 if (GET_CODE (in) == REG)
17803 regno = REGNO (in);
17804 if (regno >= FIRST_PSEUDO_REGISTER)
17806 regno = true_regnum (in);
17807 if (regno >= FIRST_PSEUDO_REGISTER)
17808 regno = -1;
17811 else if (GET_CODE (in) == SUBREG)
17813 regno = true_regnum (in);
17814 if (regno >= FIRST_PSEUDO_REGISTER)
17815 regno = -1;
17817 else
17818 regno = -1;
17820 /* If we have VSX register moves, prefer moving scalar values between
17821 Altivec registers and GPR by going via an FPR (and then via memory)
17822 instead of reloading the secondary memory address for Altivec moves. */
17823 if (TARGET_VSX
17824 && GET_MODE_SIZE (mode) < 16
17825 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
17826 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
17827 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17828 && (regno >= 0 && INT_REGNO_P (regno)))))
17829 return FLOAT_REGS;
17831 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
17832 into anything. */
17833 if (rclass == GENERAL_REGS || rclass == BASE_REGS
17834 || (regno >= 0 && INT_REGNO_P (regno)))
17835 return NO_REGS;
17837 /* Constants, memory, and VSX registers can go into VSX registers (both the
17838 traditional floating point and the altivec registers). */
17839 if (rclass == VSX_REGS
17840 && (regno == -1 || VSX_REGNO_P (regno)))
17841 return NO_REGS;
17843 /* Constants, memory, and FP registers can go into FP registers. */
17844 if ((regno == -1 || FP_REGNO_P (regno))
17845 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
17846 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
17848 /* Memory and AltiVec registers can go into AltiVec registers. */
17849 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
17850 && rclass == ALTIVEC_REGS)
17851 return NO_REGS;
17853 /* We can copy among the CR registers. */
17854 if ((rclass == CR_REGS || rclass == CR0_REGS)
17855 && regno >= 0 && CR_REGNO_P (regno))
17856 return NO_REGS;
17858 /* Otherwise, we need GENERAL_REGS. */
17859 return GENERAL_REGS;
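/* For instance, under TARGET_ELF copying a SYMBOL_REF into FLOAT_REGS
   reports BASE_REGS as the scratch class: the address must be formed in
   a base register before the value can reach an FPR.  */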
17862 /* Debug version of rs6000_secondary_reload_class. */
17863 static enum reg_class
17864 rs6000_debug_secondary_reload_class (enum reg_class rclass,
17865 machine_mode mode, rtx in)
17867 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
17868 fprintf (stderr,
17869 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
17870 "mode = %s, input rtx:\n",
17871 reg_class_names[ret], reg_class_names[rclass],
17872 GET_MODE_NAME (mode));
17873 debug_rtx (in);
17875 return ret;
17878 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
17880 static bool
17881 rs6000_cannot_change_mode_class (machine_mode from,
17882 machine_mode to,
17883 enum reg_class rclass)
17885 unsigned from_size = GET_MODE_SIZE (from);
17886 unsigned to_size = GET_MODE_SIZE (to);
17888 if (from_size != to_size)
17890 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
17892 if (reg_classes_intersect_p (xclass, rclass))
17894 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
17895 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
17897 /* Don't allow 64-bit types to overlap with 128-bit types that take a
17898 single register under VSX because the scalar part of the register
17899 is in the upper 64 bits, and not the lower 64 bits. Types like
17900 TFmode/TDmode that take two scalar registers can overlap. 128-bit
17901 IEEE floating point can't overlap, and neither can small
17902 values. */
17904 if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
17905 return true;
17907 /* TDmode in floating-mode registers must always go into a register
17908 pair with the most significant word in the even-numbered register
17909 to match ISA requirements. In little-endian mode, this does not
17910 match subreg numbering, so we cannot allow subregs. */
17911 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
17912 return true;
17914 if (from_size < 8 || to_size < 8)
17915 return true;
17917 if (from_size == 8 && (8 * to_nregs) != to_size)
17918 return true;
17920 if (to_size == 8 && (8 * from_nregs) != from_size)
17921 return true;
17923 return false;
17925 else
17926 return false;
17929 if (TARGET_E500_DOUBLE
17930 && ((((to) == DFmode) + ((from) == DFmode)) == 1
17931 || (((to) == TFmode) + ((from) == TFmode)) == 1
17932 || (((to) == DDmode) + ((from) == DDmode)) == 1
17933 || (((to) == TDmode) + ((from) == TDmode)) == 1
17934 || (((to) == DImode) + ((from) == DImode)) == 1))
17935 return true;
17937 /* Since the VSX register set includes traditional floating point registers
17938 and altivec registers, just check for the size being different instead of
17939 trying to check whether the modes are vector modes. Otherwise it would
17940 not allow, say, DF and DI to change classes. For types like TFmode and
17941 TDmode that take 2 64-bit registers, rather than a single 128-bit
17942 register, don't allow subregs of those types to other 128-bit types. */
17943 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17945 unsigned num_regs = (from_size + 15) / 16;
17946 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
17947 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
17948 return true;
17950 return (from_size != 8 && from_size != 16);
17953 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
17954 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
17955 return true;
17957 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
17958 && reg_classes_intersect_p (GENERAL_REGS, rclass))
17959 return true;
17961 return false;
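/* Worked example (illustrative, not from the original source): with
   TARGET_VSX, rs6000_cannot_change_mode_class (DImode, V2DImode,
   VSX_REGS) returns true -- the sizes differ, from_size == 8, and
   8 * to_nregs != 16, reflecting the fact that a scalar lives in the
   upper 64 bits of a VSX register and cannot be punned in place to a
   full 16-byte vector.  */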
17964 /* Debug version of rs6000_cannot_change_mode_class. */
17965 static bool
17966 rs6000_debug_cannot_change_mode_class (machine_mode from,
17967 machine_mode to,
17968 enum reg_class rclass)
17970 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
17972 fprintf (stderr,
17973 "rs6000_cannot_change_mode_class, return %s, from = %s, "
17974 "to = %s, rclass = %s\n",
17975 ret ? "true" : "false",
17976 GET_MODE_NAME (from), GET_MODE_NAME (to),
17977 reg_class_names[rclass]);
17979 return ret;
17982 /* Return a string to do a move operation of 128 bits of data. */
17984 const char *
17985 rs6000_output_move_128bit (rtx operands[])
17987 rtx dest = operands[0];
17988 rtx src = operands[1];
17989 machine_mode mode = GET_MODE (dest);
17990 int dest_regno;
17991 int src_regno;
17992 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
17993 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
17995 if (REG_P (dest))
17997 dest_regno = REGNO (dest);
17998 dest_gpr_p = INT_REGNO_P (dest_regno);
17999 dest_fp_p = FP_REGNO_P (dest_regno);
18000 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
18001 dest_vsx_p = dest_fp_p | dest_vmx_p;
18003 else
18005 dest_regno = -1;
18006 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
18009 if (REG_P (src))
18011 src_regno = REGNO (src);
18012 src_gpr_p = INT_REGNO_P (src_regno);
18013 src_fp_p = FP_REGNO_P (src_regno);
18014 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
18015 src_vsx_p = src_fp_p | src_vmx_p;
18017 else
18019 src_regno = -1;
18020 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
18023 /* Register moves. */
18024 if (dest_regno >= 0 && src_regno >= 0)
18026 if (dest_gpr_p)
18028 if (src_gpr_p)
18029 return "#";
18031 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
18032 return "#";
18035 else if (TARGET_VSX && dest_vsx_p)
18037 if (src_vsx_p)
18038 return "xxlor %x0,%x1,%x1";
18040 else if (TARGET_DIRECT_MOVE && src_gpr_p)
18041 return "#";
18044 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
18045 return "vor %0,%1,%1";
18047 else if (dest_fp_p && src_fp_p)
18048 return "#";
18051 /* Loads. */
18052 else if (dest_regno >= 0 && MEM_P (src))
18054 if (dest_gpr_p)
18056 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
18057 return "lq %0,%1";
18058 else
18059 return "#";
18062 else if (TARGET_ALTIVEC && dest_vmx_p
18063 && altivec_indexed_or_indirect_operand (src, mode))
18064 return "lvx %0,%y1";
18066 else if (TARGET_VSX && dest_vsx_p)
18068 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
18069 return "lxvw4x %x0,%y1";
18070 else
18071 return "lxvd2x %x0,%y1";
18074 else if (TARGET_ALTIVEC && dest_vmx_p)
18075 return "lvx %0,%y1";
18077 else if (dest_fp_p)
18078 return "#";
18081 /* Stores. */
18082 else if (src_regno >= 0 && MEM_P (dest))
18084 if (src_gpr_p)
18086 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
18087 return "stq %1,%0";
18088 else
18089 return "#";
18092 else if (TARGET_ALTIVEC && src_vmx_p
18093 && altivec_indexed_or_indirect_operand (dest, mode))
18094 return "stvx %1,%y0";
18096 else if (TARGET_VSX && src_vsx_p)
18098 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
18099 return "stxvw4x %x1,%y0";
18100 else
18101 return "stxvd2x %x1,%y0";
18104 else if (TARGET_ALTIVEC && src_vmx_p)
18105 return "stvx %1,%y0";
18107 else if (src_fp_p)
18108 return "#";
18111 /* Constants. */
18112 else if (dest_regno >= 0
18113 && (GET_CODE (src) == CONST_INT
18114 || GET_CODE (src) == CONST_WIDE_INT
18115 || GET_CODE (src) == CONST_DOUBLE
18116 || GET_CODE (src) == CONST_VECTOR))
18118 if (dest_gpr_p)
18119 return "#";
18121 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
18122 return "xxlxor %x0,%x0,%x0";
18124 else if (TARGET_ALTIVEC && dest_vmx_p)
18125 return output_vec_const_move (operands);
18128 if (TARGET_DEBUG_ADDR)
18130 fprintf (stderr, "\n===== Bad 128 bit move:\n");
18131 debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
18134 gcc_unreachable ();
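/* Usage sketch (illustrative): a V4SImode load into a VSX register
   comes back as "lxvw4x %x0,%y1", a VSX-to-VSX copy as
   "xxlor %x0,%x1,%x1", and a GPR-to-GPR TImode copy as "#" so that
   rs6000_split_128bit_ok_p can split it into word-sized moves after
   reload.  */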
18137 /* Validate a 128-bit move. */
18138 bool
18139 rs6000_move_128bit_ok_p (rtx operands[])
18141 machine_mode mode = GET_MODE (operands[0]);
18142 return (gpc_reg_operand (operands[0], mode)
18143 || gpc_reg_operand (operands[1], mode));
18146 /* Return true if a 128-bit move needs to be split. */
18147 bool
18148 rs6000_split_128bit_ok_p (rtx operands[])
18150 if (!reload_completed)
18151 return false;
18153 if (!gpr_or_gpr_p (operands[0], operands[1]))
18154 return false;
18156 if (quad_load_store_p (operands[0], operands[1]))
18157 return false;
18159 return true;
18163 /* Given a comparison operation, return the bit number in CCR to test. We
18164 know this is a valid comparison.
18166 SCC_P is 1 if this is for an scc. That means that %D will have been
18167 used instead of %C, so the bits will be in different places.
18169 Return -1 if OP isn't a valid comparison for some reason. */
18171 int
18172 ccr_bit (rtx op, int scc_p)
18174 enum rtx_code code = GET_CODE (op);
18175 machine_mode cc_mode;
18176 int cc_regnum;
18177 int base_bit;
18178 rtx reg;
18180 if (!COMPARISON_P (op))
18181 return -1;
18183 reg = XEXP (op, 0);
18185 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
18187 cc_mode = GET_MODE (reg);
18188 cc_regnum = REGNO (reg);
18189 base_bit = 4 * (cc_regnum - CR0_REGNO);
18191 validate_condition_mode (code, cc_mode);
18193 /* When generating a sCOND operation, only positive conditions are
18194 allowed. */
18195 gcc_assert (!scc_p
18196 || code == EQ || code == GT || code == LT || code == UNORDERED
18197 || code == GTU || code == LTU);
18199 switch (code)
18201 case NE:
18202 return scc_p ? base_bit + 3 : base_bit + 2;
18203 case EQ:
18204 return base_bit + 2;
18205 case GT: case GTU: case UNLE:
18206 return base_bit + 1;
18207 case LT: case LTU: case UNGE:
18208 return base_bit;
18209 case ORDERED: case UNORDERED:
18210 return base_bit + 3;
18212 case GE: case GEU:
18213 /* If scc, we will have done a cror to put the bit in the
18214 unordered position. So test that bit. For integer, this is ! LT
18215 unless this is an scc insn. */
18216 return scc_p ? base_bit + 3 : base_bit;
18218 case LE: case LEU:
18219 return scc_p ? base_bit + 3 : base_bit + 1;
18221 default:
18222 gcc_unreachable ();
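/* Worked example (illustrative): for a GT test against CR field 1,
   base_bit is 4 * (CR1_REGNO - CR0_REGNO) = 4, so ccr_bit returns
   4 + 1 = 5 -- bit 1 (the GT bit) within that field.  With SCC_P set,
   GE instead maps to base_bit + 3, the bit a preceding cror parked in
   the unordered position.  */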
18226 /* Return the GOT register. */
18228 rtx
18229 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
18231 /* The second flow pass currently (June 1999) can't update
18232 regs_ever_live without disturbing other parts of the compiler, so
18233 update it here to make the prolog/epilogue code happy. */
18234 if (!can_create_pseudo_p ()
18235 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
18236 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
18238 crtl->uses_pic_offset_table = 1;
18240 return pic_offset_table_rtx;
18243 static rs6000_stack_t stack_info;
18245 /* Function to init struct machine_function.
18246 This will be called, via a pointer variable,
18247 from push_function_context. */
18249 static struct machine_function *
18250 rs6000_init_machine_status (void)
18252 stack_info.reload_completed = 0;
18253 return ggc_cleared_alloc<machine_function> ();
18256 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
18258 int
18259 extract_MB (rtx op)
18261 int i;
18262 unsigned long val = INTVAL (op);
18264 /* If the high bit is zero, the value is the first 1 bit we find
18265 from the left. */
18266 if ((val & 0x80000000) == 0)
18268 gcc_assert (val & 0xffffffff);
18270 i = 1;
18271 while (((val <<= 1) & 0x80000000) == 0)
18272 ++i;
18273 return i;
18276 /* If the high bit is set and the low bit is not, or the mask is all
18277 1's, the value is zero. */
18278 if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
18279 return 0;
18281 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18282 from the right. */
18283 i = 31;
18284 while (((val >>= 1) & 1) != 0)
18285 --i;
18287 return i;
18290 int
18291 extract_ME (rtx op)
18293 int i;
18294 unsigned long val = INTVAL (op);
18296 /* If the low bit is zero, the value is the first 1 bit we find from
18297 the right. */
18298 if ((val & 1) == 0)
18300 gcc_assert (val & 0xffffffff);
18302 i = 30;
18303 while (((val >>= 1) & 1) == 0)
18304 --i;
18306 return i;
18309 /* If the low bit is set and the high bit is not, or the mask is all
18310 1's, the value is 31. */
18311 if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
18312 return 31;
18314 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18315 from the left. */
18316 i = 0;
18317 while (((val <<= 1) & 0x80000000) != 0)
18318 ++i;
18320 return i;
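/* Worked example (illustrative): for the mask 0x00fff000, the set bits
   are 8..19 in the big-endian numbering used by rlwinm, so extract_MB
   returns 8 and extract_ME returns 19.  A wrap-around mask such as
   0xff0000ff yields MB = 24 and ME = 7 instead.  */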
18323 /* Write out a function code label. */
18325 void
18326 rs6000_output_function_entry (FILE *file, const char *fname)
18328 if (fname[0] != '.')
18330 switch (DEFAULT_ABI)
18332 default:
18333 gcc_unreachable ();
18335 case ABI_AIX:
18336 if (DOT_SYMBOLS)
18337 putc ('.', file);
18338 else
18339 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
18340 break;
18342 case ABI_ELFv2:
18343 case ABI_V4:
18344 case ABI_DARWIN:
18345 break;
18349 RS6000_OUTPUT_BASENAME (file, fname);
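/* Example (illustrative): under ABI_AIX with DOT_SYMBOLS, calling
   rs6000_output_function_entry (file, "foo") prints ".foo", the code
   entry-point label, as distinct from "foo", which names the function
   descriptor.  */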
18352 /* Print an operand. Recognize special options, documented below. */
18354 #if TARGET_ELF
18355 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
18356 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
18357 #else
18358 #define SMALL_DATA_RELOC "sda21"
18359 #define SMALL_DATA_REG 0
18360 #endif
18362 void
18363 print_operand (FILE *file, rtx x, int code)
18365 int i;
18366 unsigned HOST_WIDE_INT uval;
18368 switch (code)
18370 /* %a is output_address. */
18372 case 'b':
18373 /* If constant, low-order 16 bits of constant, unsigned.
18374 Otherwise, write normally. */
18375 if (INT_P (x))
18376 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
18377 else
18378 print_operand (file, x, 0);
18379 return;
18381 case 'B':
18382 /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
18383 for 64-bit mask direction. */
18384 putc (((INTVAL (x) & 1) == 0 ? 'r' : 'l'), file);
18385 return;
18387 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
18388 output_operand. */
18390 case 'D':
18391 /* Like 'J' but get to the GT bit only. */
18392 gcc_assert (REG_P (x));
18394 /* Bit 1 is GT bit. */
18395 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
18397 /* Add one for shift count in rlinm for scc. */
18398 fprintf (file, "%d", i + 1);
18399 return;
18401 case 'e':
18402 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
18403 if (! INT_P (x))
18405 output_operand_lossage ("invalid %%e value");
18406 return;
18409 uval = INTVAL (x);
18410 if ((uval & 0xffff) == 0 && uval != 0)
18411 putc ('s', file);
18412 return;
18414 case 'E':
18415 /* X is a CR register. Print the number of the EQ bit of the CR */
18416 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18417 output_operand_lossage ("invalid %%E value");
18418 else
18419 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
18420 return;
18422 case 'f':
18423 /* X is a CR register. Print the shift count needed to move it
18424 to the high-order four bits. */
18425 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18426 output_operand_lossage ("invalid %%f value");
18427 else
18428 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
18429 return;
18431 case 'F':
18432 /* Similar, but print the count for the rotate in the opposite
18433 direction. */
18434 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18435 output_operand_lossage ("invalid %%F value");
18436 else
18437 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
18438 return;
18440 case 'G':
18441 /* X is a constant integer. If it is negative, print "m",
18442 otherwise print "z". This is to make an aze or ame insn. */
18443 if (GET_CODE (x) != CONST_INT)
18444 output_operand_lossage ("invalid %%G value");
18445 else if (INTVAL (x) >= 0)
18446 putc ('z', file);
18447 else
18448 putc ('m', file);
18449 return;
18451 case 'h':
18452 /* If constant, output low-order five bits. Otherwise, write
18453 normally. */
18454 if (INT_P (x))
18455 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
18456 else
18457 print_operand (file, x, 0);
18458 return;
18460 case 'H':
18461 /* If constant, output low-order six bits. Otherwise, write
18462 normally. */
18463 if (INT_P (x))
18464 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
18465 else
18466 print_operand (file, x, 0);
18467 return;
18469 case 'I':
18470 /* Print `i' if this is a constant, else nothing. */
18471 if (INT_P (x))
18472 putc ('i', file);
18473 return;
18475 case 'j':
18476 /* Write the bit number in CCR for jump. */
18477 i = ccr_bit (x, 0);
18478 if (i == -1)
18479 output_operand_lossage ("invalid %%j code");
18480 else
18481 fprintf (file, "%d", i);
18482 return;
18484 case 'J':
18485 /* Similar, but add one for shift count in rlinm for scc and pass
18486 scc flag to `ccr_bit'. */
18487 i = ccr_bit (x, 1);
18488 if (i == -1)
18489 output_operand_lossage ("invalid %%J code");
18490 else
18491 /* If we want bit 31, write a shift count of zero, not 32. */
18492 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18493 return;
18495 case 'k':
18496 /* X must be a constant. Write the 1's complement of the
18497 constant. */
18498 if (! INT_P (x))
18499 output_operand_lossage ("invalid %%k value");
18500 else
18501 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
18502 return;
18504 case 'K':
18505 /* X must be a symbolic constant on ELF. Write an
18506 expression suitable for an 'addi' that adds in the low 16
18507 bits of the MEM. */
18508 if (GET_CODE (x) == CONST)
18510 if (GET_CODE (XEXP (x, 0)) != PLUS
18511 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
18512 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
18513 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
18514 output_operand_lossage ("invalid %%K value");
18516 print_operand_address (file, x);
18517 fputs ("@l", file);
18518 return;
18520 /* %l is output_asm_label. */
18522 case 'L':
18523 /* Write second word of DImode or DFmode reference. Works on register
18524 or non-indexed memory only. */
18525 if (REG_P (x))
18526 fputs (reg_names[REGNO (x) + 1], file);
18527 else if (MEM_P (x))
18529 /* Handle possible auto-increment. Since it is pre-increment and
18530 we have already done it, we can just use an offset of one word. */
18531 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18532 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18533 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18534 UNITS_PER_WORD));
18535 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18536 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18537 UNITS_PER_WORD));
18538 else
18539 output_address (XEXP (adjust_address_nv (x, SImode,
18540 UNITS_PER_WORD),
18541 0));
18543 if (small_data_operand (x, GET_MODE (x)))
18544 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18545 reg_names[SMALL_DATA_REG]);
18547 return;
18549 case 'm':
18550 /* MB value for a mask operand. */
18551 if (! mask_operand (x, SImode))
18552 output_operand_lossage ("invalid %%m value");
18554 fprintf (file, "%d", extract_MB (x));
18555 return;
18557 case 'M':
18558 /* ME value for a mask operand. */
18559 if (! mask_operand (x, SImode))
18560 output_operand_lossage ("invalid %%M value");
18562 fprintf (file, "%d", extract_ME (x));
18563 return;
18565 /* %n outputs the negative of its operand. */
18567 case 'N':
18568 /* Write the number of elements in the vector times 4. */
18569 if (GET_CODE (x) != PARALLEL)
18570 output_operand_lossage ("invalid %%N value");
18571 else
18572 fprintf (file, "%d", XVECLEN (x, 0) * 4);
18573 return;
18575 case 'O':
18576 /* Similar, but subtract 1 first. */
18577 if (GET_CODE (x) != PARALLEL)
18578 output_operand_lossage ("invalid %%O value");
18579 else
18580 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
18581 return;
18583 case 'p':
18584 /* X is a CONST_INT that is a power of two. Output the logarithm. */
18585 if (! INT_P (x)
18586 || INTVAL (x) < 0
18587 || (i = exact_log2 (INTVAL (x))) < 0)
18588 output_operand_lossage ("invalid %%p value");
18589 else
18590 fprintf (file, "%d", i);
18591 return;
18593 case 'P':
18594 /* The operand must be an indirect memory reference. The result
18595 is the register name. */
18596 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
18597 || REGNO (XEXP (x, 0)) >= 32)
18598 output_operand_lossage ("invalid %%P value");
18599 else
18600 fputs (reg_names[REGNO (XEXP (x, 0))], file);
18601 return;
18603 case 'q':
18604 /* This outputs the logical code corresponding to a boolean
18605 expression. The expression may have one or both operands
18606 negated (if one, only the first one). For condition register
18607 logical operations, it will also treat the negated
18608 CR codes as NOTs, but not handle NOTs of them. */
18610 const char *const *t = 0;
18611 const char *s;
18612 enum rtx_code code = GET_CODE (x);
18613 static const char * const tbl[3][3] = {
18614 { "and", "andc", "nor" },
18615 { "or", "orc", "nand" },
18616 { "xor", "eqv", "xor" } };
18618 if (code == AND)
18619 t = tbl[0];
18620 else if (code == IOR)
18621 t = tbl[1];
18622 else if (code == XOR)
18623 t = tbl[2];
18624 else
18625 output_operand_lossage ("invalid %%q value");
18627 if (GET_CODE (XEXP (x, 0)) != NOT)
18628 s = t[0];
18629 else
18631 if (GET_CODE (XEXP (x, 1)) == NOT)
18632 s = t[2];
18633 else
18634 s = t[1];
18637 fputs (s, file);
18639 return;
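/* Examples for %q (illustrative): (and (not A) B) selects tbl[0][1]
   and prints "andc", while (ior (not A) (not B)) selects tbl[1][2]
   and prints "nand".  */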
18641 case 'Q':
18642 if (! TARGET_MFCRF)
18643 return;
18644 fputc (',', file);
18645 /* FALLTHRU */
18647 case 'R':
18648 /* X is a CR register. Print the mask for `mtcrf'. */
18649 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18650 output_operand_lossage ("invalid %%R value");
18651 else
18652 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
18653 return;
18655 case 's':
18656 /* Low-order 5 bits of 32 - value. */
18657 if (! INT_P (x))
18658 output_operand_lossage ("invalid %%s value");
18659 else
18660 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
18661 return;
18663 case 'S':
18664 /* PowerPC64 mask position. All 0's is excluded.
18665 CONST_INT 32-bit mask is considered sign-extended so any
18666 transition must occur within the CONST_INT, not on the boundary. */
18667 if (! mask64_operand (x, DImode))
18668 output_operand_lossage ("invalid %%S value");
18670 uval = INTVAL (x);
18672 if (uval & 1) /* Clear Left */
18674 #if HOST_BITS_PER_WIDE_INT > 64
18675 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18676 #endif
18677 i = 64;
18679 else /* Clear Right */
18681 uval = ~uval;
18682 #if HOST_BITS_PER_WIDE_INT > 64
18683 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18684 #endif
18685 i = 63;
18687 while (uval != 0)
18688 --i, uval >>= 1;
18689 gcc_assert (i >= 0);
18690 fprintf (file, "%d", i);
18691 return;
18693 case 't':
18694 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
18695 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
18697 /* Bit 3 is OV bit. */
18698 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
18700 /* If we want bit 31, write a shift count of zero, not 32. */
18701 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18702 return;
18704 case 'T':
18705 /* Print the symbolic name of a branch target register. */
18706 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
18707 && REGNO (x) != CTR_REGNO))
18708 output_operand_lossage ("invalid %%T value");
18709 else if (REGNO (x) == LR_REGNO)
18710 fputs ("lr", file);
18711 else
18712 fputs ("ctr", file);
18713 return;
18715 case 'u':
18716 /* High-order or low-order 16 bits of constant, whichever is non-zero,
18717 for use in unsigned operand. */
18718 if (! INT_P (x))
18720 output_operand_lossage ("invalid %%u value");
18721 return;
18724 uval = INTVAL (x);
18725 if ((uval & 0xffff) == 0)
18726 uval >>= 16;
18728 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
18729 return;
18731 case 'v':
18732 /* High-order 16 bits of constant for use in signed operand. */
18733 if (! INT_P (x))
18734 output_operand_lossage ("invalid %%v value");
18735 else
18736 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
18737 (INTVAL (x) >> 16) & 0xffff);
18738 return;
18740 case 'U':
18741 /* Print `u' if this has an auto-increment or auto-decrement. */
18742 if (MEM_P (x)
18743 && (GET_CODE (XEXP (x, 0)) == PRE_INC
18744 || GET_CODE (XEXP (x, 0)) == PRE_DEC
18745 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
18746 putc ('u', file);
18747 return;
18749 case 'V':
18750 /* Print the trap code for this operand. */
18751 switch (GET_CODE (x))
18753 case EQ:
18754 fputs ("eq", file); /* 4 */
18755 break;
18756 case NE:
18757 fputs ("ne", file); /* 24 */
18758 break;
18759 case LT:
18760 fputs ("lt", file); /* 16 */
18761 break;
18762 case LE:
18763 fputs ("le", file); /* 20 */
18764 break;
18765 case GT:
18766 fputs ("gt", file); /* 8 */
18767 break;
18768 case GE:
18769 fputs ("ge", file); /* 12 */
18770 break;
18771 case LTU:
18772 fputs ("llt", file); /* 2 */
18773 break;
18774 case LEU:
18775 fputs ("lle", file); /* 6 */
18776 break;
18777 case GTU:
18778 fputs ("lgt", file); /* 1 */
18779 break;
18780 case GEU:
18781 fputs ("lge", file); /* 5 */
18782 break;
18783 default:
18784 gcc_unreachable ();
18786 break;
18788 case 'w':
18789 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
18790 normally. */
18791 if (INT_P (x))
18792 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
18793 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
18794 else
18795 print_operand (file, x, 0);
18796 return;
18798 case 'W':
18799 /* MB value for a PowerPC64 rldic operand. */
18800 i = clz_hwi (INTVAL (x));
18802 fprintf (file, "%d", i);
18803 return;
18805 case 'x':
18806 /* X is a FPR or Altivec register used in a VSX context. */
18807 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
18808 output_operand_lossage ("invalid %%x value");
18809 else
18811 int reg = REGNO (x);
18812 int vsx_reg = (FP_REGNO_P (reg)
18813 ? reg - 32
18814 : reg - FIRST_ALTIVEC_REGNO + 32);
18816 #ifdef TARGET_REGNAMES
18817 if (TARGET_REGNAMES)
18818 fprintf (file, "%%vs%d", vsx_reg);
18819 else
18820 #endif
18821 fprintf (file, "%d", vsx_reg);
18823 return;
18825 case 'X':
18826 if (MEM_P (x)
18827 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
18828 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
18829 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
18830 putc ('x', file);
18831 return;
18833 case 'Y':
18834 /* Like 'L', for third word of TImode/PTImode */
18835 if (REG_P (x))
18836 fputs (reg_names[REGNO (x) + 2], file);
18837 else if (MEM_P (x))
18839 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18840 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18841 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18842 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18843 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18844 else
18845 output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
18846 if (small_data_operand (x, GET_MODE (x)))
18847 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18848 reg_names[SMALL_DATA_REG]);
18850 return;
18852 case 'z':
18853 /* X is a SYMBOL_REF. Write out the name preceded by a
18854 period and without any trailing data in brackets. Used for function
18855 names. If we are configured for System V (or the embedded ABI) on
18856 the PowerPC, do not emit the period, since those systems do not use
18857 TOCs and the like. */
18858 gcc_assert (GET_CODE (x) == SYMBOL_REF);
18860 /* For macho, check to see if we need a stub. */
18861 if (TARGET_MACHO)
18863 const char *name = XSTR (x, 0);
18864 #if TARGET_MACHO
18865 if (darwin_emit_branch_islands
18866 && MACHOPIC_INDIRECT
18867 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
18868 name = machopic_indirection_name (x, /*stub_p=*/true);
18869 #endif
18870 assemble_name (file, name);
18872 else if (!DOT_SYMBOLS)
18873 assemble_name (file, XSTR (x, 0));
18874 else
18875 rs6000_output_function_entry (file, XSTR (x, 0));
18876 return;
18878 case 'Z':
18879 /* Like 'L', for last word of TImode/PTImode. */
18880 if (REG_P (x))
18881 fputs (reg_names[REGNO (x) + 3], file);
18882 else if (MEM_P (x))
18884 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18885 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18886 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18887 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18888 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18889 else
18890 output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
18891 if (small_data_operand (x, GET_MODE (x)))
18892 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18893 reg_names[SMALL_DATA_REG]);
18895 return;
18897 /* Print AltiVec or SPE memory operand. */
18898 case 'y':
18900 rtx tmp;
18902 gcc_assert (MEM_P (x));
18904 tmp = XEXP (x, 0);
18906 /* Ugly hack because %y is overloaded. */
18907 if ((TARGET_SPE || TARGET_E500_DOUBLE)
18908 && (GET_MODE_SIZE (GET_MODE (x)) == 8
18909 || GET_MODE (x) == TFmode
18910 || GET_MODE (x) == TImode
18911 || GET_MODE (x) == PTImode))
18913 /* Handle [reg]. */
18914 if (REG_P (tmp))
18916 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
18917 break;
18919 /* Handle [reg+UIMM]. */
18920 else if (GET_CODE (tmp) == PLUS &&
18921 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
18923 int x;
18925 gcc_assert (REG_P (XEXP (tmp, 0)));
18927 x = INTVAL (XEXP (tmp, 1));
18928 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
18929 break;
18932 /* Fall through. Must be [reg+reg]. */
18934 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
18935 && GET_CODE (tmp) == AND
18936 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
18937 && INTVAL (XEXP (tmp, 1)) == -16)
18938 tmp = XEXP (tmp, 0);
18939 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
18940 && GET_CODE (tmp) == PRE_MODIFY)
18941 tmp = XEXP (tmp, 1);
18942 if (REG_P (tmp))
18943 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
18944 else
18946 if (GET_CODE (tmp) != PLUS
18947 || !REG_P (XEXP (tmp, 0))
18948 || !REG_P (XEXP (tmp, 1)))
18950 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
18951 break;
18954 if (REGNO (XEXP (tmp, 0)) == 0)
18955 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
18956 reg_names[ REGNO (XEXP (tmp, 0)) ]);
18957 else
18958 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
18959 reg_names[ REGNO (XEXP (tmp, 1)) ]);
18961 break;
18964 case 0:
18965 if (REG_P (x))
18966 fprintf (file, "%s", reg_names[REGNO (x)]);
18967 else if (MEM_P (x))
18969 /* We need to handle PRE_INC and PRE_DEC here, since we need to
18970 know the width from the mode. */
18971 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
18972 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
18973 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18974 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
18975 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
18976 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18977 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18978 output_address (XEXP (XEXP (x, 0), 1));
18979 else
18980 output_address (XEXP (x, 0));
18982 else
18984 if (toc_relative_expr_p (x, false))
18985 /* This hack along with a corresponding hack in
18986 rs6000_output_addr_const_extra arranges to output addends
18987 where the assembler expects to find them. eg.
18988 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
18989 without this hack would be output as "x@toc+4". We
18990 want "x+4@toc". */
18991 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18992 else
18993 output_addr_const (file, x);
18995 return;
18997 case '&':
18998 if (const char *name = get_some_local_dynamic_name ())
18999 assemble_name (file, name);
19000 else
19001 output_operand_lossage ("'%%&' used without any "
19002 "local dynamic TLS references");
19003 return;
19005 default:
19006 output_operand_lossage ("invalid %%xn code");
19010 /* Print the address of an operand. */
19012 void
19013 print_operand_address (FILE *file, rtx x)
19015 if (REG_P (x))
19016 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
19017 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
19018 || GET_CODE (x) == LABEL_REF)
19020 output_addr_const (file, x);
19021 if (small_data_operand (x, GET_MODE (x)))
19022 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19023 reg_names[SMALL_DATA_REG]);
19024 else
19025 gcc_assert (!TARGET_TOC);
19027 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
19028 && REG_P (XEXP (x, 1)))
19030 if (REGNO (XEXP (x, 0)) == 0)
19031 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
19032 reg_names[ REGNO (XEXP (x, 0)) ]);
19033 else
19034 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
19035 reg_names[ REGNO (XEXP (x, 1)) ]);
19037 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
19038 && GET_CODE (XEXP (x, 1)) == CONST_INT)
19039 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
19040 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
19041 #if TARGET_MACHO
19042 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
19043 && CONSTANT_P (XEXP (x, 1)))
19045 fprintf (file, "lo16(");
19046 output_addr_const (file, XEXP (x, 1));
19047 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
19049 #endif
19050 #if TARGET_ELF
19051 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
19052 && CONSTANT_P (XEXP (x, 1)))
19054 output_addr_const (file, XEXP (x, 1));
19055 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
19057 #endif
19058 else if (toc_relative_expr_p (x, false))
19060 /* This hack along with a corresponding hack in
19061 rs6000_output_addr_const_extra arranges to output addends
19062 where the assembler expects to find them. eg.
19063 (lo_sum (reg 9)
19064 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
19065 without this hack would be output as "x@toc+8@l(9)". We
19066 want "x+8@toc@l(9)". */
19067 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
19068 if (GET_CODE (x) == LO_SUM)
19069 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
19070 else
19071 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
19073 else
19074 gcc_unreachable ();
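/* Example (illustrative): (plus (reg 9) (reg 10)) prints as "9,10";
   if the first register is r0 the operands are printed swapped,
   because r0 in the RA slot of an indexed access reads as the
   constant 0 rather than the register.  */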
19077 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
19079 static bool
19080 rs6000_output_addr_const_extra (FILE *file, rtx x)
19082 if (GET_CODE (x) == UNSPEC)
19083 switch (XINT (x, 1))
19085 case UNSPEC_TOCREL:
19086 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
19087 && REG_P (XVECEXP (x, 0, 1))
19088 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
19089 output_addr_const (file, XVECEXP (x, 0, 0));
19090 if (x == tocrel_base && tocrel_offset != const0_rtx)
19092 if (INTVAL (tocrel_offset) >= 0)
19093 fprintf (file, "+");
19094 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
19096 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
19098 putc ('-', file);
19099 assemble_name (file, toc_label_name);
19101 else if (TARGET_ELF)
19102 fputs ("@toc", file);
19103 return true;
19105 #if TARGET_MACHO
19106 case UNSPEC_MACHOPIC_OFFSET:
19107 output_addr_const (file, XVECEXP (x, 0, 0));
19108 putc ('-', file);
19109 machopic_output_function_base_name (file);
19110 return true;
19111 #endif
19113 return false;
19116 /* Target hook for assembling integer objects. The PowerPC version has
19117 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
19118 is defined. It also needs to handle DI-mode objects on 64-bit
19119 targets. */
19121 static bool
19122 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
19124 #ifdef RELOCATABLE_NEEDS_FIXUP
19125 /* Special handling for SI values. */
19126 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
19128 static int recurse = 0;
19130 /* For -mrelocatable, we mark all addresses that need to be fixed up in
19131 the .fixup section. Since the TOC section is already relocated, we
19132 don't need to mark it here. We used to skip the text section, but it
19133 should never be valid for relocated addresses to be placed in the text
19134 section. */
19135 if (TARGET_RELOCATABLE
19136 && in_section != toc_section
19137 && !recurse
19138 && !CONST_SCALAR_INT_P (x)
19139 && CONSTANT_P (x))
19141 char buf[256];
19143 recurse = 1;
19144 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
19145 fixuplabelno++;
19146 ASM_OUTPUT_LABEL (asm_out_file, buf);
19147 fprintf (asm_out_file, "\t.long\t(");
19148 output_addr_const (asm_out_file, x);
19149 fprintf (asm_out_file, ")@fixup\n");
19150 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
19151 ASM_OUTPUT_ALIGN (asm_out_file, 2);
19152 fprintf (asm_out_file, "\t.long\t");
19153 assemble_name (asm_out_file, buf);
19154 fprintf (asm_out_file, "\n\t.previous\n");
19155 recurse = 0;
19156 return true;
19158 /* Remove initial .'s to turn a -mcall-aixdesc function
19159 address into the address of the descriptor, not the function
19160 itself. */
19161 else if (GET_CODE (x) == SYMBOL_REF
19162 && XSTR (x, 0)[0] == '.'
19163 && DEFAULT_ABI == ABI_AIX)
19165 const char *name = XSTR (x, 0);
19166 while (*name == '.')
19167 name++;
19169 fprintf (asm_out_file, "\t.long\t%s\n", name);
19170 return true;
19173 #endif /* RELOCATABLE_NEEDS_FIXUP */
19174 return default_assemble_integer (x, size, aligned_p);
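/* Sketch of the -mrelocatable path above (illustrative; ".LCP0" is a
   hypothetical instance of the generated label):

       .LCP0:
               .long   (sym)@fixup
               .section ".fixup","aw"
               .align  2
               .long   .LCP0
               .previous

   The startup code walks .fixup to relocate each stored address.  */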
19177 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
19178 /* Emit an assembler directive to set symbol visibility for DECL to
19179 VISIBILITY_TYPE. */
19181 static void
19182 rs6000_assemble_visibility (tree decl, int vis)
19184 if (TARGET_XCOFF)
19185 return;
19187 /* Functions need to have their entry point symbol visibility set as
19188 well as their descriptor symbol visibility. */
19189 if (DEFAULT_ABI == ABI_AIX
19190 && DOT_SYMBOLS
19191 && TREE_CODE (decl) == FUNCTION_DECL)
19193 static const char * const visibility_types[] = {
19194 NULL, "internal", "hidden", "protected"
19197 const char *name, *type;
19199 name = ((* targetm.strip_name_encoding)
19200 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
19201 type = visibility_types[vis];
19203 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
19204 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
19206 else
19207 default_assemble_visibility (decl, vis);
19209 #endif
19211 enum rtx_code
19212 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
19214 /* Reversing an FP compare takes care -- an ordered compare
19215 becomes an unordered compare and vice versa. */
19216 if (mode == CCFPmode
19217 && (!flag_finite_math_only
19218 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
19219 || code == UNEQ || code == LTGT))
19220 return reverse_condition_maybe_unordered (code);
19221 else
19222 return reverse_condition (code);
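/* Example (illustrative): in CCFPmode without -ffinite-math-only,
   reversing GE yields UNLT rather than the plain LT that
   reverse_condition would produce, so NaN operands still take the
   reversed branch.  */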
19225 /* Generate a compare for CODE. Return a brand-new rtx that
19226 represents the result of the compare. */
19228 static rtx
19229 rs6000_generate_compare (rtx cmp, machine_mode mode)
19231 machine_mode comp_mode;
19232 rtx compare_result;
19233 enum rtx_code code = GET_CODE (cmp);
19234 rtx op0 = XEXP (cmp, 0);
19235 rtx op1 = XEXP (cmp, 1);
19237 if (FLOAT_MODE_P (mode))
19238 comp_mode = CCFPmode;
19239 else if (code == GTU || code == LTU
19240 || code == GEU || code == LEU)
19241 comp_mode = CCUNSmode;
19242 else if ((code == EQ || code == NE)
19243 && unsigned_reg_p (op0)
19244 && (unsigned_reg_p (op1)
19245 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
19246 /* These are unsigned values, perhaps there will be a later
19247 ordering compare that can be shared with this one. */
19248 comp_mode = CCUNSmode;
19249 else
19250 comp_mode = CCmode;
19252 /* If we have an unsigned compare, make sure we don't have a signed value as
19253 an immediate. */
19254 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
19255 && INTVAL (op1) < 0)
19257 op0 = copy_rtx_if_shared (op0);
19258 op1 = force_reg (GET_MODE (op0), op1);
19259 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
19262 /* First, the compare. */
19263 compare_result = gen_reg_rtx (comp_mode);
19265 /* E500 FP compare instructions on the GPRs. Yuck! */
19266 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
19267 && FLOAT_MODE_P (mode))
19269 rtx cmp, or_result, compare_result2;
19270 machine_mode op_mode = GET_MODE (op0);
19271 bool reverse_p;
19273 if (op_mode == VOIDmode)
19274 op_mode = GET_MODE (op1);
19276 /* First reverse the condition codes that aren't directly supported. */
19277 switch (code)
19279 case NE:
19280 case UNLT:
19281 case UNLE:
19282 case UNGT:
19283 case UNGE:
19284 code = reverse_condition_maybe_unordered (code);
19285 reverse_p = true;
19286 break;
19288 case EQ:
19289 case LT:
19290 case LE:
19291 case GT:
19292 case GE:
19293 reverse_p = false;
19294 break;
19296 default:
19297 gcc_unreachable ();
19300 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
19301 This explains the following mess. */
19303 switch (code)
19305 case EQ:
19306 switch (op_mode)
19308 case SFmode:
19309 cmp = (flag_finite_math_only && !flag_trapping_math)
19310 ? gen_tstsfeq_gpr (compare_result, op0, op1)
19311 : gen_cmpsfeq_gpr (compare_result, op0, op1);
19312 break;
19314 case DFmode:
19315 cmp = (flag_finite_math_only && !flag_trapping_math)
19316 ? gen_tstdfeq_gpr (compare_result, op0, op1)
19317 : gen_cmpdfeq_gpr (compare_result, op0, op1);
19318 break;
19320 case TFmode:
19321 cmp = (flag_finite_math_only && !flag_trapping_math)
19322 ? gen_tsttfeq_gpr (compare_result, op0, op1)
19323 : gen_cmptfeq_gpr (compare_result, op0, op1);
19324 break;
19326 default:
19327 gcc_unreachable ();
19329 break;
19331 case GT:
19332 case GE:
19333 switch (op_mode)
19335 case SFmode:
19336 cmp = (flag_finite_math_only && !flag_trapping_math)
19337 ? gen_tstsfgt_gpr (compare_result, op0, op1)
19338 : gen_cmpsfgt_gpr (compare_result, op0, op1);
19339 break;
19341 case DFmode:
19342 cmp = (flag_finite_math_only && !flag_trapping_math)
19343 ? gen_tstdfgt_gpr (compare_result, op0, op1)
19344 : gen_cmpdfgt_gpr (compare_result, op0, op1);
19345 break;
19347 case TFmode:
19348 cmp = (flag_finite_math_only && !flag_trapping_math)
19349 ? gen_tsttfgt_gpr (compare_result, op0, op1)
19350 : gen_cmptfgt_gpr (compare_result, op0, op1);
19351 break;
19353 default:
19354 gcc_unreachable ();
19356 break;
19358 case LT:
19359 case LE:
19360 switch (op_mode)
19362 case SFmode:
19363 cmp = (flag_finite_math_only && !flag_trapping_math)
19364 ? gen_tstsflt_gpr (compare_result, op0, op1)
19365 : gen_cmpsflt_gpr (compare_result, op0, op1);
19366 break;
19368 case DFmode:
19369 cmp = (flag_finite_math_only && !flag_trapping_math)
19370 ? gen_tstdflt_gpr (compare_result, op0, op1)
19371 : gen_cmpdflt_gpr (compare_result, op0, op1);
19372 break;
19374 case TFmode:
19375 cmp = (flag_finite_math_only && !flag_trapping_math)
19376 ? gen_tsttflt_gpr (compare_result, op0, op1)
19377 : gen_cmptflt_gpr (compare_result, op0, op1);
19378 break;
19380 default:
19381 gcc_unreachable ();
19383 break;
19385 default:
19386 gcc_unreachable ();
19389 /* Synthesize LE and GE from LT/GT || EQ. */
19390 if (code == LE || code == GE)
19392 emit_insn (cmp);
19394 compare_result2 = gen_reg_rtx (CCFPmode);
19396 /* Do the EQ. */
19397 switch (op_mode)
19399 case SFmode:
19400 cmp = (flag_finite_math_only && !flag_trapping_math)
19401 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
19402 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
19403 break;
19405 case DFmode:
19406 cmp = (flag_finite_math_only && !flag_trapping_math)
19407 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
19408 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
19409 break;
19411 case TFmode:
19412 cmp = (flag_finite_math_only && !flag_trapping_math)
19413 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
19414 : gen_cmptfeq_gpr (compare_result2, op0, op1);
19415 break;
19417 default:
19418 gcc_unreachable ();
19421 emit_insn (cmp);
19423 /* OR them together. */
19424 or_result = gen_reg_rtx (CCFPmode);
19425 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
19426 compare_result2);
19427 compare_result = or_result;
19430 code = reverse_p ? NE : EQ;
19432 emit_insn (cmp);
19434 else
19436 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
19437 CLOBBERs to match cmptf_internal2 pattern. */
19438 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
19439 && GET_MODE (op0) == TFmode
19440 && !TARGET_IEEEQUAD
19441 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
19442 emit_insn (gen_rtx_PARALLEL (VOIDmode,
19443 gen_rtvec (10,
19444 gen_rtx_SET (VOIDmode,
19445 compare_result,
19446 gen_rtx_COMPARE (comp_mode, op0, op1)),
19447 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19448 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19449 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19450 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19451 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19452 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19453 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19454 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19455 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
19456 else if (GET_CODE (op1) == UNSPEC
19457 && XINT (op1, 1) == UNSPEC_SP_TEST)
19459 rtx op1b = XVECEXP (op1, 0, 0);
19460 comp_mode = CCEQmode;
19461 compare_result = gen_reg_rtx (CCEQmode);
19462 if (TARGET_64BIT)
19463 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
19464 else
19465 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
19467 else
19468 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
19469 gen_rtx_COMPARE (comp_mode, op0, op1)));
19472 /* Some kinds of FP comparisons need an OR operation;
19473 under flag_finite_math_only we don't bother. */
19474 if (FLOAT_MODE_P (mode)
19475 && !flag_finite_math_only
19476 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
19477 && (code == LE || code == GE
19478 || code == UNEQ || code == LTGT
19479 || code == UNGT || code == UNLT))
19481 enum rtx_code or1, or2;
19482 rtx or1_rtx, or2_rtx, compare2_rtx;
19483 rtx or_result = gen_reg_rtx (CCEQmode);
19485 switch (code)
19487 case LE: or1 = LT; or2 = EQ; break;
19488 case GE: or1 = GT; or2 = EQ; break;
19489 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
19490 case LTGT: or1 = LT; or2 = GT; break;
19491 case UNGT: or1 = UNORDERED; or2 = GT; break;
19492 case UNLT: or1 = UNORDERED; or2 = LT; break;
19493 default: gcc_unreachable ();
19495 validate_condition_mode (or1, comp_mode);
19496 validate_condition_mode (or2, comp_mode);
19497 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
19498 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
19499 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
19500 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
19501 const_true_rtx);
19502 emit_insn (gen_rtx_SET (VOIDmode, or_result, compare2_rtx));
19504 compare_result = or_result;
19505 code = EQ;
19508 validate_condition_mode (code, GET_MODE (compare_result));
19510 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
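/* Example (illustrative): a DFmode LE compare without
   -ffinite-math-only takes the CCEQ path above -- one fcmpu sets the
   CR field, a cror folds the LT and EQ bits into the EQ position of a
   CCEQmode register, and the returned rtx tests that single bit.  */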
19514 /* Emit the RTL for an sISEL pattern. */
19516 void
19517 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
19519 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
19522 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
19523 can be used as that dest register. Return the dest register. */
19525 rtx
19526 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
19528 if (op2 == const0_rtx)
19529 return op1;
19531 if (GET_CODE (scratch) == SCRATCH)
19532 scratch = gen_reg_rtx (mode);
19534 if (logical_operand (op2, mode))
19535 emit_insn (gen_rtx_SET (VOIDmode, scratch, gen_rtx_XOR (mode, op1, op2)));
19536 else
19537 emit_insn (gen_rtx_SET (VOIDmode, scratch,
19538 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
19540 return scratch;
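/* Usage sketch (illustrative): with OP2 a 16-bit logical constant C,
   the XOR path above emits "xori scratch,op1,C", so comparing SCRATCH
   against zero afterwards implements op1 == C without loading C into
   a register first.  */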
19543 void
19544 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
19546 rtx condition_rtx;
19547 machine_mode op_mode;
19548 enum rtx_code cond_code;
19549 rtx result = operands[0];
19551 condition_rtx = rs6000_generate_compare (operands[1], mode);
19552 cond_code = GET_CODE (condition_rtx);
19554 if (FLOAT_MODE_P (mode)
19555 && !TARGET_FPRS && TARGET_HARD_FLOAT)
19557 rtx t;
19559 PUT_MODE (condition_rtx, SImode);
19560 t = XEXP (condition_rtx, 0);
19562 gcc_assert (cond_code == NE || cond_code == EQ);
19564 if (cond_code == NE)
19565 emit_insn (gen_e500_flip_gt_bit (t, t));
19567 emit_insn (gen_move_from_CR_gt_bit (result, t));
19568 return;
19571 if (cond_code == NE
19572 || cond_code == GE || cond_code == LE
19573 || cond_code == GEU || cond_code == LEU
19574 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
19576 rtx not_result = gen_reg_rtx (CCEQmode);
19577 rtx not_op, rev_cond_rtx;
19578 machine_mode cc_mode;
19580 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
19582 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
19583 SImode, XEXP (condition_rtx, 0), const0_rtx);
19584 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
19585 emit_insn (gen_rtx_SET (VOIDmode, not_result, not_op));
19586 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
19589 op_mode = GET_MODE (XEXP (operands[1], 0));
19590 if (op_mode == VOIDmode)
19591 op_mode = GET_MODE (XEXP (operands[1], 1));
19593 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
19595 PUT_MODE (condition_rtx, DImode);
19596 convert_move (result, condition_rtx, 0);
19598 else
19600 PUT_MODE (condition_rtx, SImode);
19601 emit_insn (gen_rtx_SET (VOIDmode, result, condition_rtx));
19605 /* Emit a branch of kind CODE to location LOC. */
19607 void
19608 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
19610 rtx condition_rtx, loc_ref;
19612 condition_rtx = rs6000_generate_compare (operands[0], mode);
19613 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
19614 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
19615 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
19616 loc_ref, pc_rtx)));
19619 /* Return the string to output a conditional branch to LABEL, which is
19620 the operand template of the label, or NULL if the branch is really a
19621 conditional return.
19623 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
19624 condition code register and its mode specifies what kind of
19625 comparison we made.
19627 REVERSED is nonzero if we should reverse the sense of the comparison.
19629 INSN is the insn. */
19631 char *
19632 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
19634 static char string[64];
19635 enum rtx_code code = GET_CODE (op);
19636 rtx cc_reg = XEXP (op, 0);
19637 machine_mode mode = GET_MODE (cc_reg);
19638 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
19639 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
19640 int really_reversed = reversed ^ need_longbranch;
19641 char *s = string;
19642 const char *ccode;
19643 const char *pred;
19644 rtx note;
19646 validate_condition_mode (code, mode);
19648 /* Work out which way this really branches. We could use
19649 reverse_condition_maybe_unordered here always but this
19650 makes the resulting assembler clearer. */
19651 if (really_reversed)
19653 /* Reversing an FP compare takes care -- an ordered compare
19654 becomes an unordered compare and vice versa. */
19655 if (mode == CCFPmode)
19656 code = reverse_condition_maybe_unordered (code);
19657 else
19658 code = reverse_condition (code);
19661 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
19663 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
19664 to the GT bit. */
19665 switch (code)
19667 case EQ:
19668 /* Opposite of GT. */
19669 code = GT;
19670 break;
19672 case NE:
19673 code = UNLE;
19674 break;
19676 default:
19677 gcc_unreachable ();
19681 switch (code)
19683 /* Not all of these are actually distinct opcodes, but
19684 we distinguish them for clarity of the resulting assembler. */
19685 case NE: case LTGT:
19686 ccode = "ne"; break;
19687 case EQ: case UNEQ:
19688 ccode = "eq"; break;
19689 case GE: case GEU:
19690 ccode = "ge"; break;
19691 case GT: case GTU: case UNGT:
19692 ccode = "gt"; break;
19693 case LE: case LEU:
19694 ccode = "le"; break;
19695 case LT: case LTU: case UNLT:
19696 ccode = "lt"; break;
19697 case UNORDERED: ccode = "un"; break;
19698 case ORDERED: ccode = "nu"; break;
19699 case UNGE: ccode = "nl"; break;
19700 case UNLE: ccode = "ng"; break;
19701 default:
19702 gcc_unreachable ();
19705 /* Maybe we have a guess as to how likely the branch is. */
19706 pred = "";
19707 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
19708 if (note != NULL_RTX)
19710 /* PROB is the difference from 50%. */
19711 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
19713 /* Only hint for highly probable/improbable branches on newer
19714 cpus as static prediction overrides processor dynamic
19715 prediction. For older cpus we may as well always hint, but
19716 assume not taken for branches that are very close to 50% as a
19717 mispredicted taken branch is more expensive than a
19718 mispredicted not-taken branch. */
19719 if (rs6000_always_hint
19720 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
19721 && br_prob_note_reliable_p (note)))
19723 if (abs (prob) > REG_BR_PROB_BASE / 20
19724 && ((prob > 0) ^ need_longbranch))
19725 pred = "+";
19726 else
19727 pred = "-";
19731 if (label == NULL)
19732 s += sprintf (s, "b%slr%s ", ccode, pred);
19733 else
19734 s += sprintf (s, "b%s%s ", ccode, pred);
19736 /* We need to escape any '%' characters in the reg_names string.
19737 Assume they'd only be the first character.... */
19738 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
19739 *s++ = '%';
19740 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
19742 if (label != NULL)
19744 /* If the branch distance was too far, we may have to use an
19745 unconditional branch to go the distance. */
19746 if (need_longbranch)
19747 s += sprintf (s, ",$+8\n\tb %s", label);
19748 else
19749 s += sprintf (s, ",%s", label);
19752 return string;
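/* Example outputs (illustrative): an EQ test on cr0 with a "likely
   taken" hint yields "beq+ 0,.L2"; when the target is beyond the
   16-bit displacement range, the sense is reversed and
   "bne 0,$+8\n\tb .L2" is produced instead.  */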
19755 /* Return the string to flip the GT bit on a CR. */
19756 char *
19757 output_e500_flip_gt_bit (rtx dst, rtx src)
19759 static char string[64];
19760 int a, b;
19762 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
19763 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
19765 /* GT bit. */
19766 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
19767 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
19769 sprintf (string, "crnot %d,%d", a, b);
19770 return string;
19773 /* Return insn for VSX or Altivec comparisons. */
19775 static rtx
19776 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
19778 rtx mask;
19779 machine_mode mode = GET_MODE (op0);
19781 switch (code)
19783 default:
19784 break;
19786 case GE:
19787 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19788 return NULL_RTX;
19790 case EQ:
19791 case GT:
19792 case GTU:
19793 case ORDERED:
19794 case UNORDERED:
19795 case UNEQ:
19796 case LTGT:
19797 mask = gen_reg_rtx (mode);
19798 emit_insn (gen_rtx_SET (VOIDmode,
19799 mask,
19800 gen_rtx_fmt_ee (code, mode, op0, op1)));
19801 return mask;
19804 return NULL_RTX;
19807 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
19808 DMODE is expected destination mode. This is a recursive function. */
19810 static rtx
19811 rs6000_emit_vector_compare (enum rtx_code rcode,
19812 rtx op0, rtx op1,
19813 machine_mode dmode)
19815 rtx mask;
19816 bool swap_operands = false;
19817 bool try_again = false;
19819 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
19820 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
19822 /* See if the comparison works as is. */
19823 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19824 if (mask)
19825 return mask;
19827 switch (rcode)
19829 case LT:
19830 rcode = GT;
19831 swap_operands = true;
19832 try_again = true;
19833 break;
19834 case LTU:
19835 rcode = GTU;
19836 swap_operands = true;
19837 try_again = true;
19838 break;
19839 case NE:
19840 case UNLE:
19841 case UNLT:
19842 case UNGE:
19843 case UNGT:
19844 /* Invert condition and try again.
19845 e.g., A != B becomes ~(A==B). */
19847 enum rtx_code rev_code;
19848 enum insn_code nor_code;
19849 rtx mask2;
19851 rev_code = reverse_condition_maybe_unordered (rcode);
19852 if (rev_code == UNKNOWN)
19853 return NULL_RTX;
19855 nor_code = optab_handler (one_cmpl_optab, dmode);
19856 if (nor_code == CODE_FOR_nothing)
19857 return NULL_RTX;
19859 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
19860 if (!mask2)
19861 return NULL_RTX;
19863 mask = gen_reg_rtx (dmode);
19864 emit_insn (GEN_FCN (nor_code) (mask, mask2));
19865 return mask;
19867 break;
19868 case GE:
19869 case GEU:
19870 case LE:
19871 case LEU:
19872 /* Try GT/GTU/LT/LTU OR EQ */
19874 rtx c_rtx, eq_rtx;
19875 enum insn_code ior_code;
19876 enum rtx_code new_code;
19878 switch (rcode)
19880 case GE:
19881 new_code = GT;
19882 break;
19884 case GEU:
19885 new_code = GTU;
19886 break;
19888 case LE:
19889 new_code = LT;
19890 break;
19892 case LEU:
19893 new_code = LTU;
19894 break;
19896 default:
19897 gcc_unreachable ();
19900 ior_code = optab_handler (ior_optab, dmode);
19901 if (ior_code == CODE_FOR_nothing)
19902 return NULL_RTX;
19904 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
19905 if (!c_rtx)
19906 return NULL_RTX;
19908 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
19909 if (!eq_rtx)
19910 return NULL_RTX;
19912 mask = gen_reg_rtx (dmode);
19913 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
19914 return mask;
19916 break;
19917 default:
19918 return NULL_RTX;
19921 if (try_again)
19923 if (swap_operands)
19924 std::swap (op0, op1);
19926 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19927 if (mask)
19928 return mask;
19931 /* You only get two chances. */
19932 return NULL_RTX;
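/* Example of the recursion above (illustrative): V4SImode NE has no
   direct instruction, so it is built as the complement of EQ -- a
   vcmpequw computes the equality mask and the one_cmpl_optab insn
   then inverts it.  */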
19935 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
19936 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
19937 operands for the relation operation COND. */
19940 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
19941 rtx cond, rtx cc_op0, rtx cc_op1)
19943 machine_mode dest_mode = GET_MODE (dest);
19944 machine_mode mask_mode = GET_MODE (cc_op0);
19945 enum rtx_code rcode = GET_CODE (cond);
19946 machine_mode cc_mode = CCmode;
19947 rtx mask;
19948 rtx cond2;
19949 rtx tmp;
19950 bool invert_move = false;
19952 if (VECTOR_UNIT_NONE_P (dest_mode))
19953 return 0;
19955 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
19956 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
19958 switch (rcode)
19960 /* Reverse the condition and swap the arms of the move instead; this
19961 avoids the extra NOR that the compare would otherwise need. */
19962 case NE:
19963 case UNLE:
19964 case UNLT:
19965 case UNGE:
19966 case UNGT:
19967 /* Invert condition and try again.
19968 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
19969 invert_move = true;
19970 rcode = reverse_condition_maybe_unordered (rcode);
19971 if (rcode == UNKNOWN)
19972 return 0;
19973 break;
19975 /* Mark unsigned tests with CCUNSmode. */
19976 case GTU:
19977 case GEU:
19978 case LTU:
19979 case LEU:
19980 cc_mode = CCUNSmode;
19981 break;
19983 default:
19984 break;
19987 /* Get the vector mask for the given relational operations. */
19988 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
19990 if (!mask)
19991 return 0;
19993 if (invert_move)
19995 tmp = op_true;
19996 op_true = op_false;
19997 op_false = tmp;
20000 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
20001 CONST0_RTX (dest_mode));
20002 emit_insn (gen_rtx_SET (VOIDmode,
20003 dest,
20004 gen_rtx_IF_THEN_ELSE (dest_mode,
20005 cond2,
20006 op_true,
20007 op_false)));
20008 return 1;
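/* For example (sketch only), "d = (a > b) ? t : f" on V4SImode would
   typically expand to a compare that produces an all-ones/all-zeros
   mask followed by a bitwise select on that mask, roughly

	vcmpgtsw vM,vA,vB
	vsel	 vD,vF,vT,vM	; (vF & ~vM) | (vT & vM)

   with xxsel used instead of vsel when VSX patterns apply.  */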
20011 /* Emit a conditional move: move TRUE_COND to DEST if comparison OP
20012 applied to the last comparison's operands is nonzero/true, and
20013 FALSE_COND if it is zero/false. Return 0 if the hardware has no such operation. */
20015 int
20016 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
20018 enum rtx_code code = GET_CODE (op);
20019 rtx op0 = XEXP (op, 0);
20020 rtx op1 = XEXP (op, 1);
20021 REAL_VALUE_TYPE c1;
20022 machine_mode compare_mode = GET_MODE (op0);
20023 machine_mode result_mode = GET_MODE (dest);
20024 rtx temp;
20025 bool is_against_zero;
20027 /* These modes should always match. */
20028 if (GET_MODE (op1) != compare_mode
20029 /* In the isel case however, we can use a compare immediate, so
20030 op1 may be a small constant. */
20031 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
20032 return 0;
20033 if (GET_MODE (true_cond) != result_mode)
20034 return 0;
20035 if (GET_MODE (false_cond) != result_mode)
20036 return 0;
20038 /* Don't allow using floating point comparisons for integer results for
20039 now. */
20040 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
20041 return 0;
20043 /* First, work out if the hardware can do this at all, or
20044 if it's too slow.... */
20045 if (!FLOAT_MODE_P (compare_mode))
20047 if (TARGET_ISEL)
20048 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
20049 return 0;
20051 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
20052 && SCALAR_FLOAT_MODE_P (compare_mode))
20053 return 0;
20055 is_against_zero = op1 == CONST0_RTX (compare_mode);
20057 /* A floating-point subtract might overflow, underflow, or produce
20058 an inexact result, thus changing the floating-point flags, so it
20059 can't be generated if we care about that. It's safe if one side
20060 of the construct is zero, since then no subtract will be
20061 generated. */
20062 if (SCALAR_FLOAT_MODE_P (compare_mode)
20063 && flag_trapping_math && ! is_against_zero)
20064 return 0;
20066 /* Eliminate half of the comparisons by switching operands, this
20067 makes the remaining code simpler. */
20068 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
20069 || code == LTGT || code == LT || code == UNLE)
20071 code = reverse_condition_maybe_unordered (code);
20072 temp = true_cond;
20073 true_cond = false_cond;
20074 false_cond = temp;
20077 /* UNEQ and LTGT take four instructions for a comparison with zero;
20078 it'll probably be faster to use a branch here too. */
20079 if (code == UNEQ && HONOR_NANS (compare_mode))
20080 return 0;
20082 if (GET_CODE (op1) == CONST_DOUBLE)
20083 REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);
20085 /* We're going to try to implement comparisons by performing
20086 a subtract, then comparing against zero. Unfortunately,
20087 Inf - Inf is NaN which is not zero, and so if we don't
20088 know that the operand is finite and the comparison
20089 would treat EQ differently from UNORDERED, we can't do it. */
20090 if (HONOR_INFINITIES (compare_mode)
20091 && code != GT && code != UNGE
20092 && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
20093 /* Constructs of the form (a OP b ? a : b) are safe. */
20094 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
20095 || (! rtx_equal_p (op0, true_cond)
20096 && ! rtx_equal_p (op1, true_cond))))
20097 return 0;
20099 /* At this point we know we can use fsel. */
20101 /* Reduce the comparison to a comparison against zero. */
20102 if (! is_against_zero)
20104 temp = gen_reg_rtx (compare_mode);
20105 emit_insn (gen_rtx_SET (VOIDmode, temp,
20106 gen_rtx_MINUS (compare_mode, op0, op1)));
20107 op0 = temp;
20108 op1 = CONST0_RTX (compare_mode);
20111 /* If we don't care about NaNs we can reduce some of the comparisons
20112 down to faster ones. */
20113 if (! HONOR_NANS (compare_mode))
20114 switch (code)
20116 case GT:
20117 code = LE;
20118 temp = true_cond;
20119 true_cond = false_cond;
20120 false_cond = temp;
20121 break;
20122 case UNGE:
20123 code = GE;
20124 break;
20125 case UNEQ:
20126 code = EQ;
20127 break;
20128 default:
20129 break;
20132 /* Now, reduce everything down to a GE. */
20133 switch (code)
20135 case GE:
20136 break;
20138 case LE:
20139 temp = gen_reg_rtx (compare_mode);
20140 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20141 op0 = temp;
20142 break;
20144 case ORDERED:
20145 temp = gen_reg_rtx (compare_mode);
20146 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_ABS (compare_mode, op0)));
20147 op0 = temp;
20148 break;
20150 case EQ:
20151 temp = gen_reg_rtx (compare_mode);
20152 emit_insn (gen_rtx_SET (VOIDmode, temp,
20153 gen_rtx_NEG (compare_mode,
20154 gen_rtx_ABS (compare_mode, op0))));
20155 op0 = temp;
20156 break;
20158 case UNGE:
20159 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
20160 temp = gen_reg_rtx (result_mode);
20161 emit_insn (gen_rtx_SET (VOIDmode, temp,
20162 gen_rtx_IF_THEN_ELSE (result_mode,
20163 gen_rtx_GE (VOIDmode,
20164 op0, op1),
20165 true_cond, false_cond)));
20166 false_cond = true_cond;
20167 true_cond = temp;
20169 temp = gen_reg_rtx (compare_mode);
20170 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20171 op0 = temp;
20172 break;
20174 case GT:
20175 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
20176 temp = gen_reg_rtx (result_mode);
20177 emit_insn (gen_rtx_SET (VOIDmode, temp,
20178 gen_rtx_IF_THEN_ELSE (result_mode,
20179 gen_rtx_GE (VOIDmode,
20180 op0, op1),
20181 true_cond, false_cond)));
20182 true_cond = false_cond;
20183 false_cond = temp;
20185 temp = gen_reg_rtx (compare_mode);
20186 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20187 op0 = temp;
20188 break;
20190 default:
20191 gcc_unreachable ();
20194 emit_insn (gen_rtx_SET (VOIDmode, dest,
20195 gen_rtx_IF_THEN_ELSE (result_mode,
20196 gen_rtx_GE (VOIDmode,
20197 op0, op1),
20198 true_cond, false_cond)));
20199 return 1;
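/* Sketch of the final GE form above: once op1 has been reduced to
   zero, "dest = (op0 >= 0) ? true_cond : false_cond" maps onto a
   single fsel,

	fsel fD,fA,fT,fF	; fD = (fA >= 0.0) ? fT : fF

   and the LE/EQ/ORDERED cases are first massaged with fneg/fabs so
   that the same GE-against-zero pattern applies (register names here
   are arbitrary).  */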
20202 /* Same as above, but for ints (isel). */
20204 static int
20205 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
20207 rtx condition_rtx, cr;
20208 machine_mode mode = GET_MODE (dest);
20209 enum rtx_code cond_code;
20210 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
20211 bool signedp;
20213 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
20214 return 0;
20216 /* We still have to do the compare, because isel doesn't do a
20217 compare, it just looks at the CRx bits set by a previous compare
20218 instruction. */
20219 condition_rtx = rs6000_generate_compare (op, mode);
20220 cond_code = GET_CODE (condition_rtx);
20221 cr = XEXP (condition_rtx, 0);
20222 signedp = GET_MODE (cr) == CCmode;
20224 isel_func = (mode == SImode
20225 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
20226 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
20228 switch (cond_code)
20230 case LT: case GT: case LTU: case GTU: case EQ:
20231 /* isel handles these directly. */
20232 break;
20234 default:
20235 /* We need to swap the sense of the comparison. */
20237 std::swap (false_cond, true_cond);
20238 PUT_CODE (condition_rtx, reverse_condition (cond_code));
20240 break;
20243 false_cond = force_reg (mode, false_cond);
20244 if (true_cond != const0_rtx)
20245 true_cond = force_reg (mode, true_cond);
20247 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
20249 return 1;
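/* Roughly, for SImode this expands to a compare setting a CR field
   followed by an isel on one of that field's bits; e.g. for
   "d = (a < b) ? t : f" something like

	cmpw	cr0,rA,rB
	isel	rD,rT,rF,lt	; rD = CR0[lt] ? rT : rF

   (illustrative only; the actual CR bit is printed via the %j output
   modifier in output_isel below).  */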
20252 const char *
20253 output_isel (rtx *operands)
20255 enum rtx_code code;
20257 code = GET_CODE (operands[1]);
20259 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
20261 gcc_assert (GET_CODE (operands[2]) == REG
20262 && GET_CODE (operands[3]) == REG);
20263 PUT_CODE (operands[1], reverse_condition (code));
20264 return "isel %0,%3,%2,%j1";
20267 return "isel %0,%2,%3,%j1";
20270 void
20271 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20273 machine_mode mode = GET_MODE (op0);
20274 enum rtx_code c;
20275 rtx target;
20277 /* VSX/altivec have direct min/max insns. */
20278 if ((code == SMAX || code == SMIN)
20279 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
20280 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
20282 emit_insn (gen_rtx_SET (VOIDmode,
20283 dest,
20284 gen_rtx_fmt_ee (code, mode, op0, op1)));
20285 return;
20288 if (code == SMAX || code == SMIN)
20289 c = GE;
20290 else
20291 c = GEU;
20293 if (code == SMAX || code == UMAX)
20294 target = emit_conditional_move (dest, c, op0, op1, mode,
20295 op0, op1, mode, 0);
20296 else
20297 target = emit_conditional_move (dest, c, op0, op1, mode,
20298 op1, op0, mode, 0);
20299 gcc_assert (target);
20300 if (target != dest)
20301 emit_move_insn (dest, target);
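/* E.g. an SFmode smax without a direct VSX pattern falls back to the
   conditional-move path above, conceptually

	dest = (op0 >= op1) ? op0 : op1;

   which on hard-float targets typically becomes an fsub followed by
   an fsel (a sketch; see rs6000_emit_cmove above).  */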
20304 /* A subroutine of the atomic operation splitters. Jump to LABEL if
20305 COND is true. Mark the jump as unlikely to be taken. */
20307 static void
20308 emit_unlikely_jump (rtx cond, rtx label)
20310 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
20311 rtx x;
20313 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
20314 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
20315 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
20318 /* A subroutine of the atomic operation splitters. Emit a load-locked
20319 instruction in MODE. For QI/HImode, possibly use a pattern that includes
20320 the zero_extend operation. */
20322 static void
20323 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
20325 rtx (*fn) (rtx, rtx) = NULL;
20327 switch (mode)
20329 case QImode:
20330 fn = gen_load_lockedqi;
20331 break;
20332 case HImode:
20333 fn = gen_load_lockedhi;
20334 break;
20335 case SImode:
20336 if (GET_MODE (mem) == QImode)
20337 fn = gen_load_lockedqi_si;
20338 else if (GET_MODE (mem) == HImode)
20339 fn = gen_load_lockedhi_si;
20340 else
20341 fn = gen_load_lockedsi;
20342 break;
20343 case DImode:
20344 fn = gen_load_lockeddi;
20345 break;
20346 case TImode:
20347 fn = gen_load_lockedti;
20348 break;
20349 default:
20350 gcc_unreachable ();
20352 emit_insn (fn (reg, mem));
20355 /* A subroutine of the atomic operation splitters. Emit a store-conditional
20356 instruction in MODE. */
20358 static void
20359 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
20361 rtx (*fn) (rtx, rtx, rtx) = NULL;
20363 switch (mode)
20365 case QImode:
20366 fn = gen_store_conditionalqi;
20367 break;
20368 case HImode:
20369 fn = gen_store_conditionalhi;
20370 break;
20371 case SImode:
20372 fn = gen_store_conditionalsi;
20373 break;
20374 case DImode:
20375 fn = gen_store_conditionaldi;
20376 break;
20377 case TImode:
20378 fn = gen_store_conditionalti;
20379 break;
20380 default:
20381 gcc_unreachable ();
20384 /* Emit sync before stwcx. to address PPC405 erratum 77. */
20385 if (PPC405_ERRATUM77)
20386 emit_insn (gen_hwsync ());
20388 emit_insn (fn (res, mem, val));
20391 /* Expand barriers before and after a load_locked/store_cond sequence. */
20393 static rtx
20394 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
20396 rtx addr = XEXP (mem, 0);
20397 int strict_p = (reload_in_progress || reload_completed);
20399 if (!legitimate_indirect_address_p (addr, strict_p)
20400 && !legitimate_indexed_address_p (addr, strict_p))
20402 addr = force_reg (Pmode, addr);
20403 mem = replace_equiv_address_nv (mem, addr);
20406 switch (model)
20408 case MEMMODEL_RELAXED:
20409 case MEMMODEL_CONSUME:
20410 case MEMMODEL_ACQUIRE:
20411 break;
20412 case MEMMODEL_RELEASE:
20413 case MEMMODEL_ACQ_REL:
20414 emit_insn (gen_lwsync ());
20415 break;
20416 case MEMMODEL_SEQ_CST:
20417 emit_insn (gen_hwsync ());
20418 break;
20419 default:
20420 gcc_unreachable ();
20422 return mem;
20425 static void
20426 rs6000_post_atomic_barrier (enum memmodel model)
20428 switch (model)
20430 case MEMMODEL_RELAXED:
20431 case MEMMODEL_CONSUME:
20432 case MEMMODEL_RELEASE:
20433 break;
20434 case MEMMODEL_ACQUIRE:
20435 case MEMMODEL_ACQ_REL:
20436 case MEMMODEL_SEQ_CST:
20437 emit_insn (gen_isync ());
20438 break;
20439 default:
20440 gcc_unreachable ();
20444 /* A subroutine of the various atomic expanders. For sub-word operations,
20445 we must adjust things to operate on SImode. Given the original MEM,
20446 return a new aligned memory. Also build and return the quantities by
20447 which to shift and mask. */
20449 static rtx
20450 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
20452 rtx addr, align, shift, mask, mem;
20453 HOST_WIDE_INT shift_mask;
20454 machine_mode mode = GET_MODE (orig_mem);
20456 /* For smaller modes, we have to implement this via SImode. */
20457 shift_mask = (mode == QImode ? 0x18 : 0x10);
20459 addr = XEXP (orig_mem, 0);
20460 addr = force_reg (GET_MODE (addr), addr);
20462 /* Aligned memory containing subword. Generate a new memory. We
20463 do not want any of the existing MEM_ATTR data, as we're now
20464 accessing memory outside the original object. */
20465 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
20466 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20467 mem = gen_rtx_MEM (SImode, align);
20468 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
20469 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
20470 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
20472 /* Shift amount for subword relative to aligned word. */
20473 shift = gen_reg_rtx (SImode);
20474 addr = gen_lowpart (SImode, addr);
20475 emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
20476 if (BYTES_BIG_ENDIAN)
20477 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
20478 shift, 1, OPTAB_LIB_WIDEN);
20479 *pshift = shift;
20481 /* Mask for insertion. */
20482 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
20483 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
20484 *pmask = mask;
20486 return mem;
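/* Worked example (assuming big-endian): for a HImode location at
   address A, the aligned container word is at A & -4, the shift is
   ((A & 3) * 8) ^ 0x10, and the mask is 0xffff << shift; the halfword
   is then extracted with (word >> shift) & 0xffff and reinserted with
   (word & ~mask) | (newval << shift).  */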
20489 /* A subroutine of the various atomic expanders. For sub-word operands,
20490 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
20492 static rtx
20493 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
20495 rtx x;
20497 x = gen_reg_rtx (SImode);
20498 emit_insn (gen_rtx_SET (VOIDmode, x,
20499 gen_rtx_AND (SImode,
20500 gen_rtx_NOT (SImode, mask),
20501 oldval)));
20503 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
20505 return x;
20508 /* A subroutine of the various atomic expanders. For sub-word operands,
20509 extract WIDE to NARROW via SHIFT. */
20511 static void
20512 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
20514 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
20515 wide, 1, OPTAB_LIB_WIDEN);
20516 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
20519 /* Expand an atomic compare and swap operation. */
20521 void
20522 rs6000_expand_atomic_compare_and_swap (rtx operands[])
20524 rtx boolval, retval, mem, oldval, newval, cond;
20525 rtx label1, label2, x, mask, shift;
20526 machine_mode mode, orig_mode;
20527 enum memmodel mod_s, mod_f;
20528 bool is_weak;
20530 boolval = operands[0];
20531 retval = operands[1];
20532 mem = operands[2];
20533 oldval = operands[3];
20534 newval = operands[4];
20535 is_weak = (INTVAL (operands[5]) != 0);
20536 mod_s = (enum memmodel) INTVAL (operands[6]);
20537 mod_f = (enum memmodel) INTVAL (operands[7]);
20538 orig_mode = mode = GET_MODE (mem);
20540 mask = shift = NULL_RTX;
20541 if (mode == QImode || mode == HImode)
20543 /* Before power8, we didn't have access to lbarx/lharx, so generate a
20544 lwarx plus shift/mask operations. With power8, we need to do the
20545 comparison in SImode, but the store is still done in QI/HImode. */
20546 oldval = convert_modes (SImode, mode, oldval, 1);
20548 if (!TARGET_SYNC_HI_QI)
20550 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20552 /* Shift and mask OLDVAL into position within the word. */
20553 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
20554 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20556 /* Shift and mask NEWVAL into position within the word. */
20557 newval = convert_modes (SImode, mode, newval, 1);
20558 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
20559 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20562 /* Prepare to adjust the return value. */
20563 retval = gen_reg_rtx (SImode);
20564 mode = SImode;
20566 else if (reg_overlap_mentioned_p (retval, oldval))
20567 oldval = copy_to_reg (oldval);
20569 mem = rs6000_pre_atomic_barrier (mem, mod_s);
20571 label1 = NULL_RTX;
20572 if (!is_weak)
20574 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20575 emit_label (XEXP (label1, 0));
20577 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20579 emit_load_locked (mode, retval, mem);
20581 x = retval;
20582 if (mask)
20584 x = expand_simple_binop (SImode, AND, retval, mask,
20585 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20588 cond = gen_reg_rtx (CCmode);
20589 /* If we have TImode, synthesize a comparison. */
20590 if (mode != TImode)
20591 x = gen_rtx_COMPARE (CCmode, x, oldval);
20592 else
20594 rtx xor1_result = gen_reg_rtx (DImode);
20595 rtx xor2_result = gen_reg_rtx (DImode);
20596 rtx or_result = gen_reg_rtx (DImode);
20597 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
20598 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
20599 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
20600 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
20602 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
20603 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
20604 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
20605 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
20608 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
20610 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20611 emit_unlikely_jump (x, label2);
20613 x = newval;
20614 if (mask)
20615 x = rs6000_mask_atomic_subword (retval, newval, mask);
20617 emit_store_conditional (orig_mode, cond, mem, x);
20619 if (!is_weak)
20621 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20622 emit_unlikely_jump (x, label1);
20625 if (mod_f != MEMMODEL_RELAXED)
20626 emit_label (XEXP (label2, 0));
20628 rs6000_post_atomic_barrier (mod_s);
20630 if (mod_f == MEMMODEL_RELAXED)
20631 emit_label (XEXP (label2, 0));
20633 if (shift)
20634 rs6000_finish_atomic_subword (operands[1], retval, shift);
20635 else if (mode != GET_MODE (operands[1]))
20636 convert_move (operands[1], retval, 1);
20638 /* In all cases, CR0 contains EQ on success, and NE on failure. */
20639 x = gen_rtx_EQ (SImode, cond, const0_rtx);
20640 emit_insn (gen_rtx_SET (VOIDmode, boolval, x));
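/* The SImode result is the classic PowerPC compare-and-swap loop,
   approximately

	loop:	lwarx	r9,0,rMEM	; load-reserved
		cmpw	cr0,r9,rOLD
		bne-	cr0,fail
		stwcx.	rNEW,0,rMEM	; store-conditional
		bne-	cr0,loop	; reservation lost, retry
	fail:

   bracketed by the barriers selected from the memory model above
   (illustrative; register names are arbitrary).  */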
20643 /* Expand an atomic exchange operation. */
20645 void
20646 rs6000_expand_atomic_exchange (rtx operands[])
20648 rtx retval, mem, val, cond;
20649 machine_mode mode;
20650 enum memmodel model;
20651 rtx label, x, mask, shift;
20653 retval = operands[0];
20654 mem = operands[1];
20655 val = operands[2];
20656 model = (enum memmodel) INTVAL (operands[3]);
20657 mode = GET_MODE (mem);
20659 mask = shift = NULL_RTX;
20660 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
20662 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20664 /* Shift and mask VAL into position within the word. */
20665 val = convert_modes (SImode, mode, val, 1);
20666 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20667 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20669 /* Prepare to adjust the return value. */
20670 retval = gen_reg_rtx (SImode);
20671 mode = SImode;
20674 mem = rs6000_pre_atomic_barrier (mem, model);
20676 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20677 emit_label (XEXP (label, 0));
20679 emit_load_locked (mode, retval, mem);
20681 x = val;
20682 if (mask)
20683 x = rs6000_mask_atomic_subword (retval, val, mask);
20685 cond = gen_reg_rtx (CCmode);
20686 emit_store_conditional (mode, cond, mem, x);
20688 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20689 emit_unlikely_jump (x, label);
20691 rs6000_post_atomic_barrier (model);
20693 if (shift)
20694 rs6000_finish_atomic_subword (operands[0], retval, shift);
20697 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
20698 to perform. MEM is the memory on which to operate. VAL is the second
20699 operand of the binary operator. BEFORE and AFTER are optional locations to
20700 return the value of MEM either before or after the operation. MODEL_RTX
20701 is a CONST_INT containing the memory model to use. */
20703 void
20704 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
20705 rtx orig_before, rtx orig_after, rtx model_rtx)
20707 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
20708 machine_mode mode = GET_MODE (mem);
20709 machine_mode store_mode = mode;
20710 rtx label, x, cond, mask, shift;
20711 rtx before = orig_before, after = orig_after;
20713 mask = shift = NULL_RTX;
20714 /* On power8, we want to use SImode for the operation. On previous systems,
20715 do the operation on a full word and shift/mask to get the proper byte or
20716 halfword. */
20717 if (mode == QImode || mode == HImode)
20719 if (TARGET_SYNC_HI_QI)
20721 val = convert_modes (SImode, mode, val, 1);
20723 /* Prepare to adjust the return value. */
20724 before = gen_reg_rtx (SImode);
20725 if (after)
20726 after = gen_reg_rtx (SImode);
20727 mode = SImode;
20729 else
20731 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20733 /* Shift and mask VAL into position within the word. */
20734 val = convert_modes (SImode, mode, val, 1);
20735 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20736 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20738 switch (code)
20740 case IOR:
20741 case XOR:
20742 /* We've already zero-extended VAL. That is sufficient to
20743 make certain that it does not affect other bits. */
20744 mask = NULL;
20745 break;
20747 case AND:
20748 /* If we make certain that all of the other bits in VAL are
20749 set, that will be sufficient to not affect other bits. */
20750 x = gen_rtx_NOT (SImode, mask);
20751 x = gen_rtx_IOR (SImode, x, val);
20752 emit_insn (gen_rtx_SET (VOIDmode, val, x));
20753 mask = NULL;
20754 break;
20756 case NOT:
20757 case PLUS:
20758 case MINUS:
20759 /* These will all affect bits outside the field and need
20760 adjustment via MASK within the loop. */
20761 break;
20763 default:
20764 gcc_unreachable ();
20767 /* Prepare to adjust the return value. */
20768 before = gen_reg_rtx (SImode);
20769 if (after)
20770 after = gen_reg_rtx (SImode);
20771 store_mode = mode = SImode;
20775 mem = rs6000_pre_atomic_barrier (mem, model);
20777 label = gen_label_rtx ();
20778 emit_label (label);
20779 label = gen_rtx_LABEL_REF (VOIDmode, label);
20781 if (before == NULL_RTX)
20782 before = gen_reg_rtx (mode);
20784 emit_load_locked (mode, before, mem);
20786 if (code == NOT)
20788 x = expand_simple_binop (mode, AND, before, val,
20789 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20790 after = expand_simple_unop (mode, NOT, x, after, 1);
20792 else
20794 after = expand_simple_binop (mode, code, before, val,
20795 after, 1, OPTAB_LIB_WIDEN);
20798 x = after;
20799 if (mask)
20801 x = expand_simple_binop (SImode, AND, after, mask,
20802 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20803 x = rs6000_mask_atomic_subword (before, x, mask);
20805 else if (store_mode != mode)
20806 x = convert_modes (store_mode, mode, x, 1);
20808 cond = gen_reg_rtx (CCmode);
20809 emit_store_conditional (store_mode, cond, mem, x);
20811 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20812 emit_unlikely_jump (x, label);
20814 rs6000_post_atomic_barrier (model);
20816 if (shift)
20818 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
20819 then do the calculations in an SImode register. */
20820 if (orig_before)
20821 rs6000_finish_atomic_subword (orig_before, before, shift);
20822 if (orig_after)
20823 rs6000_finish_atomic_subword (orig_after, after, shift);
20825 else if (store_mode != mode)
20827 /* QImode/HImode on machines with lbarx/lharx where we do the native
20828 operation and then do the calculations in an SImode register. */
20829 if (orig_before)
20830 convert_move (orig_before, before, 1);
20831 if (orig_after)
20832 convert_move (orig_after, after, 1);
20834 else if (orig_after && after != orig_after)
20835 emit_move_insn (orig_after, after);
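/* E.g. an SImode atomic fetch-and-add produced by this expander is
   roughly

	loop:	lwarx	r9,0,rMEM
		add	r10,r9,rVAL
		stwcx.	r10,0,rMEM
		bne-	cr0,loop

   with BEFORE in r9 and AFTER in r10 (a sketch; the subword cases add
   the shift/mask fixups described above).  */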
20838 /* Emit instructions to move SRC to DST. Called by splitters for
20839 multi-register moves. It will emit at most one instruction for
20840 each register that is accessed; that is, it won't emit li/lis pairs
20841 (or equivalent for 64-bit code). One of SRC or DST must be a hard
20842 register. */
20844 void
20845 rs6000_split_multireg_move (rtx dst, rtx src)
20847 /* The register number of the first register being moved. */
20848 int reg;
20849 /* The mode that is to be moved. */
20850 machine_mode mode;
20851 /* The mode that the move is being done in, and its size. */
20852 machine_mode reg_mode;
20853 int reg_mode_size;
20854 /* The number of registers that will be moved. */
20855 int nregs;
20857 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
20858 mode = GET_MODE (dst);
20859 nregs = hard_regno_nregs[reg][mode];
20860 if (FP_REGNO_P (reg))
20861 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
20862 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
20863 else if (ALTIVEC_REGNO_P (reg))
20864 reg_mode = V16QImode;
20865 else if (TARGET_E500_DOUBLE && mode == TFmode)
20866 reg_mode = DFmode;
20867 else
20868 reg_mode = word_mode;
20869 reg_mode_size = GET_MODE_SIZE (reg_mode);
20871 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
20873 /* TDmode residing in FP registers is special, since the ISA requires that
20874 the lower-numbered word of a register pair is always the most significant
20875 word, even in little-endian mode. This does not match the usual subreg
20876 semantics, so we cannot use simplify_gen_subreg in those cases. Access
20877 the appropriate constituent registers "by hand" in little-endian mode.
20879 Note we do not need to check for destructive overlap here since TDmode
20880 can only reside in even/odd register pairs. */
20881 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
20883 rtx p_src, p_dst;
20884 int i;
20886 for (i = 0; i < nregs; i++)
20888 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
20889 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
20890 else
20891 p_src = simplify_gen_subreg (reg_mode, src, mode,
20892 i * reg_mode_size);
20894 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
20895 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
20896 else
20897 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
20898 i * reg_mode_size);
20900 emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
20903 return;
20906 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
20908 /* Move register range backwards, if we might have destructive
20909 overlap. */
20910 int i;
20911 for (i = nregs - 1; i >= 0; i--)
20912 emit_insn (gen_rtx_SET (VOIDmode,
20913 simplify_gen_subreg (reg_mode, dst, mode,
20914 i * reg_mode_size),
20915 simplify_gen_subreg (reg_mode, src, mode,
20916 i * reg_mode_size)));
20918 else
20920 int i;
20921 int j = -1;
20922 bool used_update = false;
20923 rtx restore_basereg = NULL_RTX;
20925 if (MEM_P (src) && INT_REGNO_P (reg))
20927 rtx breg;
20929 if (GET_CODE (XEXP (src, 0)) == PRE_INC
20930 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
20932 rtx delta_rtx;
20933 breg = XEXP (XEXP (src, 0), 0);
20934 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
20935 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
20936 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
20937 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20938 src = replace_equiv_address (src, breg);
20940 else if (! rs6000_offsettable_memref_p (src, reg_mode))
20942 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
20944 rtx basereg = XEXP (XEXP (src, 0), 0);
20945 if (TARGET_UPDATE)
20947 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
20948 emit_insn (gen_rtx_SET (VOIDmode, ndst,
20949 gen_rtx_MEM (reg_mode, XEXP (src, 0))));
20950 used_update = true;
20952 else
20953 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20954 XEXP (XEXP (src, 0), 1)));
20955 src = replace_equiv_address (src, basereg);
20957 else
20959 rtx basereg = gen_rtx_REG (Pmode, reg);
20960 emit_insn (gen_rtx_SET (VOIDmode, basereg, XEXP (src, 0)));
20961 src = replace_equiv_address (src, basereg);
20965 breg = XEXP (src, 0);
20966 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
20967 breg = XEXP (breg, 0);
20969 /* If the base register we are using to address memory is
20970 also a destination reg, then change that register last. */
20971 if (REG_P (breg)
20972 && REGNO (breg) >= REGNO (dst)
20973 && REGNO (breg) < REGNO (dst) + nregs)
20974 j = REGNO (breg) - REGNO (dst);
20976 else if (MEM_P (dst) && INT_REGNO_P (reg))
20978 rtx breg;
20980 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
20981 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
20983 rtx delta_rtx;
20984 breg = XEXP (XEXP (dst, 0), 0);
20985 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
20986 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
20987 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
20989 /* We have to update the breg before doing the store.
20990 Use store with update, if available. */
20992 if (TARGET_UPDATE)
20994 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20995 emit_insn (TARGET_32BIT
20996 ? (TARGET_POWERPC64
20997 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
20998 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
20999 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
21000 used_update = true;
21002 else
21003 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
21004 dst = replace_equiv_address (dst, breg);
21006 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
21007 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
21009 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
21011 rtx basereg = XEXP (XEXP (dst, 0), 0);
21012 if (TARGET_UPDATE)
21014 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
21015 emit_insn (gen_rtx_SET (VOIDmode,
21016 gen_rtx_MEM (reg_mode, XEXP (dst, 0)), nsrc));
21017 used_update = true;
21019 else
21020 emit_insn (gen_rtx_SET (VOIDmode, basereg,
21021 XEXP (XEXP (dst, 0), 1)));
21022 dst = replace_equiv_address (dst, basereg);
21024 else
21026 rtx basereg = XEXP (XEXP (dst, 0), 0);
21027 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
21028 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
21029 && REG_P (basereg)
21030 && REG_P (offsetreg)
21031 && REGNO (basereg) != REGNO (offsetreg));
21032 if (REGNO (basereg) == 0)
21034 rtx tmp = offsetreg;
21035 offsetreg = basereg;
21036 basereg = tmp;
21038 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
21039 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
21040 dst = replace_equiv_address (dst, basereg);
21043 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
21044 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
21047 for (i = 0; i < nregs; i++)
21049 /* Calculate index to next subword. */
21050 ++j;
21051 if (j == nregs)
21052 j = 0;
21054 /* If the compiler already emitted the move of the first word by
21055 a store with update, there is no need to do anything. */
21056 if (j == 0 && used_update)
21057 continue;
21059 emit_insn (gen_rtx_SET (VOIDmode,
21060 simplify_gen_subreg (reg_mode, dst, mode,
21061 j * reg_mode_size),
21062 simplify_gen_subreg (reg_mode, src, mode,
21063 j * reg_mode_size)));
21065 if (restore_basereg != NULL_RTX)
21066 emit_insn (restore_basereg);
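/* Small example of the backwards case above: a 32-bit TImode move
   from r3..r6 to r4..r7 overlaps destructively, so it is emitted
   highest-register-first,

	mr r7,r6
	mr r6,r5
	mr r5,r4
	mr r4,r3

   so that each source register is read before it is overwritten.  */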
21071 /* This page contains routines that are used to determine what the
21072 function prologue and epilogue code will do and write them out. */
21074 static inline bool
21075 save_reg_p (int r)
21077 return !call_used_regs[r] && df_regs_ever_live_p (r);
21080 /* Return the first fixed-point register that is required to be
21081 saved. 32 if none. */
21083 int
21084 first_reg_to_save (void)
21086 int first_reg;
21088 /* Find lowest numbered live register. */
21089 for (first_reg = 13; first_reg <= 31; first_reg++)
21090 if (save_reg_p (first_reg))
21091 break;
21093 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
21094 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
21095 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
21096 || (TARGET_TOC && TARGET_MINIMAL_TOC))
21097 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21098 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
21100 #if TARGET_MACHO
21101 if (flag_pic
21102 && crtl->uses_pic_offset_table
21103 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
21104 return RS6000_PIC_OFFSET_TABLE_REGNUM;
21105 #endif
21107 return first_reg;
21110 /* Similar, for FP regs. */
21112 int
21113 first_fp_reg_to_save (void)
21115 int first_reg;
21117 /* Find lowest numbered live register. */
21118 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
21119 if (save_reg_p (first_reg))
21120 break;
21122 return first_reg;
21125 /* Similar, for AltiVec regs. */
21127 static int
21128 first_altivec_reg_to_save (void)
21130 int i;
21132 /* Stack frame remains as is unless we are in AltiVec ABI. */
21133 if (! TARGET_ALTIVEC_ABI)
21134 return LAST_ALTIVEC_REGNO + 1;
21136 /* On Darwin, the unwind routines are compiled without
21137 TARGET_ALTIVEC, and use save_world to save/restore the
21138 altivec registers when necessary. */
21139 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21140 && ! TARGET_ALTIVEC)
21141 return FIRST_ALTIVEC_REGNO + 20;
21143 /* Find lowest numbered live register. */
21144 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
21145 if (save_reg_p (i))
21146 break;
21148 return i;
21151 /* Return a 32-bit mask of the AltiVec registers we need to set in
21152 VRSAVE. Bit n of the return value is 1 if Vn is live; bits are
21153 numbered from the most-significant end, so bit 0 is the MSB of the 32-bit word. */
21155 static unsigned int
21156 compute_vrsave_mask (void)
21158 unsigned int i, mask = 0;
21160 /* On Darwin, the unwind routines are compiled without
21161 TARGET_ALTIVEC, and use save_world to save/restore the
21162 call-saved altivec registers when necessary. */
21163 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21164 && ! TARGET_ALTIVEC)
21165 mask |= 0xFFF;
21167 /* First, find out if we use _any_ altivec registers. */
21168 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
21169 if (df_regs_ever_live_p (i))
21170 mask |= ALTIVEC_REG_BIT (i);
21172 if (mask == 0)
21173 return mask;
21175 /* Next, remove the argument registers from the set. These must
21176 be in the VRSAVE mask set by the caller, so we don't need to add
21177 them in again. More importantly, the mask we compute here is
21178 used to generate CLOBBERs in the set_vrsave insn, and we do not
21179 wish the argument registers to die. */
21180 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
21181 mask &= ~ALTIVEC_REG_BIT (i);
21183 /* Similarly, remove the return value from the set. */
21185 bool yes = false;
21186 diddle_return_value (is_altivec_return_reg, &yes);
21187 if (yes)
21188 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
21191 return mask;
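/* For instance (sketch), if only V20 and V21 are live, the two bits
   ALTIVEC_REG_BIT yields are 0x800 and 0x400, so the function returns
   0x00000c00; the prologue then ORs this into VRSAVE so the operating
   system knows which vector registers to preserve.  */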
21194 /* For a very restricted set of circumstances, we can cut down the
21195 size of prologues/epilogues by calling our own save/restore-the-world
21196 routines. */
21198 static void
21199 compute_save_world_info (rs6000_stack_t *info_ptr)
21201 info_ptr->world_save_p = 1;
21202 info_ptr->world_save_p
21203 = (WORLD_SAVE_P (info_ptr)
21204 && DEFAULT_ABI == ABI_DARWIN
21205 && !cfun->has_nonlocal_label
21206 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
21207 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
21208 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
21209 && info_ptr->cr_save_p);
21211 /* This will not work in conjunction with sibcalls. Make sure there
21212 are none. (This check is expensive, but seldom executed.) */
21213 if (WORLD_SAVE_P (info_ptr))
21215 rtx_insn *insn;
21216 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
21217 if (CALL_P (insn) && SIBLING_CALL_P (insn))
21219 info_ptr->world_save_p = 0;
21220 break;
21224 if (WORLD_SAVE_P (info_ptr))
21226 /* Even if we're not touching VRsave, make sure there's room on the
21227 stack for it, if it looks like we're calling SAVE_WORLD, which
21228 will attempt to save it. */
21229 info_ptr->vrsave_size = 4;
21231 /* If we are going to save the world, we need to save the link register too. */
21232 info_ptr->lr_save_p = 1;
21234 /* "Save" the VRsave register too if we're saving the world. */
21235 if (info_ptr->vrsave_mask == 0)
21236 info_ptr->vrsave_mask = compute_vrsave_mask ();
21238 /* Because the Darwin register save/restore routines only handle
21239 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
21240 check. */
21241 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
21242 && (info_ptr->first_altivec_reg_save
21243 >= FIRST_SAVED_ALTIVEC_REGNO));
21245 return;
21249 static void
21250 is_altivec_return_reg (rtx reg, void *xyes)
21252 bool *yes = (bool *) xyes;
21253 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
21254 *yes = true;
21258 /* Look for user-defined global regs in the range FIRST to LAST-1.
21259 We should not restore these, and so cannot use lmw or out-of-line
21260 restore functions if there are any. We also can't save them
21261 (well, emit frame notes for them), because frame unwinding during
21262 exception handling will restore saved registers. */
21264 static bool
21265 global_regs_p (unsigned first, unsigned last)
21267 while (first < last)
21268 if (global_regs[first++])
21269 return true;
21270 return false;
21273 /* Determine the strategy for saving/restoring registers. */
21275 enum {
21276 SAVRES_MULTIPLE = 0x1,
21277 SAVE_INLINE_FPRS = 0x2,
21278 SAVE_INLINE_GPRS = 0x4,
21279 REST_INLINE_FPRS = 0x8,
21280 REST_INLINE_GPRS = 0x10,
21281 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
21282 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
21283 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
21284 SAVE_INLINE_VRS = 0x100,
21285 REST_INLINE_VRS = 0x200
21288 static int
21289 rs6000_savres_strategy (rs6000_stack_t *info,
21290 bool using_static_chain_p)
21292 int strategy = 0;
21293 bool lr_save_p;
21295 if (TARGET_MULTIPLE
21296 && !TARGET_POWERPC64
21297 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
21298 && info->first_gp_reg_save < 31
21299 && !global_regs_p (info->first_gp_reg_save, 32))
21300 strategy |= SAVRES_MULTIPLE;
21302 if (crtl->calls_eh_return
21303 || cfun->machine->ra_need_lr)
21304 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
21305 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
21306 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21308 if (info->first_fp_reg_save == 64
21309 /* The out-of-line FP routines use double-precision stores;
21310 we can't use those routines if we don't have such stores. */
21311 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
21312 || global_regs_p (info->first_fp_reg_save, 64))
21313 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21315 if (info->first_gp_reg_save == 32
21316 || (!(strategy & SAVRES_MULTIPLE)
21317 && global_regs_p (info->first_gp_reg_save, 32)))
21318 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21320 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
21321 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
21322 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21324 /* Define cutoff for using out-of-line functions to save registers. */
21325 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
21327 if (!optimize_size)
21329 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21330 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21331 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21333 else
21335 /* Prefer out-of-line restore if it will exit. */
21336 if (info->first_fp_reg_save > 61)
21337 strategy |= SAVE_INLINE_FPRS;
21338 if (info->first_gp_reg_save > 29)
21340 if (info->first_fp_reg_save == 64)
21341 strategy |= SAVE_INLINE_GPRS;
21342 else
21343 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21345 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
21346 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21349 else if (DEFAULT_ABI == ABI_DARWIN)
21351 if (info->first_fp_reg_save > 60)
21352 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21353 if (info->first_gp_reg_save > 29)
21354 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21355 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21357 else
21359 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
21360 if (info->first_fp_reg_save > 61)
21361 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21362 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21363 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21366 /* Don't bother to try to save things out-of-line if r11 is occupied
21367 by the static chain. It would require too much fiddling and the
21368 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
21369 pointer on Darwin, and AIX uses r1 or r12. */
21370 if (using_static_chain_p
21371 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
21372 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
21373 | SAVE_INLINE_GPRS
21374 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21376 /* We can only use the out-of-line routines to restore if we've
21377 saved all the registers from first_fp_reg_save in the prologue.
21378 Otherwise, we risk loading garbage. */
21379 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
21381 int i;
21383 for (i = info->first_fp_reg_save; i < 64; i++)
21384 if (!save_reg_p (i))
21386 strategy |= REST_INLINE_FPRS;
21387 break;
21391 /* If we are going to use store multiple, then don't even bother
21392 with the out-of-line routines, since the store-multiple
21393 instruction will always be smaller. */
21394 if ((strategy & SAVRES_MULTIPLE))
21395 strategy |= SAVE_INLINE_GPRS;
21397 /* info->lr_save_p isn't yet set if the only reason lr needs to be
21398 saved is an out-of-line save or restore. Set up the value for
21399 the next test (excluding out-of-line gpr restore). */
21400 lr_save_p = (info->lr_save_p
21401 || !(strategy & SAVE_INLINE_GPRS)
21402 || !(strategy & SAVE_INLINE_FPRS)
21403 || !(strategy & SAVE_INLINE_VRS)
21404 || !(strategy & REST_INLINE_FPRS)
21405 || !(strategy & REST_INLINE_VRS));
21407 /* The situation is more complicated with load multiple. We'd
21408 prefer to use the out-of-line routines for restores, since the
21409 "exit" out-of-line routines can handle the restore of LR and the
21410 frame teardown. However, it doesn't make sense to use the
21411 out-of-line routine if that is the only reason we'd need to save
21412 LR, and we can't use the "exit" out-of-line gpr restore if we
21413 have saved some fprs; in those cases it is advantageous to use
21414 load multiple when available. */
21415 if ((strategy & SAVRES_MULTIPLE)
21416 && (!lr_save_p
21417 || info->first_fp_reg_save != 64))
21418 strategy |= REST_INLINE_GPRS;
21420 /* Saving CR interferes with the exit routines used on the SPE, so
21421 just punt here. */
21422 if (TARGET_SPE_ABI
21423 && info->spe_64bit_regs_used
21424 && info->cr_save_p)
21425 strategy |= REST_INLINE_GPRS;
21427 /* We can only use load multiple or the out-of-line routines to
21428 restore if we've used store multiple or out-of-line routines
21429 in the prologue, i.e. if we've saved all the registers from
21430 first_gp_reg_save. Otherwise, we risk loading garbage. */
21431 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
21432 == SAVE_INLINE_GPRS)
21434 int i;
21436 for (i = info->first_gp_reg_save; i < 32; i++)
21437 if (!save_reg_p (i))
21439 strategy |= REST_INLINE_GPRS;
21440 break;
21444 if (TARGET_ELF && TARGET_64BIT)
21446 if (!(strategy & SAVE_INLINE_FPRS))
21447 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21448 else if (!(strategy & SAVE_INLINE_GPRS)
21449 && info->first_fp_reg_save == 64)
21450 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
21452 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
21453 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
21455 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
21456 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21458 return strategy;
21461 /* Calculate the stack information for the current function. This is
21462 complicated by having two separate calling sequences, the AIX calling
21463 sequence and the V.4 calling sequence.
21465 AIX (and Darwin/Mac OS X) stack frames look like:
21466 32-bit 64-bit
21467 SP----> +---------------------------------------+
21468 | back chain to caller | 0 0
21469 +---------------------------------------+
21470 | saved CR | 4 8 (8-11)
21471 +---------------------------------------+
21472 | saved LR | 8 16
21473 +---------------------------------------+
21474 | reserved for compilers | 12 24
21475 +---------------------------------------+
21476 | reserved for binders | 16 32
21477 +---------------------------------------+
21478 | saved TOC pointer | 20 40
21479 +---------------------------------------+
21480 | Parameter save area (P) | 24 48
21481 +---------------------------------------+
21482 | Alloca space (A) | 24+P etc.
21483 +---------------------------------------+
21484 | Local variable space (L) | 24+P+A
21485 +---------------------------------------+
21486 | Float/int conversion temporary (X) | 24+P+A+L
21487 +---------------------------------------+
21488 | Save area for AltiVec registers (W) | 24+P+A+L+X
21489 +---------------------------------------+
21490 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
21491 +---------------------------------------+
21492 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
21493 +---------------------------------------+
21494 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
21495 +---------------------------------------+
21496 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
21497 +---------------------------------------+
21498 old SP->| back chain to caller's caller |
21499 +---------------------------------------+
21501 The required alignment for AIX configurations is two words (i.e., 8
21502 or 16 bytes).
21504 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
21506 SP----> +---------------------------------------+
21507 | Back chain to caller | 0
21508 +---------------------------------------+
21509 | Save area for CR | 8
21510 +---------------------------------------+
21511 | Saved LR | 16
21512 +---------------------------------------+
21513 | Saved TOC pointer | 24
21514 +---------------------------------------+
21515 | Parameter save area (P) | 32
21516 +---------------------------------------+
21517 | Alloca space (A) | 32+P
21518 +---------------------------------------+
21519 | Local variable space (L) | 32+P+A
21520 +---------------------------------------+
21521 | Save area for AltiVec registers (W) | 32+P+A+L
21522 +---------------------------------------+
21523 | AltiVec alignment padding (Y) | 32+P+A+L+W
21524 +---------------------------------------+
21525 | Save area for GP registers (G) | 32+P+A+L+W+Y
21526 +---------------------------------------+
21527 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
21528 +---------------------------------------+
21529 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
21530 +---------------------------------------+
21533 V.4 stack frames look like:
21535 SP----> +---------------------------------------+
21536 | back chain to caller | 0
21537 +---------------------------------------+
21538 | caller's saved LR | 4
21539 +---------------------------------------+
21540 | Parameter save area (P) | 8
21541 +---------------------------------------+
21542 | Alloca space (A) | 8+P
21543 +---------------------------------------+
21544 | Varargs save area (V) | 8+P+A
21545 +---------------------------------------+
21546 | Local variable space (L) | 8+P+A+V
21547 +---------------------------------------+
21548 | Float/int conversion temporary (X) | 8+P+A+V+L
21549 +---------------------------------------+
21550 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
21551 +---------------------------------------+
21552 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
21553 +---------------------------------------+
21554 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
21555 +---------------------------------------+
21556 | SPE: area for 64-bit GP registers |
21557 +---------------------------------------+
21558 | SPE alignment padding |
21559 +---------------------------------------+
21560 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
21561 +---------------------------------------+
21562 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
21563 +---------------------------------------+
21564 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
21565 +---------------------------------------+
21566 old SP->| back chain to caller's caller |
21567 +---------------------------------------+
21569 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
21570 given. (But note below and in sysv4.h that we require only 8 and
21571 may round up the size of our stack frame anyway. The historical
21572 reason is early versions of powerpc-linux which didn't properly
21573 align the stack at program startup. A happy side-effect is that
21574 -mno-eabi libraries can be used with -meabi programs.)
21576 The EABI configuration defaults to the V.4 layout. However,
21577 the stack alignment requirements may differ. If -mno-eabi is not
21578 given, the required stack alignment is 8 bytes; if -mno-eabi is
21579 given, the required alignment is 16 bytes. (But see V.4 comment
21580 above.) */
21582 #ifndef ABI_STACK_BOUNDARY
21583 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
21584 #endif
21586 static rs6000_stack_t *
21587 rs6000_stack_info (void)
21589 /* We should never be called for thunks; we are not set up for that. */
21590 gcc_assert (!cfun->is_thunk);
21592 rs6000_stack_t *info_ptr = &stack_info;
21593 int reg_size = TARGET_32BIT ? 4 : 8;
21594 int ehrd_size;
21595 int ehcr_size;
21596 int save_align;
21597 int first_gp;
21598 HOST_WIDE_INT non_fixed_size;
21599 bool using_static_chain_p;
21601 if (reload_completed && info_ptr->reload_completed)
21602 return info_ptr;
21604 memset (info_ptr, 0, sizeof (*info_ptr));
21605 info_ptr->reload_completed = reload_completed;
21607 if (TARGET_SPE)
21609 /* Cache value so we don't rescan instruction chain over and over. */
21610 if (cfun->machine->insn_chain_scanned_p == 0)
21611 cfun->machine->insn_chain_scanned_p
21612 = spe_func_has_64bit_regs_p () + 1;
21613 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
21616 /* Select which calling sequence. */
21617 info_ptr->abi = DEFAULT_ABI;
21619 /* Calculate which registers need to be saved & save area size. */
21620 info_ptr->first_gp_reg_save = first_reg_to_save ();
21621 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
21622 even if it currently looks like we won't. Reload may need it to
21623 get at a constant; if so, it will have already created a constant
21624 pool entry for it. */
21625 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
21626 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
21627 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
21628 && crtl->uses_const_pool
21629 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
21630 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
21631 else
21632 first_gp = info_ptr->first_gp_reg_save;
21634 info_ptr->gp_size = reg_size * (32 - first_gp);
21636 /* For the SPE, we have an additional upper 32-bits on each GPR.
21637 Ideally we should save the entire 64-bits only when the upper
21638 half is used in SIMD instructions. Since we only record
21639 registers live (not the size they are used in), this proves
21640 difficult because we'd have to traverse the instruction chain at
21641 the right time, taking reload into account. This is a real pain,
21642 so we opt to save the GPRs in 64 bits whenever even one register
21643 gets used in 64 bits. Otherwise, all the registers in the frame
21644 get saved in 32 bits.
21646 Note that when we save all GPRs (except the SP) in 64 bits, the
21647 traditional GP save area will be empty. */
21648 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21649 info_ptr->gp_size = 0;
21651 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
21652 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
21654 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
21655 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
21656 - info_ptr->first_altivec_reg_save);
21658 /* Does this function call anything? */
21659 info_ptr->calls_p = (! crtl->is_leaf
21660 || cfun->machine->ra_needs_full_frame);
21662 /* Determine if we need to save the condition code registers. */
21663 if (df_regs_ever_live_p (CR2_REGNO)
21664 || df_regs_ever_live_p (CR3_REGNO)
21665 || df_regs_ever_live_p (CR4_REGNO))
21667 info_ptr->cr_save_p = 1;
21668 if (DEFAULT_ABI == ABI_V4)
21669 info_ptr->cr_size = reg_size;
21672 /* If the current function calls __builtin_eh_return, then we need
21673 to allocate stack space for registers that will hold data for
21674 the exception handler. */
21675 if (crtl->calls_eh_return)
21677 unsigned int i;
21678 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
21679 continue;
21681 /* SPE saves EH registers in 64-bits. */
21682 ehrd_size = i * (TARGET_SPE_ABI
21683 && info_ptr->spe_64bit_regs_used != 0
21684 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
21686 else
21687 ehrd_size = 0;
21689 /* In the ELFv2 ABI, we also need to allocate space for separate
21690 CR field save areas if the function calls __builtin_eh_return. */
21691 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
21693 /* This hard-codes that we have three call-saved CR fields. */
21694 ehcr_size = 3 * reg_size;
21695 /* We do *not* use the regular CR save mechanism. */
21696 info_ptr->cr_save_p = 0;
21698 else
21699 ehcr_size = 0;
21701 /* Determine various sizes. */
21702 info_ptr->reg_size = reg_size;
21703 info_ptr->fixed_size = RS6000_SAVE_AREA;
21704 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
21705 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
21706 TARGET_ALTIVEC ? 16 : 8);
21707 if (FRAME_GROWS_DOWNWARD)
21708 info_ptr->vars_size
21709 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
21710 + info_ptr->parm_size,
21711 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
21712 - (info_ptr->fixed_size + info_ptr->vars_size
21713 + info_ptr->parm_size);
21715 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21716 info_ptr->spe_gp_size = 8 * (32 - first_gp);
21717 else
21718 info_ptr->spe_gp_size = 0;
21720 if (TARGET_ALTIVEC_ABI)
21721 info_ptr->vrsave_mask = compute_vrsave_mask ();
21722 else
21723 info_ptr->vrsave_mask = 0;
21725 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
21726 info_ptr->vrsave_size = 4;
21727 else
21728 info_ptr->vrsave_size = 0;
21730 compute_save_world_info (info_ptr);
21732 /* Calculate the offsets. */
21733 switch (DEFAULT_ABI)
21735 case ABI_NONE:
21736 default:
21737 gcc_unreachable ();
21739 case ABI_AIX:
21740 case ABI_ELFv2:
21741 case ABI_DARWIN:
21742 info_ptr->fp_save_offset = - info_ptr->fp_size;
21743 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21745 if (TARGET_ALTIVEC_ABI)
21747 info_ptr->vrsave_save_offset
21748 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
21750 /* Align stack so vector save area is on a quadword boundary.
21751 The padding goes above the vectors. */
21752 if (info_ptr->altivec_size != 0)
21753 info_ptr->altivec_padding_size
21754 = info_ptr->vrsave_save_offset & 0xF;
21755 else
21756 info_ptr->altivec_padding_size = 0;
21758 info_ptr->altivec_save_offset
21759 = info_ptr->vrsave_save_offset
21760 - info_ptr->altivec_padding_size
21761 - info_ptr->altivec_size;
21762 gcc_assert (info_ptr->altivec_size == 0
21763 || info_ptr->altivec_save_offset % 16 == 0);
21765 /* Adjust for AltiVec case. */
21766 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
21768 else
21769 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
21771 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
21772 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
21773 info_ptr->lr_save_offset = 2 * reg_size;
21774 break;
21776 case ABI_V4:
21777 info_ptr->fp_save_offset = - info_ptr->fp_size;
21778 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21779 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
21781 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21783 /* Align stack so SPE GPR save area is aligned on a
21784 double-word boundary. */
21785 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
21786 info_ptr->spe_padding_size
21787 = 8 - (-info_ptr->cr_save_offset % 8);
21788 else
21789 info_ptr->spe_padding_size = 0;
21791 info_ptr->spe_gp_save_offset
21792 = info_ptr->cr_save_offset
21793 - info_ptr->spe_padding_size
21794 - info_ptr->spe_gp_size;
21796 /* Adjust for SPE case. */
21797 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
21799 else if (TARGET_ALTIVEC_ABI)
21801 info_ptr->vrsave_save_offset
21802 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
21804 /* Align stack so vector save area is on a quadword boundary. */
21805 if (info_ptr->altivec_size != 0)
21806 info_ptr->altivec_padding_size
21807 = 16 - (-info_ptr->vrsave_save_offset % 16);
21808 else
21809 info_ptr->altivec_padding_size = 0;
21811 info_ptr->altivec_save_offset
21812 = info_ptr->vrsave_save_offset
21813 - info_ptr->altivec_padding_size
21814 - info_ptr->altivec_size;
21816 /* Adjust for AltiVec case. */
21817 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
21819 else
21820 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
21821 info_ptr->ehrd_offset -= ehrd_size;
21822 info_ptr->lr_save_offset = reg_size;
21823 break;
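/* A worked example of the AIX/ELFv2 branch above, with purely
   illustrative numbers: reg_size = 8, fp_size = 16 (f30-f31 saved),
   gp_size = 24 (r29-r31 saved), no AltiVec ABI and no EH data gives

     fp_save_offset = -16
     gp_save_offset = -16 - 24 = -40
     ehrd_offset    = -40
     cr_save_offset = 8
     lr_save_offset = 16

   i.e. the negative offsets grow down from the top of the previous
   frame, while the CR and LR slots sit in the fixed save area.  */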
21826 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
21827 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
21828 + info_ptr->gp_size
21829 + info_ptr->altivec_size
21830 + info_ptr->altivec_padding_size
21831 + info_ptr->spe_gp_size
21832 + info_ptr->spe_padding_size
21833 + ehrd_size
21834 + ehcr_size
21835 + info_ptr->cr_size
21836 + info_ptr->vrsave_size,
21837 save_align);
21839 non_fixed_size = (info_ptr->vars_size
21840 + info_ptr->parm_size
21841 + info_ptr->save_size);
21843 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
21844 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
21846 /* Determine if we need to save the link register. */
21847 if (info_ptr->calls_p
21848 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21849 && crtl->profile
21850 && !TARGET_PROFILE_KERNEL)
21851 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
21852 #ifdef TARGET_RELOCATABLE
21853 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
21854 #endif
21855 || rs6000_ra_ever_killed ())
21856 info_ptr->lr_save_p = 1;
21858 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
21859 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
21860 && call_used_regs[STATIC_CHAIN_REGNUM]);
21861 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
21862 using_static_chain_p);
21864 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
21865 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
21866 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
21867 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
21868 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
21869 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
21870 info_ptr->lr_save_p = 1;
21872 if (info_ptr->lr_save_p)
21873 df_set_regs_ever_live (LR_REGNO, true);
21875 /* Determine if we need to allocate any stack frame:
21877 For AIX we need to push the stack if a frame pointer is needed
21878 (because the stack might be dynamically adjusted), if we are
21879 debugging, if we make calls, or if the sum of fp_save, gp_save,
21880 and local variables is more than the space needed to save all
21881 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
21882 + 18*8 = 288 (GPR13 reserved).
21884 For V.4 we don't have the stack cushion that AIX uses, but assume
21885 that the debugger can handle stackless frames. */
21887 if (info_ptr->calls_p)
21888 info_ptr->push_p = 1;
21890 else if (DEFAULT_ABI == ABI_V4)
21891 info_ptr->push_p = non_fixed_size != 0;
21893 else if (frame_pointer_needed)
21894 info_ptr->push_p = 1;
21896 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
21897 info_ptr->push_p = 1;
21899 else
21900 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
21902 /* Zero offsets if we're not saving those registers. */
21903 if (info_ptr->fp_size == 0)
21904 info_ptr->fp_save_offset = 0;
21906 if (info_ptr->gp_size == 0)
21907 info_ptr->gp_save_offset = 0;
21909 if (! TARGET_ALTIVEC_ABI || info_ptr->altivec_size == 0)
21910 info_ptr->altivec_save_offset = 0;
21912 /* Zero VRSAVE offset if not saved and restored. */
21913 if (! TARGET_ALTIVEC_VRSAVE || info_ptr->vrsave_mask == 0)
21914 info_ptr->vrsave_save_offset = 0;
21916 if (! TARGET_SPE_ABI
21917 || info_ptr->spe_64bit_regs_used == 0
21918 || info_ptr->spe_gp_size == 0)
21919 info_ptr->spe_gp_save_offset = 0;
21921 if (! info_ptr->lr_save_p)
21922 info_ptr->lr_save_offset = 0;
21924 if (! info_ptr->cr_save_p)
21925 info_ptr->cr_save_offset = 0;
21927 return info_ptr;
21930 /* Return true if the current function uses any GPRs in 64-bit SIMD
21931 mode. */
21933 static bool
21934 spe_func_has_64bit_regs_p (void)
21936 rtx_insn *insns, *insn;
21938 /* Functions that save and restore all the call-saved registers will
21939 need to save/restore the registers in 64-bits. */
21940 if (crtl->calls_eh_return
21941 || cfun->calls_setjmp
21942 || crtl->has_nonlocal_goto)
21943 return true;
21945 insns = get_insns ();
21947 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
21949 if (INSN_P (insn))
21951 rtx i;
21953 /* FIXME: This should be implemented with attributes...
21955 (set_attr "spe64" "true") ... then,
21956 if (get_spe64 (insn)) return true;
21958 It's the only reliable way to do the stuff below. */
21960 i = PATTERN (insn);
21961 if (GET_CODE (i) == SET)
21963 machine_mode mode = GET_MODE (SET_SRC (i));
21965 if (SPE_VECTOR_MODE (mode))
21966 return true;
21967 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
21968 return true;
21973 return false;
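/* A sketch (untested, illustrative only) of the attribute-based test
   suggested in the FIXME above.  rs6000.md would declare

     (define_attr "spe64" "no,yes" (const_string "no"))

   and tag the SPE 64-bit patterns with (set_attr "spe64" "yes"); the
   scan here would then reduce to something like

     if (get_attr_spe64 (insn) == SPE64_YES)
       return true;

   using the get_attr_* accessor that genattrtab generates.  */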
21976 static void
21977 debug_stack_info (rs6000_stack_t *info)
21979 const char *abi_string;
21981 if (! info)
21982 info = rs6000_stack_info ();
21984 fprintf (stderr, "\nStack information for function %s:\n",
21985 ((current_function_decl && DECL_NAME (current_function_decl))
21986 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
21987 : "<unknown>"));
21989 switch (info->abi)
21991 default: abi_string = "Unknown"; break;
21992 case ABI_NONE: abi_string = "NONE"; break;
21993 case ABI_AIX: abi_string = "AIX"; break;
21994 case ABI_ELFv2: abi_string = "ELFv2"; break;
21995 case ABI_DARWIN: abi_string = "Darwin"; break;
21996 case ABI_V4: abi_string = "V.4"; break;
21999 fprintf (stderr, "\tABI = %5s\n", abi_string);
22001 if (TARGET_ALTIVEC_ABI)
22002 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
22004 if (TARGET_SPE_ABI)
22005 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
22007 if (info->first_gp_reg_save != 32)
22008 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
22010 if (info->first_fp_reg_save != 64)
22011 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
22013 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
22014 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
22015 info->first_altivec_reg_save);
22017 if (info->lr_save_p)
22018 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
22020 if (info->cr_save_p)
22021 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
22023 if (info->vrsave_mask)
22024 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
22026 if (info->push_p)
22027 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
22029 if (info->calls_p)
22030 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
22032 if (info->gp_save_offset)
22033 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
22035 if (info->fp_save_offset)
22036 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
22038 if (info->altivec_save_offset)
22039 fprintf (stderr, "\taltivec_save_offset = %5d\n",
22040 info->altivec_save_offset);
22042 if (info->spe_gp_save_offset)
22043 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
22044 info->spe_gp_save_offset);
22046 if (info->vrsave_save_offset)
22047 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
22048 info->vrsave_save_offset);
22050 if (info->lr_save_offset)
22051 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
22053 if (info->cr_save_offset)
22054 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
22056 if (info->varargs_save_offset)
22057 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
22059 if (info->total_size)
22060 fprintf (stderr, "\ttotal_size = "HOST_WIDE_INT_PRINT_DEC"\n",
22061 info->total_size);
22063 if (info->vars_size)
22064 fprintf (stderr, "\tvars_size = "HOST_WIDE_INT_PRINT_DEC"\n",
22065 info->vars_size);
22067 if (info->parm_size)
22068 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
22070 if (info->fixed_size)
22071 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
22073 if (info->gp_size)
22074 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
22076 if (info->spe_gp_size)
22077 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
22079 if (info->fp_size)
22080 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
22082 if (info->altivec_size)
22083 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
22085 if (info->vrsave_size)
22086 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
22088 if (info->altivec_padding_size)
22089 fprintf (stderr, "\taltivec_padding_size= %5d\n",
22090 info->altivec_padding_size);
22092 if (info->spe_padding_size)
22093 fprintf (stderr, "\tspe_padding_size = %5d\n",
22094 info->spe_padding_size);
22096 if (info->cr_size)
22097 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
22099 if (info->save_size)
22100 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
22102 if (info->reg_size != 4)
22103 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
22105 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
22107 fprintf (stderr, "\n");
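/* debug_stack_info is most conveniently used from the debugger, e.g.

     (gdb) call debug_stack_info (0)

   since passing a null INFO makes it recompute the current function's
   frame layout via rs6000_stack_info before dumping it.  */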
22110 rtx
22111 rs6000_return_addr (int count, rtx frame)
22113 /* Currently we don't optimize very well between prologue and body
22114 code, and for PIC code the result can actually be quite bad, so
22115 don't try to be too clever here. */
22116 if (count != 0
22117 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
22119 cfun->machine->ra_needs_full_frame = 1;
22121 return
22122 gen_rtx_MEM
22123 (Pmode,
22124 memory_address
22125 (Pmode,
22126 plus_constant (Pmode,
22127 copy_to_reg
22128 (gen_rtx_MEM (Pmode,
22129 memory_address (Pmode, frame))),
22130 RETURN_ADDRESS_OFFSET)));
22133 cfun->machine->ra_need_lr = 1;
22134 return get_hard_reg_initial_val (Pmode, LR_REGNO);
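/* This is the target side of __builtin_return_address; e.g. user code

     void *caller_pc = __builtin_return_address (0);
     void *outer_pc  = __builtin_return_address (1);

   arrives here with COUNT 0 and 1 respectively.  Any nonzero COUNT
   (or COUNT 0 under V.4/Darwin PIC) takes the frame-walking path
   above rather than reading LR directly.  */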
22137 /* Say whether a function is a candidate for sibcall handling or not. */
22139 static bool
22140 rs6000_function_ok_for_sibcall (tree decl, tree exp)
22142 tree fntype;
22144 if (decl)
22145 fntype = TREE_TYPE (decl);
22146 else
22147 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
22149 /* We can't do it if the called function has more vector parameters
22150 than the current function; there's nowhere to put the VRsave code. */
22151 if (TARGET_ALTIVEC_ABI
22152 && TARGET_ALTIVEC_VRSAVE
22153 && !(decl && decl == current_function_decl))
22155 function_args_iterator args_iter;
22156 tree type;
22157 int nvreg = 0;
22159 /* Functions with vector parameters are required to have a
22160 prototype, so the argument type info must be available
22161 here. */
22162 FOREACH_FUNCTION_ARGS (fntype, type, args_iter)
22163 if (TREE_CODE (type) == VECTOR_TYPE
22164 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22165 nvreg++;
22167 FOREACH_FUNCTION_ARGS (TREE_TYPE (current_function_decl), type, args_iter)
22168 if (TREE_CODE (type) == VECTOR_TYPE
22169 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22170 nvreg--;
22172 if (nvreg > 0)
22173 return false;
22176 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
22177 functions, because the callee may have a TOC pointer different
22178 from the caller's and there's no way to ensure we restore the TOC
22179 when we return. With the secure-plt SYSV ABI we can't make
22180 non-local calls when -fpic/PIC because the PLT call stubs use r30. */
22181 if (DEFAULT_ABI == ABI_DARWIN
22182 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22183 && decl
22184 && !DECL_EXTERNAL (decl)
22185 && (*targetm.binds_local_p) (decl))
22186 || (DEFAULT_ABI == ABI_V4
22187 && (!TARGET_SECURE_PLT
22188 || !flag_pic
22189 || (decl
22190 && (*targetm.binds_local_p) (decl)))))
22192 tree attr_list = TYPE_ATTRIBUTES (fntype);
22194 if (!lookup_attribute ("longcall", attr_list)
22195 || lookup_attribute ("shortcall", attr_list))
22196 return true;
22199 return false;
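/* For instance (illustrative only, ignoring inlining), with -O2 on a
   powerpc64 ELF target a local tail call such as

     static int callee (int x) { return x + 1; }
     int caller (int x) { return callee (x); }

   passes the binds_local_p test above and may become a sibcall,
   whereas declaring callee with __attribute__ ((longcall)) defeats
   it via the attribute check at the end.  */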
22202 static int
22203 rs6000_ra_ever_killed (void)
22205 rtx_insn *top;
22206 rtx reg;
22207 rtx_insn *insn;
22209 if (cfun->is_thunk)
22210 return 0;
22212 if (cfun->machine->lr_save_state)
22213 return cfun->machine->lr_save_state - 1;
22215 /* regs_ever_live has LR marked as used if any sibcalls are present,
22216 but this should not force saving and restoring in the
22217 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
22218 clobbers LR, so that is inappropriate. */
22220 /* Also, the prologue can generate a store into LR that
22221 doesn't really count, like this:
22223 move LR->R0
22224 bcl to set PIC register
22225 move LR->R31
22226 move R0->LR
22228 When we're called from the epilogue, we need to avoid counting
22229 this as a store. */
22231 push_topmost_sequence ();
22232 top = get_insns ();
22233 pop_topmost_sequence ();
22234 reg = gen_rtx_REG (Pmode, LR_REGNO);
22236 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
22238 if (INSN_P (insn))
22240 if (CALL_P (insn))
22242 if (!SIBLING_CALL_P (insn))
22243 return 1;
22245 else if (find_regno_note (insn, REG_INC, LR_REGNO))
22246 return 1;
22247 else if (set_of (reg, insn) != NULL_RTX
22248 && !prologue_epilogue_contains (insn))
22249 return 1;
22252 return 0;
22255 /* Emit instructions needed to load the TOC register.
22256 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
22257 and there is a constant pool, or for SVR4 -fpic. */
22259 void
22260 rs6000_emit_load_toc_table (int fromprolog)
22262 rtx dest;
22263 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
22265 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
22267 char buf[30];
22268 rtx lab, tmp1, tmp2, got;
22270 lab = gen_label_rtx ();
22271 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
22272 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22273 if (flag_pic == 2)
22274 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22275 else
22276 got = rs6000_got_sym ();
22277 tmp1 = tmp2 = dest;
22278 if (!fromprolog)
22280 tmp1 = gen_reg_rtx (Pmode);
22281 tmp2 = gen_reg_rtx (Pmode);
22283 emit_insn (gen_load_toc_v4_PIC_1 (lab));
22284 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
22285 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
22286 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
22288 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
22290 emit_insn (gen_load_toc_v4_pic_si ());
22291 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22293 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
22295 char buf[30];
22296 rtx temp0 = (fromprolog
22297 ? gen_rtx_REG (Pmode, 0)
22298 : gen_reg_rtx (Pmode));
22300 if (fromprolog)
22302 rtx symF, symL;
22304 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
22305 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22307 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
22308 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22310 emit_insn (gen_load_toc_v4_PIC_1 (symF));
22311 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22312 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
22314 else
22316 rtx tocsym, lab;
22318 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22319 lab = gen_label_rtx ();
22320 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
22321 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22322 if (TARGET_LINK_STACK)
22323 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
22324 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
22326 emit_insn (gen_addsi3 (dest, temp0, dest));
22328 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
22330 /* This is for AIX code running in non-PIC ELF32. */
22331 char buf[30];
22332 rtx realsym;
22333 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
22334 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22336 emit_insn (gen_elf_high (dest, realsym));
22337 emit_insn (gen_elf_low (dest, dest, realsym));
22339 else
22341 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22343 if (TARGET_32BIT)
22344 emit_insn (gen_load_toc_aix_si (dest));
22345 else
22346 emit_insn (gen_load_toc_aix_di (dest));
22350 /* Emit instructions to restore the link register after determining where
22351 its value has been stored. */
22353 void
22354 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
22356 rs6000_stack_t *info = rs6000_stack_info ();
22357 rtx operands[2];
22359 operands[0] = source;
22360 operands[1] = scratch;
22362 if (info->lr_save_p)
22364 rtx frame_rtx = stack_pointer_rtx;
22365 HOST_WIDE_INT sp_offset = 0;
22366 rtx tmp;
22368 if (frame_pointer_needed
22369 || cfun->calls_alloca
22370 || info->total_size > 32767)
22372 tmp = gen_frame_mem (Pmode, frame_rtx);
22373 emit_move_insn (operands[1], tmp);
22374 frame_rtx = operands[1];
22376 else if (info->push_p)
22377 sp_offset = info->total_size;
22379 tmp = plus_constant (Pmode, frame_rtx,
22380 info->lr_save_offset + sp_offset);
22381 tmp = gen_frame_mem (Pmode, tmp);
22382 emit_move_insn (tmp, operands[0]);
22384 else
22385 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
22387 /* Freeze lr_save_p. We've just emitted rtl that depends on the
22388 state of lr_save_p so any change from here on would be a bug. In
22389 particular, stop rs6000_ra_ever_killed from considering the SET
22390 of lr we may have added just above. */
22391 cfun->machine->lr_save_state = info->lr_save_p + 1;
22394 static GTY(()) alias_set_type set = -1;
22396 alias_set_type
22397 get_TOC_alias_set (void)
22399 if (set == -1)
22400 set = new_alias_set ();
22401 return set;
22404 /* This returns nonzero if the current function uses the TOC. This is
22405 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
22406 is generated by the ABI_V4 load_toc_* patterns. */
22407 #if TARGET_ELF
22408 static int
22409 uses_TOC (void)
22411 rtx_insn *insn;
22413 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22414 if (INSN_P (insn))
22416 rtx pat = PATTERN (insn);
22417 int i;
22419 if (GET_CODE (pat) == PARALLEL)
22420 for (i = 0; i < XVECLEN (pat, 0); i++)
22422 rtx sub = XVECEXP (pat, 0, i);
22423 if (GET_CODE (sub) == USE)
22425 sub = XEXP (sub, 0);
22426 if (GET_CODE (sub) == UNSPEC
22427 && XINT (sub, 1) == UNSPEC_TOC)
22428 return 1;
22432 return 0;
22434 #endif
22436 rtx
22437 create_TOC_reference (rtx symbol, rtx largetoc_reg)
22439 rtx tocrel, tocreg, hi;
22441 if (TARGET_DEBUG_ADDR)
22443 if (GET_CODE (symbol) == SYMBOL_REF)
22444 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
22445 XSTR (symbol, 0));
22446 else
22448 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
22449 GET_RTX_NAME (GET_CODE (symbol)));
22450 debug_rtx (symbol);
22454 if (!can_create_pseudo_p ())
22455 df_set_regs_ever_live (TOC_REGISTER, true);
22457 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
22458 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
22459 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
22460 return tocrel;
22462 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
22463 if (largetoc_reg != NULL)
22465 emit_move_insn (largetoc_reg, hi);
22466 hi = largetoc_reg;
22468 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
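/* Schematically, the small-model (or pseudo-allowed) result above is

     (unspec [(symbol_ref) (reg 2)] UNSPEC_TOCREL)

   while the medium/large-model path returns

     (lo_sum (high (unspec ...)) (unspec ...))

   which the md patterns later turn into addis/ld or addis/addi pairs
   off the TOC pointer.  */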
22471 /* Issue assembly directives that create a reference to the given DWARF
22472 FRAME_TABLE_LABEL from the current function section. */
22473 void
22474 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
22476 fprintf (asm_out_file, "\t.ref %s\n",
22477 (* targetm.strip_name_encoding) (frame_table_label));
22480 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
22481 and the change to the stack pointer. */
22483 static void
22484 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
22486 rtvec p;
22487 int i;
22488 rtx regs[3];
22490 i = 0;
22491 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22492 if (hard_frame_needed)
22493 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
22494 if (!(REGNO (fp) == STACK_POINTER_REGNUM
22495 || (hard_frame_needed
22496 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
22497 regs[i++] = fp;
22499 p = rtvec_alloc (i);
22500 while (--i >= 0)
22502 rtx mem = gen_frame_mem (BLKmode, regs[i]);
22503 RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, mem, const0_rtx);
22506 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
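/* The tie is, schematically,

     (parallel [(set (mem:BLK (reg 1)) (const_int 0)) ...])

   with one BLKmode store per base register collected above; it emits
   no machine code but keeps the scheduler from moving frame accesses
   across the stack pointer update.  */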
22509 /* Emit the correct code for allocating stack space, as insns.
22510 If COPY_REG, leave there a copy of the old stack pointer, offset by COPY_OFF.
22511 The generated code may use hard register 0 as a temporary. */
22513 static void
22514 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
22516 rtx_insn *insn;
22517 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22518 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
22519 rtx todec = gen_int_mode (-size, Pmode);
22520 rtx par, set, mem;
22522 if (INTVAL (todec) != -size)
22524 warning (0, "stack frame too large");
22525 emit_insn (gen_trap ());
22526 return;
22529 if (crtl->limit_stack)
22531 if (REG_P (stack_limit_rtx)
22532 && REGNO (stack_limit_rtx) > 1
22533 && REGNO (stack_limit_rtx) <= 31)
22535 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
22536 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22537 const0_rtx));
22539 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
22540 && TARGET_32BIT
22541 && DEFAULT_ABI == ABI_V4)
22543 rtx toload = gen_rtx_CONST (VOIDmode,
22544 gen_rtx_PLUS (Pmode,
22545 stack_limit_rtx,
22546 GEN_INT (size)));
22548 emit_insn (gen_elf_high (tmp_reg, toload));
22549 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
22550 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22551 const0_rtx));
22553 else
22554 warning (0, "stack limit expression is not supported");
22557 if (copy_reg)
22559 if (copy_off != 0)
22560 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
22561 else
22562 emit_move_insn (copy_reg, stack_reg);
22565 if (size > 32767)
22567 /* Need a note here so that try_split doesn't get confused. */
22568 if (get_last_insn () == NULL_RTX)
22569 emit_note (NOTE_INSN_DELETED);
22570 insn = emit_move_insn (tmp_reg, todec);
22571 try_split (PATTERN (insn), insn, 0);
22572 todec = tmp_reg;
22575 insn = emit_insn (TARGET_32BIT
22576 ? gen_movsi_update_stack (stack_reg, stack_reg,
22577 todec, stack_reg)
22578 : gen_movdi_di_update_stack (stack_reg, stack_reg,
22579 todec, stack_reg));
22580 /* Since we didn't use gen_frame_mem to generate the MEM, grab
22581 it now and set the alias set/attributes. The above gen_*_update
22582 calls will generate a PARALLEL with the MEM set being the first
22583 operation. */
22584 par = PATTERN (insn);
22585 gcc_assert (GET_CODE (par) == PARALLEL);
22586 set = XVECEXP (par, 0, 0);
22587 gcc_assert (GET_CODE (set) == SET);
22588 mem = SET_DEST (set);
22589 gcc_assert (MEM_P (mem));
22590 MEM_NOTRAP_P (mem) = 1;
22591 set_mem_alias_set (mem, get_frame_alias_set ());
22593 RTX_FRAME_RELATED_P (insn) = 1;
22594 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
22595 gen_rtx_SET (VOIDmode, stack_reg,
22596 gen_rtx_PLUS (Pmode, stack_reg,
22597 GEN_INT (-size))));
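/* For a small frame the update insn above assembles to the classic
   allocate-and-link idiom, e.g. for a 64-byte frame:

     stwu r1,-64(r1)     (32-bit)
     stdu r1,-64(r1)     (64-bit)

   storing the back chain and moving r1 in a single instruction, so
   the frame never exists without a valid back-chain word.  Frames
   over 32767 bytes load the negated size into r0 first and use the
   indexed update form instead.  */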
22600 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22602 #if PROBE_INTERVAL > 32768
22603 #error Cannot use indexed addressing mode for stack probing
22604 #endif
22606 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22607 inclusive. These are offsets from the current stack pointer. */
22609 static void
22610 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
22612 /* See if we have a constant small number of probes to generate. If so,
22613 that's the easy case. */
22614 if (first + size <= 32768)
22616 HOST_WIDE_INT i;
22618 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
22619 it exceeds SIZE. If only one probe is needed, this will not
22620 generate any code. Then probe at FIRST + SIZE. */
22621 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
22622 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22623 -(first + i)));
22625 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22626 -(first + size)));
22629 /* Otherwise, do the same as above, but in a loop. Note that we must be
22630 extra careful with variables wrapping around because we might be at
22631 the very top (or the very bottom) of the address space and we have
22632 to be able to handle this case properly; in particular, we use an
22633 equality test for the loop condition. */
22634 else
22636 HOST_WIDE_INT rounded_size;
22637 rtx r12 = gen_rtx_REG (Pmode, 12);
22638 rtx r0 = gen_rtx_REG (Pmode, 0);
22640 /* Sanity check for the addressing mode we're going to use. */
22641 gcc_assert (first <= 32768);
22643 /* Step 1: round SIZE to the previous multiple of the interval. */
22645 rounded_size = size & -PROBE_INTERVAL;
22648 /* Step 2: compute initial and final value of the loop counter. */
22650 /* TEST_ADDR = SP + FIRST. */
22651 emit_insn (gen_rtx_SET (VOIDmode, r12,
22652 plus_constant (Pmode, stack_pointer_rtx,
22653 -first)));
22655 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22656 if (rounded_size > 32768)
22658 emit_move_insn (r0, GEN_INT (-rounded_size));
22659 emit_insn (gen_rtx_SET (VOIDmode, r0,
22660 gen_rtx_PLUS (Pmode, r12, r0)));
22662 else
22663 emit_insn (gen_rtx_SET (VOIDmode, r0,
22664 plus_constant (Pmode, r12, -rounded_size)));
22667 /* Step 3: the loop
22669 while (TEST_ADDR != LAST_ADDR)
22671 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
22672 probe at TEST_ADDR
22675 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
22676 until it is equal to ROUNDED_SIZE. */
22678 if (TARGET_64BIT)
22679 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
22680 else
22681 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
22684 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22685 that SIZE is equal to ROUNDED_SIZE. */
22687 if (size != rounded_size)
22688 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
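/* Worked example: PROBE_INTERVAL == 4096, FIRST == 16, SIZE == 10000
   takes the small case above and probes at sp-4112, sp-8208 and
   sp-10016, i.e. at FIRST + 4096, FIRST + 8192 and FIRST + SIZE below
   the stack pointer, keeping consecutive probes within one interval
   of each other.  */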
22692 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22693 absolute addresses. */
22695 const char *
22696 output_probe_stack_range (rtx reg1, rtx reg2)
22698 static int labelno = 0;
22699 char loop_lab[32], end_lab[32];
22700 rtx xops[2];
22702 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
22703 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
22705 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22707 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
22708 xops[0] = reg1;
22709 xops[1] = reg2;
22710 if (TARGET_64BIT)
22711 output_asm_insn ("cmpd 0,%0,%1", xops);
22712 else
22713 output_asm_insn ("cmpw 0,%0,%1", xops);
22715 fputs ("\tbeq 0,", asm_out_file);
22716 assemble_name_raw (asm_out_file, end_lab);
22717 fputc ('\n', asm_out_file);
22719 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22720 xops[1] = GEN_INT (-PROBE_INTERVAL);
22721 output_asm_insn ("addi %0,%0,%1", xops);
22723 /* Probe at TEST_ADDR and branch. */
22724 xops[1] = gen_rtx_REG (Pmode, 0);
22725 output_asm_insn ("stw %1,0(%0)", xops);
22726 fprintf (asm_out_file, "\tb ");
22727 assemble_name_raw (asm_out_file, loop_lab);
22728 fputc ('\n', asm_out_file);
22730 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
22732 return "";
22735 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
22736 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
22737 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
22738 deduce these equivalences by itself so it wasn't necessary to hold
22739 its hand so much. Don't be tempted to always supply d2_f_d_e with
22740 the actual cfa register, i.e. r31 when we are using a hard frame
22741 pointer. That fails when saving regs off r1, and sched moves the
22742 r31 setup past the reg saves. */
22744 static rtx
22745 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
22746 rtx reg2, rtx rreg, rtx split_reg)
22748 rtx real, temp;
22750 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
22752 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
22753 int i;
22755 gcc_checking_assert (val == 0);
22756 real = PATTERN (insn);
22757 if (GET_CODE (real) == PARALLEL)
22758 for (i = 0; i < XVECLEN (real, 0); i++)
22759 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22761 rtx set = XVECEXP (real, 0, i);
22763 RTX_FRAME_RELATED_P (set) = 1;
22765 RTX_FRAME_RELATED_P (insn) = 1;
22766 return insn;
22769 /* copy_rtx will not make unique copies of registers, so we need to
22770 ensure we don't have unwanted sharing here. */
22771 if (reg == reg2)
22772 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22774 if (reg == rreg)
22775 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22777 real = copy_rtx (PATTERN (insn));
22779 if (reg2 != NULL_RTX)
22780 real = replace_rtx (real, reg2, rreg);
22782 if (REGNO (reg) == STACK_POINTER_REGNUM)
22783 gcc_checking_assert (val == 0);
22784 else
22785 real = replace_rtx (real, reg,
22786 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
22787 STACK_POINTER_REGNUM),
22788 GEN_INT (val)));
22790 /* We expect that 'real' is either a SET or a PARALLEL containing
22791 SETs (and possibly other stuff). In a PARALLEL, all the SETs
22792 are important so they all have to be marked RTX_FRAME_RELATED_P. */
22794 if (GET_CODE (real) == SET)
22796 rtx set = real;
22798 temp = simplify_rtx (SET_SRC (set));
22799 if (temp)
22800 SET_SRC (set) = temp;
22801 temp = simplify_rtx (SET_DEST (set));
22802 if (temp)
22803 SET_DEST (set) = temp;
22804 if (GET_CODE (SET_DEST (set)) == MEM)
22806 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22807 if (temp)
22808 XEXP (SET_DEST (set), 0) = temp;
22811 else
22813 int i;
22815 gcc_assert (GET_CODE (real) == PARALLEL);
22816 for (i = 0; i < XVECLEN (real, 0); i++)
22817 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22819 rtx set = XVECEXP (real, 0, i);
22821 temp = simplify_rtx (SET_SRC (set));
22822 if (temp)
22823 SET_SRC (set) = temp;
22824 temp = simplify_rtx (SET_DEST (set));
22825 if (temp)
22826 SET_DEST (set) = temp;
22827 if (GET_CODE (SET_DEST (set)) == MEM)
22829 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22830 if (temp)
22831 XEXP (SET_DEST (set), 0) = temp;
22833 RTX_FRAME_RELATED_P (set) = 1;
22837 /* If a store insn has been split into multiple insns, the
22838 true source register is given by split_reg. */
22839 if (split_reg != NULL_RTX)
22840 real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
22842 RTX_FRAME_RELATED_P (insn) = 1;
22843 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
22845 return insn;
22848 /* Returns an insn that has a vrsave set operation with the
22849 appropriate CLOBBERs. */
22851 static rtx
22852 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
22854 int nclobs, i;
22855 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
22856 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
22858 clobs[0]
22859 = gen_rtx_SET (VOIDmode,
22860 vrsave,
22861 gen_rtx_UNSPEC_VOLATILE (SImode,
22862 gen_rtvec (2, reg, vrsave),
22863 UNSPECV_SET_VRSAVE));
22865 nclobs = 1;
22867 /* We need to clobber the registers in the mask so the scheduler
22868 does not move sets to VRSAVE before sets of AltiVec registers.
22870 However, if the function receives nonlocal gotos, reload will set
22871 all call saved registers live. We will end up with:
22873 (set (reg 999) (mem))
22874 (parallel [ (set (reg vrsave) (unspec blah))
22875 (clobber (reg 999))])
22877 The clobber will cause the store into reg 999 to be dead, and
22878 flow will attempt to delete an epilogue insn. In this case, we
22879 need an unspec use/set of the register. */
22881 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
22882 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22884 if (!epiloguep || call_used_regs [i])
22885 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
22886 gen_rtx_REG (V4SImode, i));
22887 else
22889 rtx reg = gen_rtx_REG (V4SImode, i);
22891 clobs[nclobs++]
22892 = gen_rtx_SET (VOIDmode,
22893 reg,
22894 gen_rtx_UNSPEC (V4SImode,
22895 gen_rtvec (1, reg), 27));
22899 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
22901 for (i = 0; i < nclobs; ++i)
22902 XVECEXP (insn, 0, i) = clobs[i];
22904 return insn;
22907 static rtx
22908 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
22910 rtx addr, mem;
22912 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
22913 mem = gen_frame_mem (GET_MODE (reg), addr);
22914 return gen_rtx_SET (VOIDmode, store ? mem : reg, store ? reg : mem);
22917 static rtx
22918 gen_frame_load (rtx reg, rtx frame_reg, int offset)
22920 return gen_frame_set (reg, frame_reg, offset, false);
22923 static rtx
22924 gen_frame_store (rtx reg, rtx frame_reg, int offset)
22926 return gen_frame_set (reg, frame_reg, offset, true);
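/* For example, gen_frame_store (gen_rtx_REG (DImode, 31), sp, -8),
   where sp is a stack pointer rtx, produces schematically

     (set (mem:DI (plus:DI (reg:DI 1) (const_int -8))) (reg:DI 31))

   with the MEM created by gen_frame_mem and so already carrying the
   frame alias set.  */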
22929 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
22930 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
22932 static rtx
22933 emit_frame_save (rtx frame_reg, machine_mode mode,
22934 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
22936 rtx reg, insn;
22938 /* Check that we aren't given a case needing register indexed addressing. */
22939 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
22940 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22941 || (TARGET_E500_DOUBLE && mode == DFmode)
22942 || (TARGET_SPE_ABI
22943 && SPE_VECTOR_MODE (mode)
22944 && !SPE_CONST_OFFSET_OK (offset))));
22946 reg = gen_rtx_REG (mode, regno);
22947 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
22948 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
22949 NULL_RTX, NULL_RTX, NULL_RTX);
22952 /* Emit an offset memory reference suitable for a frame store, while
22953 converting to a valid addressing mode. */
22955 static rtx
22956 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
22958 rtx int_rtx, offset_rtx;
22960 int_rtx = GEN_INT (offset);
22962 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
22963 || (TARGET_E500_DOUBLE && mode == DFmode))
22965 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
22966 emit_move_insn (offset_rtx, int_rtx);
22968 else
22969 offset_rtx = int_rtx;
22971 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
22974 #ifndef TARGET_FIX_AND_CONTINUE
22975 #define TARGET_FIX_AND_CONTINUE 0
22976 #endif
22978 /* FIRST_SAVRES_REGISTER is really GPR 13 or 14, FPR 14, or VR 20; we need the smallest. */
22979 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
22980 #define LAST_SAVRES_REGISTER 31
22981 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
22983 enum {
22984 SAVRES_LR = 0x1,
22985 SAVRES_SAVE = 0x2,
22986 SAVRES_REG = 0x0c,
22987 SAVRES_GPR = 0,
22988 SAVRES_FPR = 4,
22989 SAVRES_VR = 8
22992 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
22994 /* Temporary holding space for an out-of-line register save/restore
22995 routine name. */
22996 static char savres_routine_name[30];
22998 /* Return the name for an out-of-line register save/restore routine.
22999 SEL encodes save vs. restore, the register class, and LR handling. */
23001 static char *
23002 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
23004 const char *prefix = "";
23005 const char *suffix = "";
23007 /* Different targets are supposed to define
23008 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
23009 routine name could be defined with:
23011 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
23013 This is a nice idea in theory, but in reality, things are
23014 complicated in several ways:
23016 - ELF targets have save/restore routines for GPRs.
23018 - SPE targets use different prefixes for 32/64-bit registers, and
23019 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
23021 - PPC64 ELF targets have routines for save/restore of GPRs that
23022 differ in what they do with the link register, so having a set
23023 prefix doesn't work. (We only use one of the save routines at
23024 the moment, though.)
23026 - PPC32 elf targets have "exit" versions of the restore routines
23027 that restore the link register and can save some extra space.
23028 These require an extra suffix. (There are also "tail" versions
23029 of the restore routines and "GOT" versions of the save routines,
23030 but we don't generate those at present. Same problems apply,
23031 though.)
23033 We deal with all this by synthesizing our own prefix/suffix and
23034 using that for the simple sprintf call shown above. */
23035 if (TARGET_SPE)
23037 /* No floating point saves on the SPE. */
23038 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
23040 if ((sel & SAVRES_SAVE))
23041 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
23042 else
23043 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
23045 if ((sel & SAVRES_LR))
23046 suffix = "_x";
23048 else if (DEFAULT_ABI == ABI_V4)
23050 if (TARGET_64BIT)
23051 goto aix_names;
23053 if ((sel & SAVRES_REG) == SAVRES_GPR)
23054 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
23055 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23056 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
23057 else if ((sel & SAVRES_REG) == SAVRES_VR)
23058 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
23059 else
23060 abort ();
23062 if ((sel & SAVRES_LR))
23063 suffix = "_x";
23065 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23067 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
23068 /* No out-of-line save/restore routines for GPRs on AIX. */
23069 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
23070 #endif
23072 aix_names:
23073 if ((sel & SAVRES_REG) == SAVRES_GPR)
23074 prefix = ((sel & SAVRES_SAVE)
23075 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
23076 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
23077 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23079 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
23080 if ((sel & SAVRES_LR))
23081 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
23082 else
23083 #endif
23085 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
23086 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
23089 else if ((sel & SAVRES_REG) == SAVRES_VR)
23090 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
23091 else
23092 abort ();
23095 if (DEFAULT_ABI == ABI_DARWIN)
23097 /* The Darwin approach is (slightly) different, in order to be
23098 compatible with code generated by the system toolchain. There is a
23099 single symbol for the start of the save sequence, and the code here
23100 embeds an offset into that code on the basis of the first register
23101 to be saved. */
23102 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
23103 if ((sel & SAVRES_REG) == SAVRES_GPR)
23104 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
23105 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
23106 (regno - 13) * 4, prefix, regno);
23107 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23108 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
23109 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
23110 else if ((sel & SAVRES_REG) == SAVRES_VR)
23111 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
23112 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
23113 else
23114 abort ();
23116 else
23117 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
23119 return savres_routine_name;
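/* Sample results (illustrative): on 64-bit ELF, saving GPRs starting
   at r29 with the LR-saving variant yields "_savegpr0_29", while the
   plain restore is "_restgpr1_29".  On 32-bit SVR4 the FPR save for
   f14 is "_savefpr_14", or "_savefpr_14_x" for the LR variant.  */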
23122 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
23123 SEL encodes save vs. restore and the class of registers handled. */
23125 static rtx
23126 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
23128 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
23129 ? info->first_gp_reg_save
23130 : (sel & SAVRES_REG) == SAVRES_FPR
23131 ? info->first_fp_reg_save - 32
23132 : (sel & SAVRES_REG) == SAVRES_VR
23133 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
23134 : -1);
23135 rtx sym;
23136 int select = sel;
23138 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
23139 versions of the gpr routines. */
23140 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
23141 && info->spe_64bit_regs_used)
23142 select ^= SAVRES_FPR ^ SAVRES_GPR;
23144 /* Don't generate bogus routine names. */
23145 gcc_assert (FIRST_SAVRES_REGISTER <= regno
23146 && regno <= LAST_SAVRES_REGISTER
23147 && select >= 0 && select <= 12);
23149 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
23151 if (sym == NULL)
23153 char *name;
23155 name = rs6000_savres_routine_name (info, regno, sel);
23157 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
23158 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
23159 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
23162 return sym;
23165 /* Emit a sequence of insns, including a stack tie if needed, for
23166 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
23167 reset the stack pointer, but move the base of the frame into
23168 reg UPDT_REGNO for use by out-of-line register restore routines. */
23170 static rtx
23171 rs6000_emit_stack_reset (rs6000_stack_t *info,
23172 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
23173 unsigned updt_regno)
23175 rtx updt_reg_rtx;
23177 /* This blockage is needed so that sched doesn't decide to move
23178 the sp change before the register restores. */
23179 if (DEFAULT_ABI == ABI_V4
23180 || (TARGET_SPE_ABI
23181 && info->spe_64bit_regs_used != 0
23182 && info->first_gp_reg_save != 32))
23183 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
23185 /* If we are restoring registers out-of-line, we will be using the
23186 "exit" variants of the restore routines, which will reset the
23187 stack for us. But we do need to point updt_reg into the
23188 right place for those routines. */
23189 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
23191 if (frame_off != 0)
23192 return emit_insn (gen_add3_insn (updt_reg_rtx,
23193 frame_reg_rtx, GEN_INT (frame_off)));
23194 else if (REGNO (frame_reg_rtx) != updt_regno)
23195 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
23197 return NULL_RTX;
23200 /* Return the register number used as a pointer by out-of-line
23201 save/restore functions. */
23203 static inline unsigned
23204 ptr_regno_for_savres (int sel)
23206 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23207 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
23208 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
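/* Summarizing the choice above:

     AIX/ELFv2:  FPR routines and all LR variants use r1, others r12
     Darwin:     FPR routines use r1, others r11
     V.4:        always r11

   which is presumably what the out-of-line routines in libgcc and the
   system toolchains expect.  */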
23211 /* Construct a parallel rtx describing the effect of a call to an
23212 out-of-line register save/restore routine, and emit the insn
23213 or jump_insn as appropriate. */
23215 static rtx
23216 rs6000_emit_savres_rtx (rs6000_stack_t *info,
23217 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
23218 machine_mode reg_mode, int sel)
23220 int i;
23221 int offset, start_reg, end_reg, n_regs, use_reg;
23222 int reg_size = GET_MODE_SIZE (reg_mode);
23223 rtx sym;
23224 rtvec p;
23225 rtx par, insn;
23227 offset = 0;
23228 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23229 ? info->first_gp_reg_save
23230 : (sel & SAVRES_REG) == SAVRES_FPR
23231 ? info->first_fp_reg_save
23232 : (sel & SAVRES_REG) == SAVRES_VR
23233 ? info->first_altivec_reg_save
23234 : -1);
23235 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23236 ? 32
23237 : (sel & SAVRES_REG) == SAVRES_FPR
23238 ? 64
23239 : (sel & SAVRES_REG) == SAVRES_VR
23240 ? LAST_ALTIVEC_REGNO + 1
23241 : -1);
23242 n_regs = end_reg - start_reg;
23243 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
23244 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
23245 + n_regs);
23247 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23248 RTVEC_ELT (p, offset++) = ret_rtx;
23250 RTVEC_ELT (p, offset++)
23251 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
23253 sym = rs6000_savres_routine_sym (info, sel);
23254 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
23256 use_reg = ptr_regno_for_savres (sel);
23257 if ((sel & SAVRES_REG) == SAVRES_VR)
23259 /* Vector regs are saved/restored using [reg+reg] addressing. */
23260 RTVEC_ELT (p, offset++)
23261 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23262 RTVEC_ELT (p, offset++)
23263 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
23265 else
23266 RTVEC_ELT (p, offset++)
23267 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23269 for (i = 0; i < end_reg - start_reg; i++)
23270 RTVEC_ELT (p, i + offset)
23271 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
23272 frame_reg_rtx, save_area_offset + reg_size * i,
23273 (sel & SAVRES_SAVE) != 0);
23275 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23276 RTVEC_ELT (p, i + offset)
23277 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
23279 par = gen_rtx_PARALLEL (VOIDmode, p);
23281 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23283 insn = emit_jump_insn (par);
23284 JUMP_LABEL (insn) = ret_rtx;
23286 else
23287 insn = emit_insn (par);
23288 return insn;
23291 /* Emit code to store CR fields that need to be saved into REG. */
23293 static void
23294 rs6000_emit_move_from_cr (rtx reg)
23296 /* Only the ELFv2 ABI allows us to store just the selected CR fields. */
23297 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
23299 int i, cr_reg[8], count = 0;
23301 /* Collect CR fields that must be saved. */
23302 for (i = 0; i < 8; i++)
23303 if (save_reg_p (CR0_REGNO + i))
23304 cr_reg[count++] = i;
23306 /* If it's just a single one, use mfcrf. */
23307 if (count == 1)
23309 rtvec p = rtvec_alloc (1);
23310 rtvec r = rtvec_alloc (2);
23311 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
23312 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
23313 RTVEC_ELT (p, 0)
23314 = gen_rtx_SET (VOIDmode, reg,
23315 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
23317 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23318 return;
23321 /* ??? It might be better to handle the count == 2 and count == 3
23322 cases here as well, using logical operations to combine the values. */
23325 emit_insn (gen_movesi_from_cr (reg));
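/* The single-field case above becomes one mfcrf, e.g. saving only CR3

     mfcrf r12,0x10

   (the FXM mask bit for field n is 1 << (7 - n)), while the fallback
   gen_movesi_from_cr emits a full mfcr.  */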
23328 /* Determine whether GP register REG is really used, treating the PIC offset register as used whenever it must be set up. */
23330 static bool
23331 rs6000_reg_live_or_pic_offset_p (int reg)
23333 /* If the function calls eh_return, claim used all the registers that would
23334 be checked for liveness otherwise. This is required for the PIC offset
23335 register with -mminimal-toc on AIX, as it is advertised as "fixed" for
23336 register allocation purposes in this case. */
23338 return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
23339 && (!call_used_regs[reg]
23340 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23341 && !TARGET_SINGLE_PIC_BASE
23342 && TARGET_TOC && TARGET_MINIMAL_TOC)))
23343 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23344 && !TARGET_SINGLE_PIC_BASE
23345 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
23346 || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
23349 /* Emit function prologue as insns. */
23351 void
23352 rs6000_emit_prologue (void)
23354 rs6000_stack_t *info = rs6000_stack_info ();
23355 machine_mode reg_mode = Pmode;
23356 int reg_size = TARGET_32BIT ? 4 : 8;
23357 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
23358 rtx frame_reg_rtx = sp_reg_rtx;
23359 unsigned int cr_save_regno;
23360 rtx cr_save_rtx = NULL_RTX;
23361 rtx insn;
23362 int strategy;
23363 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23364 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23365 && call_used_regs[STATIC_CHAIN_REGNUM]);
23366 /* Offset to top of frame for frame_reg and sp respectively. */
23367 HOST_WIDE_INT frame_off = 0;
23368 HOST_WIDE_INT sp_off = 0;
23370 #ifdef ENABLE_CHECKING
23371 /* Track and check usage of r0, r11, r12. */
23372 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
23373 #define START_USE(R) do \
23375 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23376 reg_inuse |= 1 << (R); \
23377 } while (0)
23378 #define END_USE(R) do \
23380 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
23381 reg_inuse &= ~(1 << (R)); \
23382 } while (0)
23383 #define NOT_INUSE(R) do \
23385 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23386 } while (0)
23387 #else
23388 #define START_USE(R) do {} while (0)
23389 #define END_USE(R) do {} while (0)
23390 #define NOT_INUSE(R) do {} while (0)
23391 #endif
23393 if (DEFAULT_ABI == ABI_ELFv2)
23395 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
23397 /* With -mminimal-toc we may generate an extra use of r2 below. */
23398 if (!TARGET_SINGLE_PIC_BASE
23399 && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
23400 cfun->machine->r2_setup_needed = true;
23404 if (flag_stack_usage_info)
23405 current_function_static_stack_size = info->total_size;
23407 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
23409 HOST_WIDE_INT size = info->total_size;
23411 if (crtl->is_leaf && !cfun->calls_alloca)
23413 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
23414 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
23415 size - STACK_CHECK_PROTECT);
23417 else if (size > 0)
23418 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
23421 if (TARGET_FIX_AND_CONTINUE)
23423 /* gdb on darwin arranges to forward a function from the old
23424 address by modifying the first 5 instructions of the function
23425 to branch to the overriding function. This is necessary to
23426 permit function pointers that point to the old function to
23427 actually forward to the new function. */
23428 emit_insn (gen_nop ());
23429 emit_insn (gen_nop ());
23430 emit_insn (gen_nop ());
23431 emit_insn (gen_nop ());
23432 emit_insn (gen_nop ());
23435 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23437 reg_mode = V2SImode;
23438 reg_size = 8;
23441 /* Handle world saves specially here. */
23442 if (WORLD_SAVE_P (info))
23444 int i, j, sz;
23445 rtx treg;
23446 rtvec p;
23447 rtx reg0;
23449 /* save_world expects lr in r0. */
23450 reg0 = gen_rtx_REG (Pmode, 0);
23451 if (info->lr_save_p)
23453 insn = emit_move_insn (reg0,
23454 gen_rtx_REG (Pmode, LR_REGNO));
23455 RTX_FRAME_RELATED_P (insn) = 1;
23458 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
23459 assumptions about the offsets of various bits of the stack
23460 frame. */
23461 gcc_assert (info->gp_save_offset == -220
23462 && info->fp_save_offset == -144
23463 && info->lr_save_offset == 8
23464 && info->cr_save_offset == 4
23465 && info->push_p
23466 && info->lr_save_p
23467 && (!crtl->calls_eh_return
23468 || info->ehrd_offset == -432)
23469 && info->vrsave_save_offset == -224
23470 && info->altivec_save_offset == -416);
23472 treg = gen_rtx_REG (SImode, 11);
23473 emit_move_insn (treg, GEN_INT (-info->total_size));
23475 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
23476 in R11. It also clobbers R12, so beware! */
23478 /* Preserve CR2 for save_world prologues. */
23479 sz = 5;
23480 sz += 32 - info->first_gp_reg_save;
23481 sz += 64 - info->first_fp_reg_save;
23482 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
23483 p = rtvec_alloc (sz);
23484 j = 0;
23485 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
23486 gen_rtx_REG (SImode,
23487 LR_REGNO));
23488 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
23489 gen_rtx_SYMBOL_REF (Pmode,
23490 "*save_world"));
23491 /* We do floats first so that the instruction pattern matches
23492 properly. */
23493 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23494 RTVEC_ELT (p, j++)
23495 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23496 ? DFmode : SFmode,
23497 info->first_fp_reg_save + i),
23498 frame_reg_rtx,
23499 info->fp_save_offset + frame_off + 8 * i);
23500 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
23501 RTVEC_ELT (p, j++)
23502 = gen_frame_store (gen_rtx_REG (V4SImode,
23503 info->first_altivec_reg_save + i),
23504 frame_reg_rtx,
23505 info->altivec_save_offset + frame_off + 16 * i);
23506 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23507 RTVEC_ELT (p, j++)
23508 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23509 frame_reg_rtx,
23510 info->gp_save_offset + frame_off + reg_size * i);
23512 /* CR register traditionally saved as CR2. */
23513 RTVEC_ELT (p, j++)
23514 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
23515 frame_reg_rtx, info->cr_save_offset + frame_off);
23516 /* R0 holds the incoming LR (copied above); store it as the saved LR. */
23517 if (info->lr_save_p)
23518 RTVEC_ELT (p, j++)
23519 = gen_frame_store (reg0,
23520 frame_reg_rtx, info->lr_save_offset + frame_off);
23521 /* Explain what happens to the stack pointer. */
23523 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
23524 RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval);
23527 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23528 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23529 treg, GEN_INT (-info->total_size), NULL_RTX);
23530 sp_off = frame_off = info->total_size;
23533 strategy = info->savres_strategy;
23535 /* For V.4, update stack before we do any saving and set back pointer. */
23536 if (! WORLD_SAVE_P (info)
23537 && info->push_p
23538 && (DEFAULT_ABI == ABI_V4
23539 || crtl->calls_eh_return))
23541 bool need_r11 = (TARGET_SPE
23542 ? (!(strategy & SAVE_INLINE_GPRS)
23543 && info->spe_64bit_regs_used == 0)
23544 : (!(strategy & SAVE_INLINE_FPRS)
23545 || !(strategy & SAVE_INLINE_GPRS)
23546 || !(strategy & SAVE_INLINE_VRS)));
23547 int ptr_regno = -1;
23548 rtx ptr_reg = NULL_RTX;
23549 int ptr_off = 0;
23551 if (info->total_size < 32767)
23552 frame_off = info->total_size;
23553 else if (need_r11)
23554 ptr_regno = 11;
23555 else if (info->cr_save_p
23556 || info->lr_save_p
23557 || info->first_fp_reg_save < 64
23558 || info->first_gp_reg_save < 32
23559 || info->altivec_size != 0
23560 || info->vrsave_mask != 0
23561 || crtl->calls_eh_return)
23562 ptr_regno = 12;
23563 else
23565 /* The prologue won't be saving any regs so there is no need
23566 to set up a frame register to access any frame save area.
23567 We also won't be using frame_off anywhere below, but set
23568 the correct value anyway to protect against future
23569 changes to this function. */
23570 frame_off = info->total_size;
23572 if (ptr_regno != -1)
23574 /* Set up the frame offset to that needed by the first
23575 out-of-line save function. */
23576 START_USE (ptr_regno);
23577 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23578 frame_reg_rtx = ptr_reg;
23579 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
23580 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
23581 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
23582 ptr_off = info->gp_save_offset + info->gp_size;
23583 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
23584 ptr_off = info->altivec_save_offset + info->altivec_size;
23585 frame_off = -ptr_off;
23587 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23588 sp_off = info->total_size;
23589 if (frame_reg_rtx != sp_reg_rtx)
23590 rs6000_emit_stack_tie (frame_reg_rtx, false);
23593 /* If we use the link register, get it into r0. */
23594 if (!WORLD_SAVE_P (info) && info->lr_save_p)
23596 rtx addr, reg, mem;
23598 reg = gen_rtx_REG (Pmode, 0);
23599 START_USE (0);
23600 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
23601 RTX_FRAME_RELATED_P (insn) = 1;
23603 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
23604 | SAVE_NOINLINE_FPRS_SAVES_LR)))
23606 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23607 GEN_INT (info->lr_save_offset + frame_off));
23608 mem = gen_rtx_MEM (Pmode, addr);
23609 /* This should not be of rs6000_sr_alias_set, because of
23610 __builtin_return_address. */
23612 insn = emit_move_insn (mem, reg);
23613 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23614 NULL_RTX, NULL_RTX, NULL_RTX);
23615 END_USE (0);
23619 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
23620 r12 will be needed by out-of-line gpr save. */
23621 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23622 && !(strategy & (SAVE_INLINE_GPRS
23623 | SAVE_NOINLINE_GPRS_SAVES_LR))
23624 ? 11 : 12);
23625 if (!WORLD_SAVE_P (info)
23626 && info->cr_save_p
23627 && REGNO (frame_reg_rtx) != cr_save_regno
23628 && !(using_static_chain_p && cr_save_regno == 11))
23630 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
23631 START_USE (cr_save_regno);
23632 rs6000_emit_move_from_cr (cr_save_rtx);
23635 /* Do any required saving of fpr's. If only one or two to save, do
23636 it ourselves. Otherwise, call an out-of-line save function. */
23637 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
23639 int i;
23640 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23641 if (save_reg_p (info->first_fp_reg_save + i))
23642 emit_frame_save (frame_reg_rtx,
23643 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23644 ? DFmode : SFmode),
23645 info->first_fp_reg_save + i,
23646 info->fp_save_offset + frame_off + 8 * i,
23647 sp_off - frame_off);
23649 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
23651 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
23652 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
23653 unsigned ptr_regno = ptr_regno_for_savres (sel);
23654 rtx ptr_reg = frame_reg_rtx;
23656 if (REGNO (frame_reg_rtx) == ptr_regno)
23657 gcc_checking_assert (frame_off == 0);
23658 else
23660 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23661 NOT_INUSE (ptr_regno);
23662 emit_insn (gen_add3_insn (ptr_reg,
23663 frame_reg_rtx, GEN_INT (frame_off)));
23665 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23666 info->fp_save_offset,
23667 info->lr_save_offset,
23668 DFmode, sel);
23669 rs6000_frame_related (insn, ptr_reg, sp_off,
23670 NULL_RTX, NULL_RTX, NULL_RTX);
23671 if (lr)
23672 END_USE (0);
23675 /* Save GPRs. This is done as a PARALLEL if we are using
23676 the store-multiple instructions. */
23677 if (!WORLD_SAVE_P (info)
23678 && TARGET_SPE_ABI
23679 && info->spe_64bit_regs_used != 0
23680 && info->first_gp_reg_save != 32)
23682 int i;
23683 rtx spe_save_area_ptr;
23684 HOST_WIDE_INT save_off;
23685 int ool_adjust = 0;
23687 /* Determine whether we can address all of the registers that need
23688 to be saved with an offset from frame_reg_rtx that fits in
23689 the small const field for SPE memory instructions. */
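/* For illustration (assuming the usual SPE encoding): evldd and
   evstdd take a 5-bit unsigned offset scaled by 8, so only
   displacements 0, 8, ..., 248 from the base register are directly
   encodable, e.g.

       evstdd 14,248(11)

   is the longest reach; anything beyond that forces the r11 setup
   in the else branch below.  */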
23690 int spe_regs_addressable
23691 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
23692 + reg_size * (32 - info->first_gp_reg_save - 1))
23693 && (strategy & SAVE_INLINE_GPRS));
23695 if (spe_regs_addressable)
23697 spe_save_area_ptr = frame_reg_rtx;
23698 save_off = frame_off;
23700 else
23702 /* Make r11 point to the start of the SPE save area. We need
23703 to be careful here if r11 is holding the static chain. If
23704 it is, then temporarily save it in r0. */
23705 HOST_WIDE_INT offset;
23707 if (!(strategy & SAVE_INLINE_GPRS))
23708 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
23709 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
23710 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
23711 save_off = frame_off - offset;
23713 if (using_static_chain_p)
23715 rtx r0 = gen_rtx_REG (Pmode, 0);
23717 START_USE (0);
23718 gcc_assert (info->first_gp_reg_save > 11);
23720 emit_move_insn (r0, spe_save_area_ptr);
23722 else if (REGNO (frame_reg_rtx) != 11)
23723 START_USE (11);
23725 emit_insn (gen_addsi3 (spe_save_area_ptr,
23726 frame_reg_rtx, GEN_INT (offset)));
23727 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
23728 frame_off = -info->spe_gp_save_offset + ool_adjust;
23731 if ((strategy & SAVE_INLINE_GPRS))
23733 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23734 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23735 emit_frame_save (spe_save_area_ptr, reg_mode,
23736 info->first_gp_reg_save + i,
23737 (info->spe_gp_save_offset + save_off
23738 + reg_size * i),
23739 sp_off - save_off);
23741 else
23743 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
23744 info->spe_gp_save_offset + save_off,
23745 0, reg_mode,
23746 SAVRES_SAVE | SAVRES_GPR);
23748 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
23749 NULL_RTX, NULL_RTX, NULL_RTX);
23752 /* Move the static chain pointer back. */
23753 if (!spe_regs_addressable)
23755 if (using_static_chain_p)
23757 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
23758 END_USE (0);
23760 else if (REGNO (frame_reg_rtx) != 11)
23761 END_USE (11);
23764 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
23766 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
23767 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
23768 unsigned ptr_regno = ptr_regno_for_savres (sel);
23769 rtx ptr_reg = frame_reg_rtx;
23770 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
23771 int end_save = info->gp_save_offset + info->gp_size;
23772 int ptr_off;
23774 if (!ptr_set_up)
23775 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23777 /* Need to adjust r11 (r12) if we saved any FPRs. */
23778 if (end_save + frame_off != 0)
23780 rtx offset = GEN_INT (end_save + frame_off);
23782 if (ptr_set_up)
23783 frame_off = -end_save;
23784 else
23785 NOT_INUSE (ptr_regno);
23786 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23788 else if (!ptr_set_up)
23790 NOT_INUSE (ptr_regno);
23791 emit_move_insn (ptr_reg, frame_reg_rtx);
23793 ptr_off = -end_save;
23794 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23795 info->gp_save_offset + ptr_off,
23796 info->lr_save_offset + ptr_off,
23797 reg_mode, sel);
23798 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
23799 NULL_RTX, NULL_RTX, NULL_RTX);
23800 if (lr)
23801 END_USE (0);
23803 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
23805 rtvec p;
23806 int i;
23807 p = rtvec_alloc (32 - info->first_gp_reg_save);
23808 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23809 RTVEC_ELT (p, i)
23810 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23811 frame_reg_rtx,
23812 info->gp_save_offset + frame_off + reg_size * i);
23813 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23814 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23815 NULL_RTX, NULL_RTX, NULL_RTX);
23817 else if (!WORLD_SAVE_P (info))
23819 int i;
23820 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23821 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23822 emit_frame_save (frame_reg_rtx, reg_mode,
23823 info->first_gp_reg_save + i,
23824 info->gp_save_offset + frame_off + reg_size * i,
23825 sp_off - frame_off);
23828 if (crtl->calls_eh_return)
23830 unsigned int i;
23831 rtvec p;
23833 for (i = 0; ; ++i)
23835 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23836 if (regno == INVALID_REGNUM)
23837 break;
23840 p = rtvec_alloc (i);
23842 for (i = 0; ; ++i)
23844 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23845 if (regno == INVALID_REGNUM)
23846 break;
23848 insn
23849 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
23850 sp_reg_rtx,
23851 info->ehrd_offset + sp_off + reg_size * (int) i);
23852 RTVEC_ELT (p, i) = insn;
23853 RTX_FRAME_RELATED_P (insn) = 1;
23856 insn = emit_insn (gen_blockage ());
23857 RTX_FRAME_RELATED_P (insn) = 1;
23858 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
23861 /* In AIX ABI we need to make sure r2 is really saved. */
23862 if (TARGET_AIX && crtl->calls_eh_return)
23864 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
23865 rtx save_insn, join_insn, note;
23866 long toc_restore_insn;
23868 tmp_reg = gen_rtx_REG (Pmode, 11);
23869 tmp_reg_si = gen_rtx_REG (SImode, 11);
23870 if (using_static_chain_p)
23872 START_USE (0);
23873 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
23875 else
23876 START_USE (11);
23877 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
23878 /* Peek at the instruction to which this function returns. If it's
23879 restoring r2, then we know we've already saved r2. We can't
23880 unconditionally save r2 because the value we have will already
23881 be updated if we arrived at this function via a plt call or
23882 toc adjusting stub. */
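/* For illustration, assuming RS6000_TOC_SAVE_SLOT is 40 in 64-bit
   mode and 20 in 32-bit mode, the insn image computed below is
   0xE8410028, i.e. "ld 2,40(1)", or 0x80410014, i.e. "lwz 2,20(1)",
   respectively.  */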
23883 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
23884 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
23885 + RS6000_TOC_SAVE_SLOT);
23886 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
23887 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
23888 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
23889 validate_condition_mode (EQ, CCUNSmode);
23890 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
23891 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
23892 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
23893 toc_save_done = gen_label_rtx ();
23894 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
23895 gen_rtx_EQ (VOIDmode, compare_result,
23896 const0_rtx),
23897 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
23898 pc_rtx);
23899 jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
23900 JUMP_LABEL (jump) = toc_save_done;
23901 LABEL_NUSES (toc_save_done) += 1;
23903 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
23904 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
23905 sp_off - frame_off);
23907 emit_label (toc_save_done);
23909 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
23910 have a CFG that has different saves along different paths.
23911 Move the note to a dummy blockage insn, which describes that
23912 R2 is unconditionally saved after the label. */
23913 /* ??? An alternate representation might be a special insn pattern
23914 containing both the branch and the store. That might give the
23915 code that minimizes the number of DW_CFA_advance opcodes more
23916 freedom in placing the annotations. */
23917 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
23918 if (note)
23919 remove_note (save_insn, note);
23920 else
23921 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
23922 copy_rtx (PATTERN (save_insn)), NULL_RTX);
23923 RTX_FRAME_RELATED_P (save_insn) = 0;
23925 join_insn = emit_insn (gen_blockage ());
23926 REG_NOTES (join_insn) = note;
23927 RTX_FRAME_RELATED_P (join_insn) = 1;
23929 if (using_static_chain_p)
23931 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
23932 END_USE (0);
23934 else
23935 END_USE (11);
23938 /* Save CR if we use any that must be preserved. */
23939 if (!WORLD_SAVE_P (info) && info->cr_save_p)
23941 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23942 GEN_INT (info->cr_save_offset + frame_off));
23943 rtx mem = gen_frame_mem (SImode, addr);
23945 /* If we didn't copy cr before, do so now using r0. */
23946 if (cr_save_rtx == NULL_RTX)
23948 START_USE (0);
23949 cr_save_rtx = gen_rtx_REG (SImode, 0);
23950 rs6000_emit_move_from_cr (cr_save_rtx);
23953 /* Saving CR requires a two-instruction sequence: one instruction
23954 to move the CR to a general-purpose register, and a second
23955 instruction that stores the GPR to memory.
23957 We do not emit any DWARF CFI records for the first of these,
23958 because we cannot properly represent the fact that CR is saved in
23959 a register. One reason is that we cannot express that multiple
23960 CR fields are saved; another reason is that on 64-bit, the size
23961 of the CR register in DWARF (4 bytes) differs from the size of
23962 a general-purpose register.
23964 This means if any intervening instruction were to clobber one of
23965 the call-saved CR fields, we'd have incorrect CFI. To prevent
23966 this from happening, we mark the store to memory as a use of
23967 those CR fields, which prevents any such instruction from being
23968 scheduled in between the two instructions. */
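/* The intended shape of the sequence, with an illustrative scratch
   register and save offset:

       mfcr 12          <- no CFI emitted for this move
       stw  12,8(1)     <- CFI attached here; the USEs built below
                           pin the live CR fields in between.  */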
23969 rtx crsave_v[9];
23970 int n_crsave = 0;
23971 int i;
23973 crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
23974 for (i = 0; i < 8; i++)
23975 if (save_reg_p (CR0_REGNO + i))
23976 crsave_v[n_crsave++]
23977 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23979 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
23980 gen_rtvec_v (n_crsave, crsave_v)));
23981 END_USE (REGNO (cr_save_rtx));
23983 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
23984 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
23985 so we need to construct a frame expression manually. */
23986 RTX_FRAME_RELATED_P (insn) = 1;
23988 /* Update address to be stack-pointer relative, like
23989 rs6000_frame_related would do. */
23990 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
23991 GEN_INT (info->cr_save_offset + sp_off));
23992 mem = gen_frame_mem (SImode, addr);
23994 if (DEFAULT_ABI == ABI_ELFv2)
23996 /* In the ELFv2 ABI we generate separate CFI records for each
23997 CR field that was actually saved. They all point to the
23998 same 32-bit stack slot. */
23999 rtx crframe[8];
24000 int n_crframe = 0;
24002 for (i = 0; i < 8; i++)
24003 if (save_reg_p (CR0_REGNO + i))
24005 crframe[n_crframe]
24006 = gen_rtx_SET (VOIDmode, mem,
24007 gen_rtx_REG (SImode, CR0_REGNO + i));
24009 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
24010 n_crframe++;
24013 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24014 gen_rtx_PARALLEL (VOIDmode,
24015 gen_rtvec_v (n_crframe, crframe)));
24017 else
24019 /* In other ABIs, by convention, we use a single CR regnum to
24020 represent the fact that all call-saved CR fields are saved.
24021 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
24022 rtx set = gen_rtx_SET (VOIDmode, mem,
24023 gen_rtx_REG (SImode, CR2_REGNO));
24024 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
24028 /* In the ELFv2 ABI we need to save all call-saved CR fields into
24029 *separate* slots if the routine calls __builtin_eh_return, so
24030 that they can be independently restored by the unwinder. */
24031 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24033 int i, cr_off = info->ehcr_offset;
24034 rtx crsave;
24036 /* ??? We might get better performance by using multiple mfocrf
24037 instructions. */
24038 crsave = gen_rtx_REG (SImode, 0);
24039 emit_insn (gen_movesi_from_cr (crsave));
24041 for (i = 0; i < 8; i++)
24042 if (!call_used_regs[CR0_REGNO + i])
24044 rtvec p = rtvec_alloc (2);
24045 RTVEC_ELT (p, 0)
24046 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
24047 RTVEC_ELT (p, 1)
24048 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
24050 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24052 RTX_FRAME_RELATED_P (insn) = 1;
24053 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24054 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
24055 sp_reg_rtx, cr_off + sp_off));
24057 cr_off += reg_size;
24061 /* Update stack and set back pointer unless this is V.4,
24062 for which it was done previously. */
24063 if (!WORLD_SAVE_P (info) && info->push_p
24064 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
24066 rtx ptr_reg = NULL;
24067 int ptr_off = 0;
24069 /* If saving altivec regs we need to be able to address all save
24070 locations using a 16-bit offset. */
24071 if ((strategy & SAVE_INLINE_VRS) == 0
24072 || (info->altivec_size != 0
24073 && (info->altivec_save_offset + info->altivec_size - 16
24074 + info->total_size - frame_off) > 32767)
24075 || (info->vrsave_size != 0
24076 && (info->vrsave_save_offset
24077 + info->total_size - frame_off) > 32767))
24079 int sel = SAVRES_SAVE | SAVRES_VR;
24080 unsigned ptr_regno = ptr_regno_for_savres (sel);
24082 if (using_static_chain_p
24083 && ptr_regno == STATIC_CHAIN_REGNUM)
24084 ptr_regno = 12;
24085 if (REGNO (frame_reg_rtx) != ptr_regno)
24086 START_USE (ptr_regno);
24087 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
24088 frame_reg_rtx = ptr_reg;
24089 ptr_off = info->altivec_save_offset + info->altivec_size;
24090 frame_off = -ptr_off;
24092 else if (REGNO (frame_reg_rtx) == 1)
24093 frame_off = info->total_size;
24094 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
24095 sp_off = info->total_size;
24096 if (frame_reg_rtx != sp_reg_rtx)
24097 rs6000_emit_stack_tie (frame_reg_rtx, false);
24100 /* Set frame pointer, if needed. */
24101 if (frame_pointer_needed)
24103 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
24104 sp_reg_rtx);
24105 RTX_FRAME_RELATED_P (insn) = 1;
24108 /* Save AltiVec registers if needed. Save here because the red zone does
24109 not always include AltiVec registers. */
24110 if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
24111 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
24113 int end_save = info->altivec_save_offset + info->altivec_size;
24114 int ptr_off;
24115 /* Oddly, the vector save/restore functions point r0 at the end
24116 of the save area, then use r11 or r12 to load offsets for
24117 [reg+reg] addressing. */
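/* An illustrative expansion (routine name and offsets are only an
   example; the real name comes from rs6000_savres_routine_name):

       addi 0,1,end_of_vr_save_area
       bl   _savevr_20      # which does e.g. li 11,-192
                            #                stvx 20,11,0 ...  */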
24118 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24119 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
24120 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24122 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24123 NOT_INUSE (0);
24124 if (end_save + frame_off != 0)
24126 rtx offset = GEN_INT (end_save + frame_off);
24128 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24130 else
24131 emit_move_insn (ptr_reg, frame_reg_rtx);
24133 ptr_off = -end_save;
24134 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24135 info->altivec_save_offset + ptr_off,
24136 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
24137 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
24138 NULL_RTX, NULL_RTX, NULL_RTX);
24139 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24141 /* The oddity mentioned above clobbered our frame reg. */
24142 emit_move_insn (frame_reg_rtx, ptr_reg);
24143 frame_off = ptr_off;
24146 else if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
24147 && info->altivec_size != 0)
24149 int i;
24151 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24152 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24154 rtx areg, savereg, mem, split_reg;
24155 int offset;
24157 offset = (info->altivec_save_offset + frame_off
24158 + 16 * (i - info->first_altivec_reg_save));
24160 savereg = gen_rtx_REG (V4SImode, i);
24162 NOT_INUSE (0);
24163 areg = gen_rtx_REG (Pmode, 0);
24164 emit_move_insn (areg, GEN_INT (offset));
24166 /* AltiVec addressing mode is [reg+reg]. */
24167 mem = gen_frame_mem (V4SImode,
24168 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
24170 insn = emit_move_insn (mem, savereg);
24172 /* When we split a VSX store into two insns, we need to make
24173 sure the DWARF info knows which register we are storing.
24174 Pass it in to be used on the appropriate note. */
24175 if (!BYTES_BIG_ENDIAN
24176 && GET_CODE (PATTERN (insn)) == SET
24177 && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
24178 split_reg = savereg;
24179 else
24180 split_reg = NULL_RTX;
24182 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24183 areg, GEN_INT (offset), split_reg);
24187 /* VRSAVE is a bit vector representing which AltiVec registers
24188 are used. The OS uses this to determine which vector
24189 registers to save on a context switch. We need to save
24190 VRSAVE on the stack frame, add the bits for whatever AltiVec
24191 registers we use in this function, and restore the saved value
24192 in the epilogue. */
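/* Roughly, with illustrative register and offset choices:

       mfspr 12,256        # mfvrsave
       stw   12,-4(1)      # save the caller's VRSAVE
       oris  12,12,0xfff0  # add the mask bits for our AltiVec regs
       mtspr 256,12        # mtvrsave  */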
24194 if (!WORLD_SAVE_P (info)
24195 && TARGET_ALTIVEC
24196 && TARGET_ALTIVEC_VRSAVE
24197 && info->vrsave_mask != 0)
24199 rtx reg, vrsave;
24200 int offset;
24201 int save_regno;
24203 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
24204 be using r12 as frame_reg_rtx and r11 as the static chain
24205 pointer for nested functions. */
24206 save_regno = 12;
24207 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24208 && !using_static_chain_p)
24209 save_regno = 11;
24210 else if (REGNO (frame_reg_rtx) == 12)
24212 save_regno = 11;
24213 if (using_static_chain_p)
24214 save_regno = 0;
24217 NOT_INUSE (save_regno);
24218 reg = gen_rtx_REG (SImode, save_regno);
24219 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24220 if (TARGET_MACHO)
24221 emit_insn (gen_get_vrsave_internal (reg));
24222 else
24223 emit_insn (gen_rtx_SET (VOIDmode, reg, vrsave));
24225 /* Save VRSAVE. */
24226 offset = info->vrsave_save_offset + frame_off;
24227 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
24229 /* Include the registers in the mask. */
24230 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
24232 insn = emit_insn (generate_set_vrsave (reg, info, 0));
24235 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
24236 if (!TARGET_SINGLE_PIC_BASE
24237 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
24238 || (DEFAULT_ABI == ABI_V4
24239 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
24240 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
24242 /* If emit_load_toc_table will use the link register, we need to save
24243 it. We use R12 for this purpose because emit_load_toc_table
24244 can use register 0. This allows us to use a plain 'blr' to return
24245 from the procedure more often. */
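/* A sketch of what this protects (V.4 -fpic; the middle insns come
   from rs6000_emit_load_toc_table):

       mflr 12            # stash LR in r12
       bcl  20,31,1f      # getting the PC clobbers LR...
   1:  mflr 30
       ...                # ...while pointing r30 at the GOT
       mtlr 12            # put LR back; REG_CFA_RESTORE noted  */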
24246 int save_LR_around_toc_setup = (TARGET_ELF
24247 && DEFAULT_ABI == ABI_V4
24248 && flag_pic
24249 && ! info->lr_save_p
24250 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
24251 if (save_LR_around_toc_setup)
24253 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24254 rtx tmp = gen_rtx_REG (Pmode, 12);
24256 insn = emit_move_insn (tmp, lr);
24257 RTX_FRAME_RELATED_P (insn) = 1;
24259 rs6000_emit_load_toc_table (TRUE);
24261 insn = emit_move_insn (lr, tmp);
24262 add_reg_note (insn, REG_CFA_RESTORE, lr);
24263 RTX_FRAME_RELATED_P (insn) = 1;
24265 else
24266 rs6000_emit_load_toc_table (TRUE);
24269 #if TARGET_MACHO
24270 if (!TARGET_SINGLE_PIC_BASE
24271 && DEFAULT_ABI == ABI_DARWIN
24272 && flag_pic && crtl->uses_pic_offset_table)
24274 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24275 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
24277 /* Save and restore LR locally around this call (in R0). */
24278 if (!info->lr_save_p)
24279 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
24281 emit_insn (gen_load_macho_picbase (src));
24283 emit_move_insn (gen_rtx_REG (Pmode,
24284 RS6000_PIC_OFFSET_TABLE_REGNUM),
24285 lr);
24287 if (!info->lr_save_p)
24288 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
24290 #endif
24292 /* If we need to, save the TOC register after doing the stack setup.
24293 Do not emit eh frame info for this save. The unwinder wants info,
24294 conceptually attached to instructions in this function, about
24295 register values in the caller of this function. This R2 may have
24296 already been changed from the value in the caller.
24297 We don't attempt to write accurate DWARF EH frame info for R2
24298 because code emitted by gcc for a (non-pointer) function call
24299 doesn't save and restore R2. Instead, R2 is managed out-of-line
24300 by a linker generated plt call stub when the function resides in
24301 a shared library. This behaviour is costly to describe in DWARF,
24302 both in terms of the size of DWARF info and the time taken in the
24303 unwinder to interpret it. R2 changes, apart from the
24304 calls_eh_return case earlier in this function, are handled by
24305 linux-unwind.h frob_update_context. */
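/* For reference, an illustrative 64-bit linker call stub (the exact
   form is chosen by the linker):

       std   2,40(1)      # save caller's TOC pointer
       addis 12,2,target@toc@ha
       ld    12,target@toc@l(12)
       mtctr 12
       bctr

   with the "nop" after the call site rewritten to "ld 2,40(1)".  */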
24306 if (rs6000_save_toc_in_prologue_p ())
24308 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
24309 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
24313 /* Output .extern statements for the save/restore routines we use. */
24315 static void
24316 rs6000_output_savres_externs (FILE *file)
24318 rs6000_stack_t *info = rs6000_stack_info ();
24320 if (TARGET_DEBUG_STACK)
24321 debug_stack_info (info);
24323 /* Write .extern for any function we will call to save and restore
24324 fp values. */
24325 if (info->first_fp_reg_save < 64
24326 && !TARGET_MACHO
24327 && !TARGET_ELF)
24329 char *name;
24330 int regno = info->first_fp_reg_save - 32;
24332 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
24334 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
24335 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
24336 name = rs6000_savres_routine_name (info, regno, sel);
24337 fprintf (file, "\t.extern %s\n", name);
24339 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
24341 bool lr = (info->savres_strategy
24342 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
24343 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24344 name = rs6000_savres_routine_name (info, regno, sel);
24345 fprintf (file, "\t.extern %s\n", name);
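/* E.g. for a function whose first saved FPR is f24 this might emit
   something like

       .extern _savefpr_24
       .extern _restfpr_24

   though the exact names are ABI-dependent and come from
   rs6000_savres_routine_name.  */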
24350 /* Write function prologue. */
24352 static void
24353 rs6000_output_function_prologue (FILE *file,
24354 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
24356 if (!cfun->is_thunk)
24357 rs6000_output_savres_externs (file);
24359 /* ELFv2 ABI r2 setup code and local entry point. This must follow
24360 immediately after the global entry point label. */
24361 if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
24363 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
24365 fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
24366 fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
24368 fputs ("\t.localentry\t", file);
24369 assemble_name (file, name);
24370 fputs (",.-", file);
24371 assemble_name (file, name);
24372 fputs ("\n", file);
24375 /* Output -mprofile-kernel code. This needs to be done here instead of
24376 in output_function_profile since it must go after the ELFv2 ABI
24377 local entry point. */
24378 if (TARGET_PROFILE_KERNEL && crtl->profile)
24380 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24381 gcc_assert (!TARGET_32BIT);
24383 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
24384 asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
24386 /* In the ELFv2 ABI we have no compiler stack word. It must be
24387 the responsibility of _mcount to preserve the static chain
24388 register if required. */
24389 if (DEFAULT_ABI != ABI_ELFv2
24390 && cfun->static_chain_decl != NULL)
24392 asm_fprintf (file, "\tstd %s,24(%s)\n",
24393 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24394 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24395 asm_fprintf (file, "\tld %s,24(%s)\n",
24396 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24398 else
24399 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24402 rs6000_pic_labelno++;
24405 /* Non-zero if vmx regs are restored before the frame pop, zero if
24406 we restore after the pop when possible. */
24407 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
24409 /* Restoring cr is a two-step process: loading a reg from the frame
24410 save, then moving the reg to cr. For ABI_V4 we must let the
24411 unwinder know that the stack location is no longer valid at or
24412 before the stack deallocation, but we can't emit a cfa_restore for
24413 cr at the stack deallocation like we do for other registers.
24414 The trouble is that it is possible for the move to cr to be
24415 scheduled after the stack deallocation. So say exactly where cr
24416 is located on each of the two insns. */
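/* Illustrative shape of the sequence (offsets vary by ABI):

       lwz   12,8(1)     <- REG_CFA_REGISTER: cr2 now lives in r12
       ...                  the stack deallocation may land here
       mtcrf 0x20,12     <- REG_CFA_RESTORE notes attach here  */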
24418 static rtx
24419 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
24421 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
24422 rtx reg = gen_rtx_REG (SImode, regno);
24423 rtx_insn *insn = emit_move_insn (reg, mem);
24425 if (!exit_func && DEFAULT_ABI == ABI_V4)
24427 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24428 rtx set = gen_rtx_SET (VOIDmode, reg, cr);
24430 add_reg_note (insn, REG_CFA_REGISTER, set);
24431 RTX_FRAME_RELATED_P (insn) = 1;
24433 return reg;
24436 /* Reload CR from REG. */
24438 static void
24439 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
24441 int count = 0;
24442 int i;
24444 if (using_mfcr_multiple)
24446 for (i = 0; i < 8; i++)
24447 if (save_reg_p (CR0_REGNO + i))
24448 count++;
24449 gcc_assert (count);
24452 if (using_mfcr_multiple && count > 1)
24454 rtx_insn *insn;
24455 rtvec p;
24456 int ndx;
24458 p = rtvec_alloc (count);
24460 ndx = 0;
24461 for (i = 0; i < 8; i++)
24462 if (save_reg_p (CR0_REGNO + i))
24464 rtvec r = rtvec_alloc (2);
24465 RTVEC_ELT (r, 0) = reg;
24466 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
24467 RTVEC_ELT (p, ndx) =
24468 gen_rtx_SET (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i),
24469 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
24470 ndx++;
24472 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24473 gcc_assert (ndx == count);
24475 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24476 CR field separately. */
24477 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24479 for (i = 0; i < 8; i++)
24480 if (save_reg_p (CR0_REGNO + i))
24481 add_reg_note (insn, REG_CFA_RESTORE,
24482 gen_rtx_REG (SImode, CR0_REGNO + i));
24484 RTX_FRAME_RELATED_P (insn) = 1;
24487 else
24488 for (i = 0; i < 8; i++)
24489 if (save_reg_p (CR0_REGNO + i))
24491 rtx insn = emit_insn (gen_movsi_to_cr_one
24492 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24494 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24495 CR field separately, attached to the insn that in fact
24496 restores this particular CR field. */
24497 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24499 add_reg_note (insn, REG_CFA_RESTORE,
24500 gen_rtx_REG (SImode, CR0_REGNO + i));
24502 RTX_FRAME_RELATED_P (insn) = 1;
24506 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
24507 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
24508 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24510 rtx_insn *insn = get_last_insn ();
24511 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24513 add_reg_note (insn, REG_CFA_RESTORE, cr);
24514 RTX_FRAME_RELATED_P (insn) = 1;
24518 /* Like cr, the move to lr instruction can be scheduled after the
24519 stack deallocation, but unlike cr, its stack frame save is still
24520 valid. So we only need to emit the cfa_restore on the correct
24521 instruction. */
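/* E.g., with 64-bit offsets for illustration:

       ld   0,16(1)      # load_lr_save
       mtlr 0            # restore_saved_lr; REG_CFA_RESTORE here  */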
24523 static void
24524 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
24526 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
24527 rtx reg = gen_rtx_REG (Pmode, regno);
24529 emit_move_insn (reg, mem);
24532 static void
24533 restore_saved_lr (int regno, bool exit_func)
24535 rtx reg = gen_rtx_REG (Pmode, regno);
24536 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24537 rtx_insn *insn = emit_move_insn (lr, reg);
24539 if (!exit_func && flag_shrink_wrap)
24541 add_reg_note (insn, REG_CFA_RESTORE, lr);
24542 RTX_FRAME_RELATED_P (insn) = 1;
24546 static rtx
24547 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
24549 if (DEFAULT_ABI == ABI_ELFv2)
24551 int i;
24552 for (i = 0; i < 8; i++)
24553 if (save_reg_p (CR0_REGNO + i))
24555 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
24556 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
24557 cfa_restores);
24560 else if (info->cr_save_p)
24561 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24562 gen_rtx_REG (SImode, CR2_REGNO),
24563 cfa_restores);
24565 if (info->lr_save_p)
24566 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24567 gen_rtx_REG (Pmode, LR_REGNO),
24568 cfa_restores);
24569 return cfa_restores;
24572 /* Return true if OFFSET from the stack pointer can be clobbered by signals.
24573 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
24574 below the stack pointer that are not clobbered by signals. */
24576 static inline bool
24577 offset_below_red_zone_p (HOST_WIDE_INT offset)
24579 return offset < (DEFAULT_ABI == ABI_V4
24580 ? 0
24581 : TARGET_32BIT ? -220 : -288);
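/* E.g. with the 64-bit 288-byte cushion, a save slot at -304(r1) can
   be clobbered by a signal handler and must be reloaded before the
   frame pop, while one at -160(r1) survives the pop.  */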
24584 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
24586 static void
24587 emit_cfa_restores (rtx cfa_restores)
24589 rtx_insn *insn = get_last_insn ();
24590 rtx *loc = &REG_NOTES (insn);
24592 while (*loc)
24593 loc = &XEXP (*loc, 1);
24594 *loc = cfa_restores;
24595 RTX_FRAME_RELATED_P (insn) = 1;
24598 /* Emit function epilogue as insns. */
24600 void
24601 rs6000_emit_epilogue (int sibcall)
24603 rs6000_stack_t *info;
24604 int restoring_GPRs_inline;
24605 int restoring_FPRs_inline;
24606 int using_load_multiple;
24607 int using_mtcr_multiple;
24608 int use_backchain_to_restore_sp;
24609 int restore_lr;
24610 int strategy;
24611 HOST_WIDE_INT frame_off = 0;
24612 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
24613 rtx frame_reg_rtx = sp_reg_rtx;
24614 rtx cfa_restores = NULL_RTX;
24615 rtx insn;
24616 rtx cr_save_reg = NULL_RTX;
24617 machine_mode reg_mode = Pmode;
24618 int reg_size = TARGET_32BIT ? 4 : 8;
24619 int i;
24620 bool exit_func;
24621 unsigned ptr_regno;
24623 info = rs6000_stack_info ();
24625 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24627 reg_mode = V2SImode;
24628 reg_size = 8;
24631 strategy = info->savres_strategy;
24632 using_load_multiple = strategy & SAVRES_MULTIPLE;
24633 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
24634 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
24635 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
24636 || rs6000_cpu == PROCESSOR_PPC603
24637 || rs6000_cpu == PROCESSOR_PPC750
24638 || optimize_size);
24639 /* Restore via the backchain when we have a large frame, since this
24640 is more efficient than an addis, addi pair. The second condition
24641 here will not trigger at the moment; we don't actually need a
24642 frame pointer for alloca, but the generic parts of the compiler
24643 give us one anyway. */
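/* I.e. prefer a single back-chain load such as "ld 11,0(1)" over
   rebuilding a frame size larger than 32767 with an addis/addi
   pair.  */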
24644 use_backchain_to_restore_sp = (info->total_size > 32767 - info->lr_save_offset
24645 || (cfun->calls_alloca
24646 && !frame_pointer_needed));
24647 restore_lr = (info->lr_save_p
24648 && (restoring_FPRs_inline
24649 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
24650 && (restoring_GPRs_inline
24651 || info->first_fp_reg_save < 64));
24653 if (WORLD_SAVE_P (info))
24655 int i, j;
24656 char rname[30];
24657 const char *alloc_rname;
24658 rtvec p;
24660 /* eh_rest_world_r10 will return to the location saved in the LR
24661 stack slot (which is not likely to be our caller).
24662 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
24663 rest_world is similar, except any R10 parameter is ignored.
24664 The exception-handling stuff that was here in 2.95 is no
24665 longer necessary. */
24667 p = rtvec_alloc (9
24669 + 32 - info->first_gp_reg_save
24670 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
24671 + 63 + 1 - info->first_fp_reg_save);
24673 strcpy (rname, ((crtl->calls_eh_return) ?
24674 "*eh_rest_world_r10" : "*rest_world"));
24675 alloc_rname = ggc_strdup (rname);
24677 j = 0;
24678 RTVEC_ELT (p, j++) = ret_rtx;
24679 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
24680 gen_rtx_REG (Pmode,
24681 LR_REGNO));
24682 RTVEC_ELT (p, j++)
24683 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
24684 /* The instruction pattern requires a clobber here;
24685 it is shared with the restVEC helper. */
24686 RTVEC_ELT (p, j++)
24687 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
24690 /* CR register traditionally saved as CR2. */
24691 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
24692 RTVEC_ELT (p, j++)
24693 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
24694 if (flag_shrink_wrap)
24696 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24697 gen_rtx_REG (Pmode, LR_REGNO),
24698 cfa_restores);
24699 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24703 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24705 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
24706 RTVEC_ELT (p, j++)
24707 = gen_frame_load (reg,
24708 frame_reg_rtx, info->gp_save_offset + reg_size * i);
24709 if (flag_shrink_wrap)
24710 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24712 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
24714 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
24715 RTVEC_ELT (p, j++)
24716 = gen_frame_load (reg,
24717 frame_reg_rtx, info->altivec_save_offset + 16 * i);
24718 if (flag_shrink_wrap)
24719 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24721 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
24723 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24724 ? DFmode : SFmode),
24725 info->first_fp_reg_save + i);
24726 RTVEC_ELT (p, j++)
24727 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
24728 if (flag_shrink_wrap)
24729 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24731 RTVEC_ELT (p, j++)
24732 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
24733 RTVEC_ELT (p, j++)
24734 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
24735 RTVEC_ELT (p, j++)
24736 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
24737 RTVEC_ELT (p, j++)
24738 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
24739 RTVEC_ELT (p, j++)
24740 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
24741 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
24743 if (flag_shrink_wrap)
24745 REG_NOTES (insn) = cfa_restores;
24746 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24747 RTX_FRAME_RELATED_P (insn) = 1;
24749 return;
24752 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
24753 if (info->push_p)
24754 frame_off = info->total_size;
24756 /* Restore AltiVec registers if we must do so before adjusting the
24757 stack. */
24758 if (TARGET_ALTIVEC_ABI
24759 && info->altivec_size != 0
24760 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24761 || (DEFAULT_ABI != ABI_V4
24762 && offset_below_red_zone_p (info->altivec_save_offset))))
24764 int i;
24765 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24767 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24768 if (use_backchain_to_restore_sp)
24770 int frame_regno = 11;
24772 if ((strategy & REST_INLINE_VRS) == 0)
24774 /* Of r11 and r12, select the one not clobbered by an
24775 out-of-line restore function for the frame register. */
24776 frame_regno = 11 + 12 - scratch_regno;
24778 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
24779 emit_move_insn (frame_reg_rtx,
24780 gen_rtx_MEM (Pmode, sp_reg_rtx));
24781 frame_off = 0;
24783 else if (frame_pointer_needed)
24784 frame_reg_rtx = hard_frame_pointer_rtx;
24786 if ((strategy & REST_INLINE_VRS) == 0)
24788 int end_save = info->altivec_save_offset + info->altivec_size;
24789 int ptr_off;
24790 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24791 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24793 if (end_save + frame_off != 0)
24795 rtx offset = GEN_INT (end_save + frame_off);
24797 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24799 else
24800 emit_move_insn (ptr_reg, frame_reg_rtx);
24802 ptr_off = -end_save;
24803 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24804 info->altivec_save_offset + ptr_off,
24805 0, V4SImode, SAVRES_VR);
24807 else
24809 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24810 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24812 rtx addr, areg, mem, reg;
24814 areg = gen_rtx_REG (Pmode, 0);
24815 emit_move_insn
24816 (areg, GEN_INT (info->altivec_save_offset
24817 + frame_off
24818 + 16 * (i - info->first_altivec_reg_save)));
24820 /* AltiVec addressing mode is [reg+reg]. */
24821 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24822 mem = gen_frame_mem (V4SImode, addr);
24824 reg = gen_rtx_REG (V4SImode, i);
24825 emit_move_insn (reg, mem);
24829 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24830 if (((strategy & REST_INLINE_VRS) == 0
24831 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24832 && (flag_shrink_wrap
24833 || (offset_below_red_zone_p
24834 (info->altivec_save_offset
24835 + 16 * (i - info->first_altivec_reg_save)))))
24837 rtx reg = gen_rtx_REG (V4SImode, i);
24838 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24842 /* Restore VRSAVE if we must do so before adjusting the stack. */
24843 if (TARGET_ALTIVEC
24844 && TARGET_ALTIVEC_VRSAVE
24845 && info->vrsave_mask != 0
24846 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24847 || (DEFAULT_ABI != ABI_V4
24848 && offset_below_red_zone_p (info->vrsave_save_offset))))
24850 rtx reg;
24852 if (frame_reg_rtx == sp_reg_rtx)
24854 if (use_backchain_to_restore_sp)
24856 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24857 emit_move_insn (frame_reg_rtx,
24858 gen_rtx_MEM (Pmode, sp_reg_rtx));
24859 frame_off = 0;
24861 else if (frame_pointer_needed)
24862 frame_reg_rtx = hard_frame_pointer_rtx;
24865 reg = gen_rtx_REG (SImode, 12);
24866 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24867 info->vrsave_save_offset + frame_off));
24869 emit_insn (generate_set_vrsave (reg, info, 1));
24872 insn = NULL_RTX;
24873 /* If we have a large stack frame, restore the old stack pointer
24874 using the backchain. */
24875 if (use_backchain_to_restore_sp)
24877 if (frame_reg_rtx == sp_reg_rtx)
24879 /* Under V.4, don't reset the stack pointer until after we're done
24880 loading the saved registers. */
24881 if (DEFAULT_ABI == ABI_V4)
24882 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24884 insn = emit_move_insn (frame_reg_rtx,
24885 gen_rtx_MEM (Pmode, sp_reg_rtx));
24886 frame_off = 0;
24888 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24889 && DEFAULT_ABI == ABI_V4)
24890 /* frame_reg_rtx has been set up by the altivec restore. */
24892 else
24894 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
24895 frame_reg_rtx = sp_reg_rtx;
24898 /* If we have a frame pointer, we can restore the old stack pointer
24899 from it. */
24900 else if (frame_pointer_needed)
24902 frame_reg_rtx = sp_reg_rtx;
24903 if (DEFAULT_ABI == ABI_V4)
24904 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24905 /* Prevent reordering memory accesses against stack pointer restore. */
24906 else if (cfun->calls_alloca
24907 || offset_below_red_zone_p (-info->total_size))
24908 rs6000_emit_stack_tie (frame_reg_rtx, true);
24910 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
24911 GEN_INT (info->total_size)));
24912 frame_off = 0;
24914 else if (info->push_p
24915 && DEFAULT_ABI != ABI_V4
24916 && !crtl->calls_eh_return)
24918 /* Prevent reordering memory accesses against stack pointer restore. */
24919 if (cfun->calls_alloca
24920 || offset_below_red_zone_p (-info->total_size))
24921 rs6000_emit_stack_tie (frame_reg_rtx, false);
24922 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
24923 GEN_INT (info->total_size)));
24924 frame_off = 0;
24926 if (insn && frame_reg_rtx == sp_reg_rtx)
24928 if (cfa_restores)
24930 REG_NOTES (insn) = cfa_restores;
24931 cfa_restores = NULL_RTX;
24933 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24934 RTX_FRAME_RELATED_P (insn) = 1;
24937 /* Restore AltiVec registers if we have not done so already. */
24938 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24939 && TARGET_ALTIVEC_ABI
24940 && info->altivec_size != 0
24941 && (DEFAULT_ABI == ABI_V4
24942 || !offset_below_red_zone_p (info->altivec_save_offset)))
24944 int i;
24946 if ((strategy & REST_INLINE_VRS) == 0)
24948 int end_save = info->altivec_save_offset + info->altivec_size;
24949 int ptr_off;
24950 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24951 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24952 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24954 if (end_save + frame_off != 0)
24956 rtx offset = GEN_INT (end_save + frame_off);
24958 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24960 else
24961 emit_move_insn (ptr_reg, frame_reg_rtx);
24963 ptr_off = -end_save;
24964 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24965 info->altivec_save_offset + ptr_off,
24966 0, V4SImode, SAVRES_VR);
24967 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24969 /* Frame reg was clobbered by out-of-line save. Restore it
24970 from ptr_reg, and if we are calling out-of-line gpr or
24971 fpr restore set up the correct pointer and offset. */
24972 unsigned newptr_regno = 1;
24973 if (!restoring_GPRs_inline)
24975 bool lr = info->gp_save_offset + info->gp_size == 0;
24976 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24977 newptr_regno = ptr_regno_for_savres (sel);
24978 end_save = info->gp_save_offset + info->gp_size;
24980 else if (!restoring_FPRs_inline)
24982 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
24983 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24984 newptr_regno = ptr_regno_for_savres (sel);
24985 end_save = info->fp_save_offset + info->fp_size;
24988 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
24989 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
24991 if (end_save + ptr_off != 0)
24993 rtx offset = GEN_INT (end_save + ptr_off);
24995 frame_off = -end_save;
24996 if (TARGET_32BIT)
24997 emit_insn (gen_addsi3_carry (frame_reg_rtx,
24998 ptr_reg, offset));
24999 else
25000 emit_insn (gen_adddi3_carry (frame_reg_rtx,
25001 ptr_reg, offset));
25003 else
25005 frame_off = ptr_off;
25006 emit_move_insn (frame_reg_rtx, ptr_reg);
25010 else
25012 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25013 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25015 rtx addr, areg, mem, reg;
25017 areg = gen_rtx_REG (Pmode, 0);
25018 emit_move_insn
25019 (areg, GEN_INT (info->altivec_save_offset
25020 + frame_off
25021 + 16 * (i - info->first_altivec_reg_save)));
25023 /* AltiVec addressing mode is [reg+reg]. */
25024 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
25025 mem = gen_frame_mem (V4SImode, addr);
25027 reg = gen_rtx_REG (V4SImode, i);
25028 emit_move_insn (reg, mem);
25032 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25033 if (((strategy & REST_INLINE_VRS) == 0
25034 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
25035 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
25037 rtx reg = gen_rtx_REG (V4SImode, i);
25038 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25042 /* Restore VRSAVE if we have not done so already. */
25043 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25044 && TARGET_ALTIVEC
25045 && TARGET_ALTIVEC_VRSAVE
25046 && info->vrsave_mask != 0
25047 && (DEFAULT_ABI == ABI_V4
25048 || !offset_below_red_zone_p (info->vrsave_save_offset)))
25050 rtx reg;
25052 reg = gen_rtx_REG (SImode, 12);
25053 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25054 info->vrsave_save_offset + frame_off));
25056 emit_insn (generate_set_vrsave (reg, info, 1));
25059 /* If we exit by an out-of-line restore function on ABI_V4 then that
25060 function will deallocate the stack, so we don't need to worry
25061 about the unwinder restoring cr from an invalid stack frame
25062 location. */
25063 exit_func = (!restoring_FPRs_inline
25064 || (!restoring_GPRs_inline
25065 && info->first_fp_reg_save == 64));
25067 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
25068 *separate* slots if the routine calls __builtin_eh_return, so
25069 that they can be independently restored by the unwinder. */
25070 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
25072 int i, cr_off = info->ehcr_offset;
25074 for (i = 0; i < 8; i++)
25075 if (!call_used_regs[CR0_REGNO + i])
25077 rtx reg = gen_rtx_REG (SImode, 0);
25078 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25079 cr_off + frame_off));
25081 insn = emit_insn (gen_movsi_to_cr_one
25082 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
25084 if (!exit_func && flag_shrink_wrap)
25086 add_reg_note (insn, REG_CFA_RESTORE,
25087 gen_rtx_REG (SImode, CR0_REGNO + i));
25089 RTX_FRAME_RELATED_P (insn) = 1;
25092 cr_off += reg_size;
25096 /* Get the old lr if we saved it. If we are restoring registers
25097 out-of-line, then the out-of-line routines can do this for us. */
25098 if (restore_lr && restoring_GPRs_inline)
25099 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25101 /* Get the old cr if we saved it. */
25102 if (info->cr_save_p)
25104 unsigned cr_save_regno = 12;
25106 if (!restoring_GPRs_inline)
25108 /* Ensure we don't use the register used by the out-of-line
25109 gpr register restore below. */
25110 bool lr = info->gp_save_offset + info->gp_size == 0;
25111 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
25112 int gpr_ptr_regno = ptr_regno_for_savres (sel);
25114 if (gpr_ptr_regno == 12)
25115 cr_save_regno = 11;
25116 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
25118 else if (REGNO (frame_reg_rtx) == 12)
25119 cr_save_regno = 11;
25121 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
25122 info->cr_save_offset + frame_off,
25123 exit_func);
25126 /* Set LR here to try to overlap restores below. */
25127 if (restore_lr && restoring_GPRs_inline)
25128 restore_saved_lr (0, exit_func);
25130 /* Load exception handler data registers, if needed. */
25131 if (crtl->calls_eh_return)
25133 unsigned int i, regno;
25135 if (TARGET_AIX)
25137 rtx reg = gen_rtx_REG (reg_mode, 2);
25138 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25139 frame_off + RS6000_TOC_SAVE_SLOT));
25142 for (i = 0; ; ++i)
25144 rtx mem;
25146 regno = EH_RETURN_DATA_REGNO (i);
25147 if (regno == INVALID_REGNUM)
25148 break;
25150 /* Note: possible use of r0 here to address SPE regs. */
25151 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
25152 info->ehrd_offset + frame_off
25153 + reg_size * (int) i);
25155 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
25159 /* Restore GPRs. This is done as a PARALLEL if we are using
25160 the load-multiple instructions. */
25161 if (TARGET_SPE_ABI
25162 && info->spe_64bit_regs_used
25163 && info->first_gp_reg_save != 32)
25165 /* Determine whether we can address all of the registers that need
25166 to be saved with an offset from frame_reg_rtx that fits in
25167 the small const field for SPE memory instructions. */
25168 int spe_regs_addressable
25169 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25170 + reg_size * (32 - info->first_gp_reg_save - 1))
25171 && restoring_GPRs_inline);
25173 if (!spe_regs_addressable)
25175 int ool_adjust = 0;
25176 rtx old_frame_reg_rtx = frame_reg_rtx;
25177 /* Make r11 point to the start of the SPE save area. We worried about
25178 not clobbering it when we were saving registers in the prologue.
25179 There's no need to worry here because the static chain is passed
25180 anew to every function. */
25182 if (!restoring_GPRs_inline)
25183 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25184 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25185 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
25186 GEN_INT (info->spe_gp_save_offset
25187 + frame_off
25188 - ool_adjust)));
25189 /* Keep the invariant that frame_reg_rtx + frame_off points
25190 at the top of the stack frame. */
25191 frame_off = -info->spe_gp_save_offset + ool_adjust;
25194 if (restoring_GPRs_inline)
25196 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
25198 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25199 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25201 rtx offset, addr, mem, reg;
25203 /* We're doing all this to ensure that the immediate offset
25204 fits into the immediate field of 'evldd'. */
25205 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
25207 offset = GEN_INT (spe_offset + reg_size * i);
25208 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
25209 mem = gen_rtx_MEM (V2SImode, addr);
25210 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
25212 emit_move_insn (reg, mem);
25215 else
25216 rs6000_emit_savres_rtx (info, frame_reg_rtx,
25217 info->spe_gp_save_offset + frame_off,
25218 info->lr_save_offset + frame_off,
25219 reg_mode,
25220 SAVRES_GPR | SAVRES_LR);
25222 else if (!restoring_GPRs_inline)
25224 /* We are jumping to an out-of-line function. */
25225 rtx ptr_reg;
25226 int end_save = info->gp_save_offset + info->gp_size;
25227 bool can_use_exit = end_save == 0;
25228 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
25229 int ptr_off;
25231 /* Emit stack reset code if we need it. */
25232 ptr_regno = ptr_regno_for_savres (sel);
25233 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25234 if (can_use_exit)
25235 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25236 else if (end_save + frame_off != 0)
25237 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
25238 GEN_INT (end_save + frame_off)));
25239 else if (REGNO (frame_reg_rtx) != ptr_regno)
25240 emit_move_insn (ptr_reg, frame_reg_rtx);
25241 if (REGNO (frame_reg_rtx) == ptr_regno)
25242 frame_off = -end_save;
25244 if (can_use_exit && info->cr_save_p)
25245 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
25247 ptr_off = -end_save;
25248 rs6000_emit_savres_rtx (info, ptr_reg,
25249 info->gp_save_offset + ptr_off,
25250 info->lr_save_offset + ptr_off,
25251 reg_mode, sel);
25253 else if (using_load_multiple)
25255 rtvec p;
25256 p = rtvec_alloc (32 - info->first_gp_reg_save);
25257 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25258 RTVEC_ELT (p, i)
25259 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25260 frame_reg_rtx,
25261 info->gp_save_offset + frame_off + reg_size * i);
25262 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25264 else
25266 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25267 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25268 emit_insn (gen_frame_load
25269 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25270 frame_reg_rtx,
25271 info->gp_save_offset + frame_off + reg_size * i));
25274 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25276 /* If the frame pointer was used then we can't delay emitting
25277 a REG_CFA_DEF_CFA note. This must happen on the insn that
25278 restores the frame pointer, r31. We may have already emitted
25279 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
25280 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
25281 be harmless if emitted. */
25282 if (frame_pointer_needed)
25284 insn = get_last_insn ();
25285 add_reg_note (insn, REG_CFA_DEF_CFA,
25286 plus_constant (Pmode, frame_reg_rtx, frame_off));
25287 RTX_FRAME_RELATED_P (insn) = 1;
25290 /* Set up cfa_restores. We always need these when
25291 shrink-wrapping. If not shrink-wrapping then we only need
25292 the cfa_restore when the stack location is no longer valid.
25293 The cfa_restores must be emitted on or before the insn that
25294 invalidates the stack, and of course must not be emitted
25295 before the insn that actually does the restore. The latter
25296 is why it is a bad idea to emit the cfa_restores as a group
25297 on the last instruction here that actually does a restore:
25298 That insn may be reordered with respect to others doing
25299 restores. */
25300 if (flag_shrink_wrap
25301 && !restoring_GPRs_inline
25302 && info->first_fp_reg_save == 64)
25303 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25305 for (i = info->first_gp_reg_save; i < 32; i++)
25306 if (!restoring_GPRs_inline
25307 || using_load_multiple
25308 || rs6000_reg_live_or_pic_offset_p (i))
25310 rtx reg = gen_rtx_REG (reg_mode, i);
25312 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25316 if (!restoring_GPRs_inline
25317 && info->first_fp_reg_save == 64)
25319 /* We are jumping to an out-of-line function. */
25320 if (cfa_restores)
25321 emit_cfa_restores (cfa_restores);
25322 return;
25325 if (restore_lr && !restoring_GPRs_inline)
25327 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25328 restore_saved_lr (0, exit_func);
25331 /* Restore fpr's if we need to do it without calling a function. */
25332 if (restoring_FPRs_inline)
25333 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25334 if (save_reg_p (info->first_fp_reg_save + i))
25336 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25337 ? DFmode : SFmode),
25338 info->first_fp_reg_save + i);
25339 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25340 info->fp_save_offset + frame_off + 8 * i));
25341 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25342 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25345 /* If we saved cr, restore it here. Just those that were used. */
25346 if (info->cr_save_p)
25347 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
25349 /* If this is V.4, unwind the stack pointer after all of the loads
25350 have been done, or set up r11 if we are restoring fp out of line. */
25351 ptr_regno = 1;
25352 if (!restoring_FPRs_inline)
25354 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25355 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25356 ptr_regno = ptr_regno_for_savres (sel);
25359 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25360 if (REGNO (frame_reg_rtx) == ptr_regno)
25361 frame_off = 0;
25363 if (insn && restoring_FPRs_inline)
25365 if (cfa_restores)
25367 REG_NOTES (insn) = cfa_restores;
25368 cfa_restores = NULL_RTX;
25370 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
25371 RTX_FRAME_RELATED_P (insn) = 1;
25374 if (crtl->calls_eh_return)
25376 rtx sa = EH_RETURN_STACKADJ_RTX;
25377 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
25380 if (!sibcall)
25382 rtvec p;
25383 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25384 if (! restoring_FPRs_inline)
25386 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
25387 RTVEC_ELT (p, 0) = ret_rtx;
25389 else
25391 if (cfa_restores)
25393 /* We can't hang the cfa_restores off a simple return,
25394 since the shrink-wrap code sometimes uses an existing
25395 return. This means there might be a path from
25396 pre-prologue code to this return, and dwarf2cfi code
25397 wants the eh_frame unwinder state to be the same on
25398 all paths to any point. So we need to emit the
25399 cfa_restores before the return. For -m64 we really
25400 don't need epilogue cfa_restores at all, except for
25401 this irritating dwarf2cfi-with-shrink-wrap
25402 requirement. The stack red-zone means eh_frame info
25403 from the prologue telling the unwinder to restore
25404 from the stack is perfectly good right to the end of
25405 the function. */
25406 emit_insn (gen_blockage ());
25407 emit_cfa_restores (cfa_restores);
25408 cfa_restores = NULL_RTX;
25410 p = rtvec_alloc (2);
25411 RTVEC_ELT (p, 0) = simple_return_rtx;
25414 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
25415 ? gen_rtx_USE (VOIDmode,
25416 gen_rtx_REG (Pmode, LR_REGNO))
25417 : gen_rtx_CLOBBER (VOIDmode,
25418 gen_rtx_REG (Pmode, LR_REGNO)));
25420 /* If we have to restore more than two FP registers, branch to the
25421 restore function. It will return to our caller. */
25422 if (! restoring_FPRs_inline)
25424 int i;
25425 int reg;
25426 rtx sym;
25428 if (flag_shrink_wrap)
25429 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25431 sym = rs6000_savres_routine_sym (info,
25432 SAVRES_FPR | (lr ? SAVRES_LR : 0));
25433 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
25434 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
25435 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
25437 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25439 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
25441 RTVEC_ELT (p, i + 4)
25442 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
25443 if (flag_shrink_wrap)
25444 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
25445 cfa_restores);
25449 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
25452 if (cfa_restores)
25454 if (sibcall)
25455 /* Ensure the cfa_restores are hung off an insn that won't
25456 be reordered above other restores. */
25457 emit_insn (gen_blockage ());
25459 emit_cfa_restores (cfa_restores);
25463 /* Write function epilogue. */
25465 static void
25466 rs6000_output_function_epilogue (FILE *file,
25467 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
25469 #if TARGET_MACHO
25470 macho_branch_islands ();
25471 /* Mach-O doesn't support labels at the end of objects, so if
25472 it looks like we might want one, insert a NOP. */
25474 rtx_insn *insn = get_last_insn ();
25475 rtx_insn *deleted_debug_label = NULL;
25476 while (insn
25477 && NOTE_P (insn)
25478 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
25480 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes only, don't insert
25481 a nop; instead set their CODE_LABEL_NUMBER to -1, otherwise
25482 there would be code generation differences
25483 between -g and -g0. */
25484 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25485 deleted_debug_label = insn;
25486 insn = PREV_INSN (insn);
25488 if (insn
25489 && (LABEL_P (insn)
25490 || (NOTE_P (insn)
25491 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
25492 fputs ("\tnop\n", file);
25493 else if (deleted_debug_label)
25494 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
25495 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25496 CODE_LABEL_NUMBER (insn) = -1;
25498 #endif
25500 /* Output a traceback table here. See /usr/include/sys/debug.h for info
25501 on its format.
25503 We don't output a traceback table if -finhibit-size-directive was
25504 used. The documentation for -finhibit-size-directive reads
25505 ``don't output a @code{.size} assembler directive, or anything
25506 else that would cause trouble if the function is split in the
25507 middle, and the two halves are placed at locations far apart in
25508 memory.'' The traceback table has this property, since it
25509 includes the offset from the start of the function to the
25510 traceback table itself.
25512 System V.4 PowerPC (and the embedded ABI derived from it) uses a
25513 different traceback table. */
25514 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25515 && ! flag_inhibit_size_directive
25516 && rs6000_traceback != traceback_none && !cfun->is_thunk)
25518 const char *fname = NULL;
25519 const char *language_string = lang_hooks.name;
25520 int fixed_parms = 0, float_parms = 0, parm_info = 0;
25521 int i;
25522 int optional_tbtab;
25523 rs6000_stack_t *info = rs6000_stack_info ();
25525 if (rs6000_traceback == traceback_full)
25526 optional_tbtab = 1;
25527 else if (rs6000_traceback == traceback_part)
25528 optional_tbtab = 0;
25529 else
25530 optional_tbtab = !optimize_size && !TARGET_ELF;
25532 if (optional_tbtab)
25534 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25535 while (*fname == '.') /* V.4 encodes . in the name */
25536 fname++;
25538 /* Need label immediately before tbtab, so we can compute
25539 its offset from the function start. */
25540 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25541 ASM_OUTPUT_LABEL (file, fname);
25544 /* The .tbtab pseudo-op can only be used for the first eight
25545 expressions, since it can't handle the possibly variable
25546 length fields that follow. However, if you omit the optional
25547 fields, the assembler outputs zeros for all optional fields
25548 anyway, giving each variable length field its minimum length
25549 (as defined in sys/debug.h). Thus we cannot use the .tbtab
25550 pseudo-op at all. */
25552 /* An all-zero word flags the start of the tbtab, for debuggers
25553 that have to find it by searching forward from the entry
25554 point or from the current pc. */
25555 fputs ("\t.long 0\n", file);
25557 /* Tbtab format type. Use format type 0. */
25558 fputs ("\t.byte 0,", file);
25560 /* Language type. Unfortunately, there does not seem to be any
25561 official way to discover the language being compiled, so we
25562 use language_string.
25563 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
25564 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
25565 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
25566 either, so for now use 0. */
25567 if (lang_GNU_C ()
25568 || ! strcmp (language_string, "GNU GIMPLE")
25569 || ! strcmp (language_string, "GNU Go")
25570 || ! strcmp (language_string, "libgccjit"))
25571 i = 0;
25572 else if (! strcmp (language_string, "GNU F77")
25573 || lang_GNU_Fortran ())
25574 i = 1;
25575 else if (! strcmp (language_string, "GNU Pascal"))
25576 i = 2;
25577 else if (! strcmp (language_string, "GNU Ada"))
25578 i = 3;
25579 else if (lang_GNU_CXX ()
25580 || ! strcmp (language_string, "GNU Objective-C++"))
25581 i = 9;
25582 else if (! strcmp (language_string, "GNU Java"))
25583 i = 13;
25584 else if (! strcmp (language_string, "GNU Objective-C"))
25585 i = 14;
25586 else
25587 gcc_unreachable ();
25588 fprintf (file, "%d,", i);
25590 /* 8 single bit fields: global linkage (not set for C extern linkage,
25591 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
25592 from start of procedure stored in tbtab, internal function, function
25593 has controlled storage, function has no toc, function uses fp,
25594 function logs/aborts fp operations. */
25595 /* Assume that fp operations are used if any fp reg must be saved. */
25596 fprintf (file, "%d,",
25597 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
25599 /* 6 bitfields: function is interrupt handler, name present in
25600 proc table, function calls alloca, on condition directives
25601 (controls stack walks, 3 bits), saves condition reg, saves
25602 link reg. */
25603 /* The `function calls alloca' bit seems to be set whenever reg 31 is
25604 set up as a frame pointer, even when there is no alloca call. */
25605 fprintf (file, "%d,",
25606 ((optional_tbtab << 6)
25607 | ((optional_tbtab & frame_pointer_needed) << 5)
25608 | (info->cr_save_p << 1)
25609 | (info->lr_save_p)));
25611 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
25612 (6 bits). */
25613 fprintf (file, "%d,",
25614 (info->push_p << 7) | (64 - info->first_fp_reg_save));
25616 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
25617 fprintf (file, "%d,", (32 - first_reg_to_save ()));
25619 if (optional_tbtab)
25621 /* Compute the parameter info from the function decl argument
25622 list. */
25623 tree decl;
25624 int next_parm_info_bit = 31;
25626 for (decl = DECL_ARGUMENTS (current_function_decl);
25627 decl; decl = DECL_CHAIN (decl))
25629 rtx parameter = DECL_INCOMING_RTL (decl);
25630 machine_mode mode = GET_MODE (parameter);
25632 if (GET_CODE (parameter) == REG)
25634 if (SCALAR_FLOAT_MODE_P (mode))
25636 int bits;
25638 float_parms++;
25640 switch (mode)
25642 case SFmode:
25643 case SDmode:
25644 bits = 0x2;
25645 break;
25647 case DFmode:
25648 case DDmode:
25649 case TFmode:
25650 case TDmode:
25651 bits = 0x3;
25652 break;
25654 default:
25655 gcc_unreachable ();
25658 /* If only one bit will fit, don't or in this entry. */
25659 if (next_parm_info_bit > 0)
25660 parm_info |= (bits << (next_parm_info_bit - 1));
25661 next_parm_info_bit -= 2;
25663 else
25665 fixed_parms += ((GET_MODE_SIZE (mode)
25666 + (UNITS_PER_WORD - 1))
25667 / UNITS_PER_WORD);
25668 next_parm_info_bit -= 1;
25674 /* Number of fixed point parameters. */
25675 /* This is actually the number of words of fixed point parameters; thus
25676 an 8-byte struct counts as 2, and the maximum value is 8. */
25677 fprintf (file, "%d,", fixed_parms);
25679 /* 2 bitfields: number of floating point parameters (7 bits), parameters
25680 all on stack. */
25681 /* This is actually the number of fp registers that hold parameters;
25682 and thus the maximum value is 13. */
25683 /* Set parameters on stack bit if parameters are not in their original
25684 registers, regardless of whether they are on the stack? Xlc
25685 seems to set the bit when not optimizing. */
25686 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
25688 if (! optional_tbtab)
25689 return;
25691 /* Optional fields follow. Some are variable length. */
25693 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
25694 11 double float. */
25695 /* There is an entry for each parameter in a register, in the order that
25696 they occur in the parameter list. Any intervening arguments on the
25697 stack are ignored. If the list overflows a long (max possible length
25698 34 bits) then completely leave off all elements that don't fit. */
25699 /* Only emit this long if there was at least one parameter. */
25700 if (fixed_parms || float_parms)
25701 fprintf (file, "\t.long %d\n", parm_info);
25703 /* Offset from start of code to tb table. */
25704 fputs ("\t.long ", file);
25705 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25706 RS6000_OUTPUT_BASENAME (file, fname);
25707 putc ('-', file);
25708 rs6000_output_function_entry (file, fname);
25709 putc ('\n', file);
25711 /* Interrupt handler mask. */
25712 /* Omit this long, since we never set the interrupt handler bit
25713 above. */
25715 /* Number of CTL (controlled storage) anchors. */
25716 /* Omit this long, since the has_ctl bit is never set above. */
25718 /* Displacement into stack of each CTL anchor. */
25719 /* Omit this list of longs, because there are no CTL anchors. */
25721 /* Length of function name. */
25722 if (*fname == '*')
25723 ++fname;
25724 fprintf (file, "\t.short %d\n", (int) strlen (fname));
25726 /* Function name. */
25727 assemble_string (fname, strlen (fname));
25729 /* Register for alloca automatic storage; this is always reg 31.
25730 Only emit this if the alloca bit was set above. */
25731 if (frame_pointer_needed)
25732 fputs ("\t.byte 31\n", file);
25734 fputs ("\t.align 2\n", file);
25738 /* A C compound statement that outputs the assembler code for a thunk
25739 function, used to implement C++ virtual function calls with
25740 multiple inheritance. The thunk acts as a wrapper around a virtual
25741 function, adjusting the implicit object parameter before handing
25742 control off to the real function.
25744 First, emit code to add the integer DELTA to the location that
25745 contains the incoming first argument. Assume that this argument
25746 contains a pointer, and is the one used to pass the `this' pointer
25747 in C++. This is the incoming argument *before* the function
25748 prologue, e.g. `%o0' on a sparc. The addition must preserve the
25749 values of all other incoming arguments.
25751 After the addition, emit code to jump to FUNCTION, which is a
25752 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
25753 not touch the return address. Hence returning from FUNCTION will
25754 return to whoever called the current `thunk'.
25756 The effect must be as if FUNCTION had been called directly with the
25757 adjusted first argument. This macro is responsible for emitting
25758 all of the code for a thunk function; output_function_prologue()
25759 and output_function_epilogue() are not invoked.
25761 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
25762 been extracted from it.) It might possibly be useful on some
25763 targets, but probably not.
25765 If you do not define this macro, the target-independent code in the
25766 C++ frontend will generate a less efficient heavyweight thunk that
25767 calls FUNCTION instead of jumping to it. The generic approach does
25768 not support varargs. */
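/* As a hedged illustration only (the exact code depends on the ABI,
   PIC mode and code model): for DELTA == 8 and VCALL_OFFSET == 0, the
   insns emitted below would typically assemble to something like

	addi 3,3,8		# this += delta
	b target_function	# tail call, LR untouched

   where "target_function" stands in for FUNCTION.  */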
25770 static void
25771 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
25772 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
25773 tree function)
25775 rtx this_rtx, funexp;
25776 rtx_insn *insn;
25778 reload_completed = 1;
25779 epilogue_completed = 1;
25781 /* Mark the end of the (empty) prologue. */
25782 emit_note (NOTE_INSN_PROLOGUE_END);
25784 /* Find the "this" pointer. If the function returns a structure,
25785 the structure return pointer is in r3. */
25786 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
25787 this_rtx = gen_rtx_REG (Pmode, 4);
25788 else
25789 this_rtx = gen_rtx_REG (Pmode, 3);
25791 /* Apply the constant offset, if required. */
25792 if (delta)
25793 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
25795 /* Apply the offset from the vtable, if required. */
25796 if (vcall_offset)
25798 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
25799 rtx tmp = gen_rtx_REG (Pmode, 12);
25801 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
25802 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
25804 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
25805 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
25807 else
25809 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
25811 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
25813 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
25816 /* Generate a tail call to the target function. */
25817 if (!TREE_USED (function))
25819 assemble_external (function);
25820 TREE_USED (function) = 1;
25822 funexp = XEXP (DECL_RTL (function), 0);
25823 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
25825 #if TARGET_MACHO
25826 if (MACHOPIC_INDIRECT)
25827 funexp = machopic_indirect_call_target (funexp);
25828 #endif
25830 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
25831 generate sibcall RTL explicitly. */
25832 insn = emit_call_insn (
25833 gen_rtx_PARALLEL (VOIDmode,
25834 gen_rtvec (4,
25835 gen_rtx_CALL (VOIDmode,
25836 funexp, const0_rtx),
25837 gen_rtx_USE (VOIDmode, const0_rtx),
25838 gen_rtx_USE (VOIDmode,
25839 gen_rtx_REG (SImode,
25840 LR_REGNO)),
25841 simple_return_rtx)));
25842 SIBLING_CALL_P (insn) = 1;
25843 emit_barrier ();
25845 /* Ensure we have a global entry point for the thunk. ??? We could
25846 avoid that if the target routine doesn't need a global entry point,
25847 but we do not know whether this is the case at this point. */
25848 if (DEFAULT_ABI == ABI_ELFv2)
25849 cfun->machine->r2_setup_needed = true;
25851 /* Run just enough of rest_of_compilation to get the insns emitted.
25852 There's not really enough bulk here to make other passes such as
25853 instruction scheduling worth while. Note that use_thunk calls
25854 assemble_start_function and assemble_end_function. */
25855 insn = get_insns ();
25856 shorten_branches (insn);
25857 final_start_function (insn, file, 1);
25858 final (insn, file, 1);
25859 final_end_function ();
25861 reload_completed = 0;
25862 epilogue_completed = 0;
25865 /* A quick summary of the various types of 'constant-pool tables'
25866 under PowerPC:
25868 Target Flags Name One table per
25869 AIX (none) AIX TOC object file
25870 AIX -mfull-toc AIX TOC object file
25871 AIX -mminimal-toc AIX minimal TOC translation unit
25872 SVR4/EABI (none) SVR4 SDATA object file
25873 SVR4/EABI -fpic SVR4 pic object file
25874 SVR4/EABI -fPIC SVR4 PIC translation unit
25875 SVR4/EABI -mrelocatable EABI TOC function
25876 SVR4/EABI -maix AIX TOC object file
25877 SVR4/EABI -maix -mminimal-toc
25878 AIX minimal TOC translation unit
25880 Name Reg. Set by entries contains:
25881 made by addrs? fp? sum?
25883 AIX TOC 2 crt0 as Y option option
25884 AIX minimal TOC 30 prolog gcc Y Y option
25885 SVR4 SDATA 13 crt0 gcc N Y N
25886 SVR4 pic 30 prolog ld Y not yet N
25887 SVR4 PIC 30 prolog gcc Y option option
25888 EABI TOC 30 prolog gcc Y option option
25892 /* Hash functions for the hash table. */
25894 static unsigned
25895 rs6000_hash_constant (rtx k)
25897 enum rtx_code code = GET_CODE (k);
25898 machine_mode mode = GET_MODE (k);
25899 unsigned result = (code << 3) ^ mode;
25900 const char *format;
25901 int flen, fidx;
25903 format = GET_RTX_FORMAT (code);
25904 flen = strlen (format);
25905 fidx = 0;
25907 switch (code)
25909 case LABEL_REF:
25910 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
25912 case CONST_WIDE_INT:
25914 int i;
25915 flen = CONST_WIDE_INT_NUNITS (k);
25916 for (i = 0; i < flen; i++)
25917 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
25918 return result;
25921 case CONST_DOUBLE:
25922 if (mode != VOIDmode)
25923 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
25924 flen = 2;
25925 break;
25927 case CODE_LABEL:
25928 fidx = 3;
25929 break;
25931 default:
25932 break;
25935 for (; fidx < flen; fidx++)
25936 switch (format[fidx])
25938 case 's':
25940 unsigned i, len;
25941 const char *str = XSTR (k, fidx);
25942 len = strlen (str);
25943 result = result * 613 + len;
25944 for (i = 0; i < len; i++)
25945 result = result * 613 + (unsigned) str[i];
25946 break;
25948 case 'u':
25949 case 'e':
25950 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
25951 break;
25952 case 'i':
25953 case 'n':
25954 result = result * 613 + (unsigned) XINT (k, fidx);
25955 break;
25956 case 'w':
25957 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
25958 result = result * 613 + (unsigned) XWINT (k, fidx);
25959 else
25961 size_t i;
25962 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
25963 result = result * 613 + (unsigned) (XWINT (k, fidx)
25964 >> CHAR_BIT * i);
25966 break;
25967 case '0':
25968 break;
25969 default:
25970 gcc_unreachable ();
25973 return result;
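/* For instance, a (const_int 42) is hashed by folding its code and
   mode into RESULT and then mixing the HOST_WIDE_INT operand via the
   'w' case above, so structurally equal rtxes always hash equally.  */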
25976 hashval_t
25977 toc_hasher::hash (toc_hash_struct *thc)
25979 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
25982 /* Compare H1 and H2 for equivalence. */
25984 bool
25985 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
25987 rtx r1 = h1->key;
25988 rtx r2 = h2->key;
25990 if (h1->key_mode != h2->key_mode)
25991 return 0;
25993 return rtx_equal_p (r1, r2);
25996 /* These are the names given by the C++ front-end to vtables, and
25997 vtable-like objects. Ideally, this logic should not be here;
25998 instead, there should be some programmatic way of inquiring as
25999 to whether or not an object is a vtable. */
26001 #define VTABLE_NAME_P(NAME) \
26002 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
26003 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
26004 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
26005 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
26006 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
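/* For example, this matches Itanium C++ ABI names such as "_ZTV3Foo"
   (vtable), "_ZTT3Foo" (VTT), "_ZTI3Foo" (typeinfo) and "_ZTC..."
   (construction vtable), plus the old "_vt." mangling, but not an
   ordinary symbol like "foo".  */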
26008 #ifdef NO_DOLLAR_IN_LABEL
26009 /* Return a GGC-allocated character string translating dollar signs in
26010 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
26012 const char *
26013 rs6000_xcoff_strip_dollar (const char *name)
26015 char *strip, *p;
26016 const char *q;
26017 size_t len;
26019 q = (const char *) strchr (name, '$');
26021 if (q == 0 || q == name)
26022 return name;
26024 len = strlen (name);
26025 strip = XALLOCAVEC (char, len + 1);
26026 strcpy (strip, name);
26027 p = strip + (q - name);
26028 while (p)
26030 *p = '_';
26031 p = strchr (p + 1, '$');
26034 return ggc_alloc_string (strip, len);
26036 #endif
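/* Example (on NO_DOLLAR_IN_LABEL hosts): rs6000_xcoff_strip_dollar
   ("pic$base$1") returns "pic_base_1".  A name with no '$', or whose
   first character is '$', is returned unchanged.  */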
26038 void
26039 rs6000_output_symbol_ref (FILE *file, rtx x)
26041 /* Currently C++ toc references to vtables can be emitted before it
26042 is decided whether the vtable is public or private. If this is
26043 the case, then the linker will eventually complain that there is
26044 a reference to an unknown section. Thus, for vtables only,
26045 we emit the TOC reference to reference the symbol and not the
26046 section. */
26047 const char *name = XSTR (x, 0);
26049 tree decl = SYMBOL_REF_DECL (x);
26050 if (decl /* sync condition with assemble_external () */
26051 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
26052 && (TREE_CODE (decl) == VAR_DECL
26053 || TREE_CODE (decl) == FUNCTION_DECL)
26054 && name[strlen (name) - 1] != ']')
26056 name = concat (name,
26057 (TREE_CODE (decl) == FUNCTION_DECL
26058 ? "[DS]" : "[UA]"),
26059 NULL);
26060 XSTR (x, 0) = name;
26063 if (VTABLE_NAME_P (name))
26065 RS6000_OUTPUT_BASENAME (file, name);
26067 else
26068 assemble_name (file, name);
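/* For instance, under the rules above an external FUNCTION_DECL "foo"
   is emitted as "foo[DS]" (its descriptor csect) and an external
   VAR_DECL "bar" as "bar[UA]", unless the name already ends with an
   XCOFF mapping class in brackets.  */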
26071 /* Output a TOC entry. We derive the entry name from what is being
26072 written. */
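/* As a rough example of the output below: on 64-bit AIX without
   -mminimal-toc, the DFmode constant 1.0 produces a line like

	.tc FD_3ff00000_0[TC],0x3ff0000000000000

   whereas 64-bit ELF targets emit the raw value after
   DOUBLE_INT_ASM_OP under an internal .LC<n> label instead.  */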
26074 void
26075 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
26077 char buf[256];
26078 const char *name = buf;
26079 rtx base = x;
26080 HOST_WIDE_INT offset = 0;
26082 gcc_assert (!TARGET_NO_TOC);
26084 /* When the linker won't eliminate them, don't output duplicate
26085 TOC entries (this happens on AIX if there is any kind of TOC,
26086 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
26087 CODE_LABELs. */
26088 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
26090 struct toc_hash_struct *h;
26092 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
26093 time because GGC is not initialized at that point. */
26094 if (toc_hash_table == NULL)
26095 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
26097 h = ggc_alloc<toc_hash_struct> ();
26098 h->key = x;
26099 h->key_mode = mode;
26100 h->labelno = labelno;
26102 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
26103 if (*found == NULL)
26104 *found = h;
26105 else /* This is indeed a duplicate.
26106 Set this label equal to that label. */
26108 fputs ("\t.set ", file);
26109 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
26110 fprintf (file, "%d,", labelno);
26111 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
26112 fprintf (file, "%d\n", ((*found)->labelno));
26114 #ifdef HAVE_AS_TLS
26115 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
26116 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
26117 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
26119 fputs ("\t.set ", file);
26120 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
26121 fprintf (file, "%d,", labelno);
26122 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
26123 fprintf (file, "%d\n", ((*found)->labelno));
26125 #endif
26126 return;
26130 /* If we're going to put a double constant in the TOC, make sure it's
26131 aligned properly when strict alignment is on. */
26132 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
26133 && STRICT_ALIGNMENT
26134 && GET_MODE_BITSIZE (mode) >= 64
26135 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
26136 ASM_OUTPUT_ALIGN (file, 3);
26139 (*targetm.asm_out.internal_label) (file, "LC", labelno);
26141 /* Handle FP constants specially. Note that if we have a minimal
26142 TOC, things we put here aren't actually in the TOC, so we can allow
26143 FP constants. */
26144 if (GET_CODE (x) == CONST_DOUBLE
26145 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode))
26147 REAL_VALUE_TYPE rv;
26148 long k[4];
26150 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26151 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26152 REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
26153 else
26154 REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
26156 if (TARGET_64BIT)
26158 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26159 fputs (DOUBLE_INT_ASM_OP, file);
26160 else
26161 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26162 k[0] & 0xffffffff, k[1] & 0xffffffff,
26163 k[2] & 0xffffffff, k[3] & 0xffffffff);
26164 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
26165 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26166 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
26167 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
26168 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
26169 return;
26171 else
26173 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26174 fputs ("\t.long ", file);
26175 else
26176 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26177 k[0] & 0xffffffff, k[1] & 0xffffffff,
26178 k[2] & 0xffffffff, k[3] & 0xffffffff);
26179 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
26180 k[0] & 0xffffffff, k[1] & 0xffffffff,
26181 k[2] & 0xffffffff, k[3] & 0xffffffff);
26182 return;
26185 else if (GET_CODE (x) == CONST_DOUBLE
26186 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
26188 REAL_VALUE_TYPE rv;
26189 long k[2];
26191 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26193 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26194 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
26195 else
26196 REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
26198 if (TARGET_64BIT)
26200 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26201 fputs (DOUBLE_INT_ASM_OP, file);
26202 else
26203 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26204 k[0] & 0xffffffff, k[1] & 0xffffffff);
26205 fprintf (file, "0x%lx%08lx\n",
26206 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26207 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
26208 return;
26210 else
26212 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26213 fputs ("\t.long ", file);
26214 else
26215 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26216 k[0] & 0xffffffff, k[1] & 0xffffffff);
26217 fprintf (file, "0x%lx,0x%lx\n",
26218 k[0] & 0xffffffff, k[1] & 0xffffffff);
26219 return;
26222 else if (GET_CODE (x) == CONST_DOUBLE
26223 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
26225 REAL_VALUE_TYPE rv;
26226 long l;
26228 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26229 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26230 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
26231 else
26232 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
26234 if (TARGET_64BIT)
26236 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26237 fputs (DOUBLE_INT_ASM_OP, file);
26238 else
26239 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26240 if (WORDS_BIG_ENDIAN)
26241 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
26242 else
26243 fprintf (file, "0x%lx\n", l & 0xffffffff);
26244 return;
26246 else
26248 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26249 fputs ("\t.long ", file);
26250 else
26251 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26252 fprintf (file, "0x%lx\n", l & 0xffffffff);
26253 return;
26256 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
26258 unsigned HOST_WIDE_INT low;
26259 HOST_WIDE_INT high;
26261 low = INTVAL (x) & 0xffffffff;
26262 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
26264 /* TOC entries are always Pmode-sized, so when big-endian
26265 smaller integer constants in the TOC need to be padded.
26266 (This is still a win over putting the constants in
26267 a separate constant pool, because then we'd have
26268 to have both a TOC entry _and_ the actual constant.)
26270 For a 32-bit target, CONST_INT values are loaded and shifted
26271 entirely within `low' and can be stored in one TOC entry. */
26273 /* It would be easy to make this work, but it doesn't now. */
26274 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
26276 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
26278 low |= high << 32;
26279 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
26280 high = (HOST_WIDE_INT) low >> 32;
26281 low &= 0xffffffff;
26284 if (TARGET_64BIT)
26286 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26287 fputs (DOUBLE_INT_ASM_OP, file);
26288 else
26289 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26290 (long) high & 0xffffffff, (long) low & 0xffffffff);
26291 fprintf (file, "0x%lx%08lx\n",
26292 (long) high & 0xffffffff, (long) low & 0xffffffff);
26293 return;
26295 else
26297 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
26299 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26300 fputs ("\t.long ", file);
26301 else
26302 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26303 (long) high & 0xffffffff, (long) low & 0xffffffff);
26304 fprintf (file, "0x%lx,0x%lx\n",
26305 (long) high & 0xffffffff, (long) low & 0xffffffff);
26307 else
26309 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26310 fputs ("\t.long ", file);
26311 else
26312 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
26313 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
26315 return;
26319 if (GET_CODE (x) == CONST)
26321 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
26322 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
26324 base = XEXP (XEXP (x, 0), 0);
26325 offset = INTVAL (XEXP (XEXP (x, 0), 1));
26328 switch (GET_CODE (base))
26330 case SYMBOL_REF:
26331 name = XSTR (base, 0);
26332 break;
26334 case LABEL_REF:
26335 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
26336 CODE_LABEL_NUMBER (XEXP (base, 0)));
26337 break;
26339 case CODE_LABEL:
26340 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
26341 break;
26343 default:
26344 gcc_unreachable ();
26347 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26348 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
26349 else
26351 fputs ("\t.tc ", file);
26352 RS6000_OUTPUT_BASENAME (file, name);
26354 if (offset < 0)
26355 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
26356 else if (offset)
26357 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
26359 /* Mark large TOC symbols on AIX with [TE] so they are mapped
26360 after other TOC symbols, reducing overflow of small TOC access
26361 to [TC] symbols. */
26362 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
26363 ? "[TE]," : "[TC],", file);
26366 /* Currently C++ toc references to vtables can be emitted before it
26367 is decided whether the vtable is public or private. If this is
26368 the case, then the linker will eventually complain that there is
26369 a TOC reference to an unknown section. Thus, for vtables only,
26370 we emit the TOC reference to reference the symbol and not the
26371 section. */
26372 if (VTABLE_NAME_P (name))
26374 RS6000_OUTPUT_BASENAME (file, name);
26375 if (offset < 0)
26376 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
26377 else if (offset > 0)
26378 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
26380 else
26381 output_addr_const (file, x);
26383 #if HAVE_AS_TLS
26384 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF
26385 && SYMBOL_REF_TLS_MODEL (base) != 0)
26387 if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
26388 fputs ("@le", file);
26389 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_INITIAL_EXEC)
26390 fputs ("@ie", file);
26391 /* Use global-dynamic for local-dynamic. */
26392 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_GLOBAL_DYNAMIC
26393 || SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_DYNAMIC)
26395 putc ('\n', file);
26396 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
26397 fputs ("\t.tc .", file);
26398 RS6000_OUTPUT_BASENAME (file, name);
26399 fputs ("[TC],", file);
26400 output_addr_const (file, x);
26401 fputs ("@m", file);
26404 #endif
26406 putc ('\n', file);
26409 /* Output an assembler pseudo-op to write an ASCII string of N characters
26410 starting at P to FILE.
26412 On the RS/6000, we have to do this using the .byte operation and
26413 write out special characters outside the quoted string.
26414 Also, the assembler is broken; very long strings are truncated,
26415 so we must artificially break them up early. */
26417 void
26418 output_ascii (FILE *file, const char *p, int n)
26420 char c;
26421 int i, count_string;
26422 const char *for_string = "\t.byte \"";
26423 const char *for_decimal = "\t.byte ";
26424 const char *to_close = NULL;
26426 count_string = 0;
26427 for (i = 0; i < n; i++)
26429 c = *p++;
26430 if (c >= ' ' && c < 0177)
26432 if (for_string)
26433 fputs (for_string, file);
26434 putc (c, file);
26436 /* Write two quotes to get one. */
26437 if (c == '"')
26439 putc (c, file);
26440 ++count_string;
26443 for_string = NULL;
26444 for_decimal = "\"\n\t.byte ";
26445 to_close = "\"\n";
26446 ++count_string;
26448 if (count_string >= 512)
26450 fputs (to_close, file);
26452 for_string = "\t.byte \"";
26453 for_decimal = "\t.byte ";
26454 to_close = NULL;
26455 count_string = 0;
26458 else
26460 if (for_decimal)
26461 fputs (for_decimal, file);
26462 fprintf (file, "%d", c);
26464 for_string = "\n\t.byte \"";
26465 for_decimal = ", ";
26466 to_close = "\n";
26467 count_string = 0;
26471 /* Now close the string if we have written one. Then end the line. */
26472 if (to_close)
26473 fputs (to_close, file);
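/* For example, output_ascii (file, "ab\"c\n", 5) writes roughly

	.byte "ab""c"
	.byte 10

   doubling the embedded quote and falling back to decimal .byte
   values for the non-printable newline.  */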
26476 /* Generate a unique section name for FILENAME for a section type
26477 represented by SECTION_DESC. Output goes into BUF.
26479 SECTION_DESC can be any string, as long as it is different for each
26480 possible section type.
26482 We name the section in the same manner as xlc. The name begins with an
26483 underscore followed by the filename (after stripping any leading directory
26484 names) with the last period replaced by the string SECTION_DESC. If
26485 FILENAME does not contain a period, SECTION_DESC is appended to the end of
26486 the name. */
26488 void
26489 rs6000_gen_section_name (char **buf, const char *filename,
26490 const char *section_desc)
26492 const char *q, *after_last_slash, *last_period = 0;
26493 char *p;
26494 int len;
26496 after_last_slash = filename;
26497 for (q = filename; *q; q++)
26499 if (*q == '/')
26500 after_last_slash = q + 1;
26501 else if (*q == '.')
26502 last_period = q;
26505 len = strlen (after_last_slash) + strlen (section_desc) + 2;
26506 *buf = (char *) xmalloc (len);
26508 p = *buf;
26509 *p++ = '_';
26511 for (q = after_last_slash; *q; q++)
26513 if (q == last_period)
26515 strcpy (p, section_desc);
26516 p += strlen (section_desc);
26517 break;
26520 else if (ISALNUM (*q))
26521 *p++ = *q;
26524 if (last_period == 0)
26525 strcpy (p, section_desc);
26526 else
26527 *p = '\0';
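/* For instance, assuming a SECTION_DESC of ".ro_",
   rs6000_gen_section_name (&buf, "src/foo.c", ".ro_") stores
   "_foo.ro_" in BUF: the directory prefix and the characters after
   the last period are dropped, and the period itself is replaced by
   SECTION_DESC.  */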
26530 /* Emit profile function. */
26532 void
26533 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
26535 /* Non-standard profiling for kernels, which just saves LR then calls
26536 _mcount without worrying about arg saves. The idea is to change
26537 the function prologue as little as possible as it isn't easy to
26538 account for arg save/restore code added just for _mcount. */
26539 if (TARGET_PROFILE_KERNEL)
26540 return;
26542 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26544 #ifndef NO_PROFILE_COUNTERS
26545 # define NO_PROFILE_COUNTERS 0
26546 #endif
26547 if (NO_PROFILE_COUNTERS)
26548 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26549 LCT_NORMAL, VOIDmode, 0);
26550 else
26552 char buf[30];
26553 const char *label_name;
26554 rtx fun;
26556 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26557 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
26558 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
26560 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26561 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
26564 else if (DEFAULT_ABI == ABI_DARWIN)
26566 const char *mcount_name = RS6000_MCOUNT;
26567 int caller_addr_regno = LR_REGNO;
26569 /* Be conservative and always set this, at least for now. */
26570 crtl->uses_pic_offset_table = 1;
26572 #if TARGET_MACHO
26573 /* For PIC code, set up a stub and collect the caller's address
26574 from r0, which is where the prologue puts it. */
26575 if (MACHOPIC_INDIRECT
26576 && crtl->uses_pic_offset_table)
26577 caller_addr_regno = 0;
26578 #endif
26579 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
26580 LCT_NORMAL, VOIDmode, 1,
26581 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
26585 /* Write function profiler code. */
26587 void
26588 output_function_profiler (FILE *file, int labelno)
26590 char buf[100];
26592 switch (DEFAULT_ABI)
26594 default:
26595 gcc_unreachable ();
26597 case ABI_V4:
26598 if (!TARGET_32BIT)
26600 warning (0, "no profiling of 64-bit code for this ABI");
26601 return;
26603 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26604 fprintf (file, "\tmflr %s\n", reg_names[0]);
26605 if (NO_PROFILE_COUNTERS)
26607 asm_fprintf (file, "\tstw %s,4(%s)\n",
26608 reg_names[0], reg_names[1]);
26610 else if (TARGET_SECURE_PLT && flag_pic)
26612 if (TARGET_LINK_STACK)
26614 char name[32];
26615 get_ppc476_thunk_name (name);
26616 asm_fprintf (file, "\tbl %s\n", name);
26618 else
26619 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
26620 asm_fprintf (file, "\tstw %s,4(%s)\n",
26621 reg_names[0], reg_names[1]);
26622 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26623 asm_fprintf (file, "\taddis %s,%s,",
26624 reg_names[12], reg_names[12]);
26625 assemble_name (file, buf);
26626 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
26627 assemble_name (file, buf);
26628 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
26630 else if (flag_pic == 1)
26632 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
26633 asm_fprintf (file, "\tstw %s,4(%s)\n",
26634 reg_names[0], reg_names[1]);
26635 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26636 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
26637 assemble_name (file, buf);
26638 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
26640 else if (flag_pic > 1)
26642 asm_fprintf (file, "\tstw %s,4(%s)\n",
26643 reg_names[0], reg_names[1]);
26644 /* Now, we need to get the address of the label. */
26645 if (TARGET_LINK_STACK)
26647 char name[32];
26648 get_ppc476_thunk_name (name);
26649 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
26650 assemble_name (file, buf);
26651 fputs ("-.\n1:", file);
26652 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26653 asm_fprintf (file, "\taddi %s,%s,4\n",
26654 reg_names[11], reg_names[11]);
26656 else
26658 fputs ("\tbcl 20,31,1f\n\t.long ", file);
26659 assemble_name (file, buf);
26660 fputs ("-.\n1:", file);
26661 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26663 asm_fprintf (file, "\tlwz %s,0(%s)\n",
26664 reg_names[0], reg_names[11]);
26665 asm_fprintf (file, "\tadd %s,%s,%s\n",
26666 reg_names[0], reg_names[0], reg_names[11]);
26668 else
26670 asm_fprintf (file, "\tlis %s,", reg_names[12]);
26671 assemble_name (file, buf);
26672 fputs ("@ha\n", file);
26673 asm_fprintf (file, "\tstw %s,4(%s)\n",
26674 reg_names[0], reg_names[1]);
26675 asm_fprintf (file, "\tla %s,", reg_names[0]);
26676 assemble_name (file, buf);
26677 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
26680 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
26681 fprintf (file, "\tbl %s%s\n",
26682 RS6000_MCOUNT, flag_pic ? "@plt" : "");
26683 break;
26685 case ABI_AIX:
26686 case ABI_ELFv2:
26687 case ABI_DARWIN:
26688 /* Don't do anything, done in output_profile_hook (). */
26689 break;
26695 /* The following variable value is the last issued insn. */
26697 static rtx last_scheduled_insn;
26699 /* The following variable helps to balance issuing of load and
26700 store instructions. */
26702 static int load_store_pendulum;
26704 /* Power4 load update and store update instructions are cracked into a
26705 load or store and an integer insn which are executed in the same cycle.
26706 Branches have their own dispatch slot which does not count against the
26707 GCC issue rate, but it changes the program flow so there are no other
26708 instructions to issue in this cycle. */
26710 static int
26711 rs6000_variable_issue_1 (rtx_insn *insn, int more)
26713 last_scheduled_insn = insn;
26714 if (GET_CODE (PATTERN (insn)) == USE
26715 || GET_CODE (PATTERN (insn)) == CLOBBER)
26717 cached_can_issue_more = more;
26718 return cached_can_issue_more;
26721 if (insn_terminates_group_p (insn, current_group))
26723 cached_can_issue_more = 0;
26724 return cached_can_issue_more;
26727 /* If the insn has no reservation but we reach here, leave MORE unchanged. */
26728 if (recog_memoized (insn) < 0)
26729 return more;
26731 if (rs6000_sched_groups)
26733 if (is_microcoded_insn (insn))
26734 cached_can_issue_more = 0;
26735 else if (is_cracked_insn (insn))
26736 cached_can_issue_more = more > 2 ? more - 2 : 0;
26737 else
26738 cached_can_issue_more = more - 1;
26740 return cached_can_issue_more;
26743 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
26744 return 0;
26746 cached_can_issue_more = more - 1;
26747 return cached_can_issue_more;
26750 static int
26751 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
26753 int r = rs6000_variable_issue_1 (insn, more);
26754 if (verbose)
26755 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
26756 return r;
26759 /* Adjust the cost of a scheduling dependency. Return the new cost of
26760 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
26762 static int
26763 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26765 enum attr_type attr_type;
26767 if (! recog_memoized (insn))
26768 return 0;
26770 switch (REG_NOTE_KIND (link))
26772 case REG_DEP_TRUE:
26774 /* Data dependency; DEP_INSN writes a register that INSN reads
26775 some cycles later. */
26777 /* Separate a load from a narrower, dependent store. */
26778 if (rs6000_sched_groups
26779 && GET_CODE (PATTERN (insn)) == SET
26780 && GET_CODE (PATTERN (dep_insn)) == SET
26781 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
26782 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
26783 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
26784 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
26785 return cost + 14;
26787 attr_type = get_attr_type (insn);
26789 switch (attr_type)
26791 case TYPE_JMPREG:
26792 /* Tell the first scheduling pass about the latency between
26793 a mtctr and bctr (and mtlr and br/blr). The first
26794 scheduling pass will not know about this latency since
26795 the mtctr instruction, which has the latency associated
26796 to it, will be generated by reload. */
26797 return 4;
26798 case TYPE_BRANCH:
26799 /* Leave some extra cycles between a compare and its
26800 dependent branch, to inhibit expensive mispredicts. */
26801 if ((rs6000_cpu_attr == CPU_PPC603
26802 || rs6000_cpu_attr == CPU_PPC604
26803 || rs6000_cpu_attr == CPU_PPC604E
26804 || rs6000_cpu_attr == CPU_PPC620
26805 || rs6000_cpu_attr == CPU_PPC630
26806 || rs6000_cpu_attr == CPU_PPC750
26807 || rs6000_cpu_attr == CPU_PPC7400
26808 || rs6000_cpu_attr == CPU_PPC7450
26809 || rs6000_cpu_attr == CPU_PPCE5500
26810 || rs6000_cpu_attr == CPU_PPCE6500
26811 || rs6000_cpu_attr == CPU_POWER4
26812 || rs6000_cpu_attr == CPU_POWER5
26813 || rs6000_cpu_attr == CPU_POWER7
26814 || rs6000_cpu_attr == CPU_POWER8
26815 || rs6000_cpu_attr == CPU_CELL)
26816 && recog_memoized (dep_insn)
26817 && (INSN_CODE (dep_insn) >= 0))
26819 switch (get_attr_type (dep_insn))
26821 case TYPE_CMP:
26822 case TYPE_FPCOMPARE:
26823 case TYPE_CR_LOGICAL:
26824 case TYPE_DELAYED_CR:
26825 return cost + 2;
26826 case TYPE_EXTS:
26827 case TYPE_MUL:
26828 if (get_attr_dot (dep_insn) == DOT_YES)
26829 return cost + 2;
26830 else
26831 break;
26832 case TYPE_SHIFT:
26833 if (get_attr_dot (dep_insn) == DOT_YES
26834 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
26835 return cost + 2;
26836 else
26837 break;
26838 default:
26839 break;
26841 break;
26843 case TYPE_STORE:
26844 case TYPE_FPSTORE:
26845 if ((rs6000_cpu == PROCESSOR_POWER6)
26846 && recog_memoized (dep_insn)
26847 && (INSN_CODE (dep_insn) >= 0))
26850 if (GET_CODE (PATTERN (insn)) != SET)
26851 /* If this happens, we have to extend this to schedule
26852 optimally. Return default for now. */
26853 return cost;
26855 /* Adjust the cost for the case where the value written
26856 by a fixed point operation is used as the address
26857 gen value on a store. */
26858 switch (get_attr_type (dep_insn))
26860 case TYPE_LOAD:
26861 case TYPE_CNTLZ:
26863 if (! store_data_bypass_p (dep_insn, insn))
26864 return get_attr_sign_extend (dep_insn)
26865 == SIGN_EXTEND_YES ? 6 : 4;
26866 break;
26868 case TYPE_SHIFT:
26870 if (! store_data_bypass_p (dep_insn, insn))
26871 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26872 6 : 3;
26873 break;
26875 case TYPE_INTEGER:
26876 case TYPE_ADD:
26877 case TYPE_LOGICAL:
26878 case TYPE_EXTS:
26879 case TYPE_INSERT:
26881 if (! store_data_bypass_p (dep_insn, insn))
26882 return 3;
26883 break;
26885 case TYPE_STORE:
26886 case TYPE_FPLOAD:
26887 case TYPE_FPSTORE:
26889 if (get_attr_update (dep_insn) == UPDATE_YES
26890 && ! store_data_bypass_p (dep_insn, insn))
26891 return 3;
26892 break;
26894 case TYPE_MUL:
26896 if (! store_data_bypass_p (dep_insn, insn))
26897 return 17;
26898 break;
26900 case TYPE_DIV:
26902 if (! store_data_bypass_p (dep_insn, insn))
26903 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26904 break;
26906 default:
26907 break;
26910 break;
26912 case TYPE_LOAD:
26913 if ((rs6000_cpu == PROCESSOR_POWER6)
26914 && recog_memoized (dep_insn)
26915 && (INSN_CODE (dep_insn) >= 0))
26918 /* Adjust the cost for the case where the value written
26919 by a fixed point instruction is used within the address
26920 gen portion of a subsequent load(u)(x). */
26921 switch (get_attr_type (dep_insn))
26923 case TYPE_LOAD:
26924 case TYPE_CNTLZ:
26926 if (set_to_load_agen (dep_insn, insn))
26927 return get_attr_sign_extend (dep_insn)
26928 == SIGN_EXTEND_YES ? 6 : 4;
26929 break;
26931 case TYPE_SHIFT:
26933 if (set_to_load_agen (dep_insn, insn))
26934 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26935 6 : 3;
26936 break;
26938 case TYPE_INTEGER:
26939 case TYPE_ADD:
26940 case TYPE_LOGICAL:
26941 case TYPE_EXTS:
26942 case TYPE_INSERT:
26944 if (set_to_load_agen (dep_insn, insn))
26945 return 3;
26946 break;
26948 case TYPE_STORE:
26949 case TYPE_FPLOAD:
26950 case TYPE_FPSTORE:
26952 if (get_attr_update (dep_insn) == UPDATE_YES
26953 && set_to_load_agen (dep_insn, insn))
26954 return 3;
26955 break;
26957 case TYPE_MUL:
26959 if (set_to_load_agen (dep_insn, insn))
26960 return 17;
26961 break;
26963 case TYPE_DIV:
26965 if (set_to_load_agen (dep_insn, insn))
26966 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26967 break;
26969 default:
26970 break;
26973 break;
26975 case TYPE_FPLOAD:
26976 if ((rs6000_cpu == PROCESSOR_POWER6)
26977 && get_attr_update (insn) == UPDATE_NO
26978 && recog_memoized (dep_insn)
26979 && (INSN_CODE (dep_insn) >= 0)
26980 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
26981 return 2;
26983 default:
26984 break;
26987 /* Fall out to return default cost. */
26989 break;
26991 case REG_DEP_OUTPUT:
26992 /* Output dependency; DEP_INSN writes a register that INSN writes some
26993 cycles later. */
26994 if ((rs6000_cpu == PROCESSOR_POWER6)
26995 && recog_memoized (dep_insn)
26996 && (INSN_CODE (dep_insn) >= 0))
26998 attr_type = get_attr_type (insn);
27000 switch (attr_type)
27002 case TYPE_FP:
27003 if (get_attr_type (dep_insn) == TYPE_FP)
27004 return 1;
27005 break;
27006 case TYPE_FPLOAD:
27007 if (get_attr_update (insn) == UPDATE_NO
27008 && get_attr_type (dep_insn) == TYPE_MFFGPR)
27009 return 2;
27010 break;
27011 default:
27012 break;
27015 case REG_DEP_ANTI:
27016 /* Anti dependency; DEP_INSN reads a register that INSN writes some
27017 cycles later. */
27018 return 0;
27020 default:
27021 gcc_unreachable ();
27024 return cost;
27027 /* Debug version of rs6000_adjust_cost. */
27029 static int
27030 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
27031 int cost)
27033 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
27035 if (ret != cost)
27037 const char *dep;
27039 switch (REG_NOTE_KIND (link))
27041 default: dep = "unknown dependency"; break;
27042 case REG_DEP_TRUE: dep = "data dependency"; break;
27043 case REG_DEP_OUTPUT: dep = "output dependency"; break;
27044 case REG_DEP_ANTI: dep = "anti dependency"; break;
27047 fprintf (stderr,
27048 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
27049 "%s, insn:\n", ret, cost, dep);
27051 debug_rtx (insn);
27054 return ret;
27057 /* The function returns true if INSN is microcoded.
27058 Return false otherwise. */
27060 static bool
27061 is_microcoded_insn (rtx_insn *insn)
27063 if (!insn || !NONDEBUG_INSN_P (insn)
27064 || GET_CODE (PATTERN (insn)) == USE
27065 || GET_CODE (PATTERN (insn)) == CLOBBER)
27066 return false;
27068 if (rs6000_cpu_attr == CPU_CELL)
27069 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
27071 if (rs6000_sched_groups
27072 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
27074 enum attr_type type = get_attr_type (insn);
27075 if ((type == TYPE_LOAD
27076 && get_attr_update (insn) == UPDATE_YES
27077 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
27078 || ((type == TYPE_LOAD || type == TYPE_STORE)
27079 && get_attr_update (insn) == UPDATE_YES
27080 && get_attr_indexed (insn) == INDEXED_YES)
27081 || type == TYPE_MFCR)
27082 return true;
27085 return false;
27088 /* The function returns true if INSN is cracked into 2 instructions
27089 by the processor (and therefore occupies 2 issue slots). */
27091 static bool
27092 is_cracked_insn (rtx_insn *insn)
27094 if (!insn || !NONDEBUG_INSN_P (insn)
27095 || GET_CODE (PATTERN (insn)) == USE
27096 || GET_CODE (PATTERN (insn)) == CLOBBER)
27097 return false;
27099 if (rs6000_sched_groups
27100 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
27102 enum attr_type type = get_attr_type (insn);
27103 if ((type == TYPE_LOAD
27104 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27105 && get_attr_update (insn) == UPDATE_NO)
27106 || (type == TYPE_LOAD
27107 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
27108 && get_attr_update (insn) == UPDATE_YES
27109 && get_attr_indexed (insn) == INDEXED_NO)
27110 || (type == TYPE_STORE
27111 && get_attr_update (insn) == UPDATE_YES
27112 && get_attr_indexed (insn) == INDEXED_NO)
27113 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
27114 && get_attr_update (insn) == UPDATE_YES)
27115 || type == TYPE_DELAYED_CR
27116 || (type == TYPE_EXTS
27117 && get_attr_dot (insn) == DOT_YES)
27118 || (type == TYPE_SHIFT
27119 && get_attr_dot (insn) == DOT_YES
27120 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
27121 || (type == TYPE_MUL
27122 && get_attr_dot (insn) == DOT_YES)
27123 || type == TYPE_DIV
27124 || (type == TYPE_INSERT
27125 && get_attr_size (insn) == SIZE_32))
27126 return true;
27129 return false;
27132 /* The function returns true if INSN can be issued only from
27133 the branch slot. */
27135 static bool
27136 is_branch_slot_insn (rtx_insn *insn)
27138 if (!insn || !NONDEBUG_INSN_P (insn)
27139 || GET_CODE (PATTERN (insn)) == USE
27140 || GET_CODE (PATTERN (insn)) == CLOBBER)
27141 return false;
27143 if (rs6000_sched_groups)
27145 enum attr_type type = get_attr_type (insn);
27146 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
27147 return true;
27148 return false;
27151 return false;
27154 /* The function returns true if OUT_INSN sets a value that is
27155 used in the address generation computation of IN_INSN. */
27156 static bool
27157 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
27159 rtx out_set, in_set;
27161 /* For performance reasons, only handle the simple case where
27162 both loads are a single_set. */
27163 out_set = single_set (out_insn);
27164 if (out_set)
27166 in_set = single_set (in_insn);
27167 if (in_set)
27168 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
27171 return false;
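/* For example, if OUT_INSN sets r9 (say "r9 = r10 + 8") and IN_INSN
   is the load "r3 = mem (r9 + 4)", r9 is mentioned in the load's
   address, so this returns true.  */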
27174 /* Try to determine base/offset/size parts of the given MEM.
27175 Return true if successful, false if any of the values could not
27176 be determined.
27178 This function only looks for REG or REG+CONST address forms.
27179 REG+REG address form will return false. */
27181 static bool
27182 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
27183 HOST_WIDE_INT *size)
27185 rtx addr_rtx;
27186 if (MEM_SIZE_KNOWN_P (mem))
27187 *size = MEM_SIZE (mem);
27188 else
27189 return false;
27191 addr_rtx = (XEXP (mem, 0));
27192 if (GET_CODE (addr_rtx) == PRE_MODIFY)
27193 addr_rtx = XEXP (addr_rtx, 1);
27195 *offset = 0;
27196 while (GET_CODE (addr_rtx) == PLUS
27197 && CONST_INT_P (XEXP (addr_rtx, 1)))
27199 *offset += INTVAL (XEXP (addr_rtx, 1));
27200 addr_rtx = XEXP (addr_rtx, 0);
27202 if (!REG_P (addr_rtx))
27203 return false;
27205 *base = addr_rtx;
27206 return true;
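/* For example, (mem:SI (plus (reg 9) (const_int 16))) with a known
   size yields *BASE = r9, *OFFSET = 16, *SIZE = 4, while a reg+reg
   indexed address makes this return false.  */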
27209 /* Return true if the target storage location of
27210 MEM1 is adjacent to the target storage location of MEM2. */
27213 static bool
27214 adjacent_mem_locations (rtx mem1, rtx mem2)
27216 rtx reg1, reg2;
27217 HOST_WIDE_INT off1, size1, off2, size2;
27219 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27220 && get_memref_parts (mem2, &reg2, &off2, &size2))
27221 return ((REGNO (reg1) == REGNO (reg2))
27222 && ((off1 + size1 == off2)
27223 || (off2 + size2 == off1)));
27225 return false;
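/* So two 4-byte accesses at r1+8 and r1+12 off the same base register
   are adjacent (8 + 4 == 12); mem_locations_overlap below instead
   tests whether the two byte ranges intersect.  */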
27228 /* This function returns true if it can be determined that the two MEM
27229 locations overlap by at least 1 byte based on base reg/offset/size. */
27231 static bool
27232 mem_locations_overlap (rtx mem1, rtx mem2)
27234 rtx reg1, reg2;
27235 HOST_WIDE_INT off1, size1, off2, size2;
27237 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27238 && get_memref_parts (mem2, &reg2, &off2, &size2))
27239 return ((REGNO (reg1) == REGNO (reg2))
27240 && (((off1 <= off2) && (off1 + size1 > off2))
27241 || ((off2 <= off1) && (off2 + size2 > off1))));
27243 return false;
27246 /* A C statement (sans semicolon) to update the integer scheduling
27247 priority INSN_PRIORITY (INSN). Increase the priority to execute the
27248 INSN earlier, reduce the priority to execute INSN later. Do not
27249 define this macro if you do not need to adjust the scheduling
27250 priorities of insns. */
27252 static int
27253 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
27255 rtx load_mem, str_mem;
27256 /* On machines (like the 750) which have asymmetric integer units,
27257 where one integer unit can do multiply and divides and the other
27258 can't, reduce the priority of multiply/divide so it is scheduled
27259 before other integer operations. */
27261 #if 0
27262 if (! INSN_P (insn))
27263 return priority;
27265 if (GET_CODE (PATTERN (insn)) == USE)
27266 return priority;
27268 switch (rs6000_cpu_attr) {
27269 case CPU_PPC750:
27270 switch (get_attr_type (insn))
27272 default:
27273 break;
27275 case TYPE_MUL:
27276 case TYPE_DIV:
27277 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
27278 priority, priority);
27279 if (priority >= 0 && priority < 0x01000000)
27280 priority >>= 3;
27281 break;
27284 #endif
27286 if (insn_must_be_first_in_group (insn)
27287 && reload_completed
27288 && current_sched_info->sched_max_insns_priority
27289 && rs6000_sched_restricted_insns_priority)
27292 /* Prioritize insns that can be dispatched only in the first
27293 dispatch slot. */
27294 if (rs6000_sched_restricted_insns_priority == 1)
27295 /* Attach highest priority to insn. This means that in
27296 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
27297 precede 'priority' (critical path) considerations. */
27298 return current_sched_info->sched_max_insns_priority;
27299 else if (rs6000_sched_restricted_insns_priority == 2)
27300 /* Increase priority of insn by a minimal amount. This means that in
27301 haifa-sched.c:ready_sort(), only 'priority' (critical path)
27302 considerations precede dispatch-slot restriction considerations. */
27303 return (priority + 1);
27306 if (rs6000_cpu == PROCESSOR_POWER6
27307 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
27308 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
27309 /* Attach highest priority to insn if the scheduler has just issued two
27310 stores and this instruction is a load, or two loads and this instruction
27311 is a store. Power6 wants loads and stores scheduled alternately
27312 when possible. */
27313 return current_sched_info->sched_max_insns_priority;
27315 return priority;
27318 /* Return true if the instruction is nonpipelined on the Cell. */
27319 static bool
27320 is_nonpipeline_insn (rtx_insn *insn)
27322 enum attr_type type;
27323 if (!insn || !NONDEBUG_INSN_P (insn)
27324 || GET_CODE (PATTERN (insn)) == USE
27325 || GET_CODE (PATTERN (insn)) == CLOBBER)
27326 return false;
27328 type = get_attr_type (insn);
27329 if (type == TYPE_MUL
27330 || type == TYPE_DIV
27331 || type == TYPE_SDIV
27332 || type == TYPE_DDIV
27333 || type == TYPE_SSQRT
27334 || type == TYPE_DSQRT
27335 || type == TYPE_MFCR
27336 || type == TYPE_MFCRF
27337 || type == TYPE_MFJMPR)
27339 return true;
27341 return false;
27345 /* Return how many instructions the machine can issue per cycle. */
27347 static int
27348 rs6000_issue_rate (void)
27350 /* Unless scheduling for register pressure, use issue rate of 1 for
27351 first scheduling pass to decrease degradation. */
27352 if (!reload_completed && !flag_sched_pressure)
27353 return 1;
27355 switch (rs6000_cpu_attr) {
27356 case CPU_RS64A:
27357 case CPU_PPC601: /* ? */
27358 case CPU_PPC7450:
27359 return 3;
27360 case CPU_PPC440:
27361 case CPU_PPC603:
27362 case CPU_PPC750:
27363 case CPU_PPC7400:
27364 case CPU_PPC8540:
27365 case CPU_PPC8548:
27366 case CPU_CELL:
27367 case CPU_PPCE300C2:
27368 case CPU_PPCE300C3:
27369 case CPU_PPCE500MC:
27370 case CPU_PPCE500MC64:
27371 case CPU_PPCE5500:
27372 case CPU_PPCE6500:
27373 case CPU_TITAN:
27374 return 2;
27375 case CPU_PPC476:
27376 case CPU_PPC604:
27377 case CPU_PPC604E:
27378 case CPU_PPC620:
27379 case CPU_PPC630:
27380 return 4;
27381 case CPU_POWER4:
27382 case CPU_POWER5:
27383 case CPU_POWER6:
27384 case CPU_POWER7:
27385 return 5;
27386 case CPU_POWER8:
27387 return 7;
27388 default:
27389 return 1;
27393 /* Return how many instructions to look ahead for better insn
27394 scheduling. */
27396 static int
27397 rs6000_use_sched_lookahead (void)
27399 switch (rs6000_cpu_attr)
27401 case CPU_PPC8540:
27402 case CPU_PPC8548:
27403 return 4;
27405 case CPU_CELL:
27406 return (reload_completed ? 8 : 0);
27408 default:
27409 return 0;
27413 /* We are choosing insn from the ready queue. Return zero if INSN can be
27414 chosen. */
27415 static int
27416 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
27418 if (ready_index == 0)
27419 return 0;
27421 if (rs6000_cpu_attr != CPU_CELL)
27422 return 0;
27424 gcc_assert (insn != NULL_RTX && INSN_P (insn));
27426 if (!reload_completed
27427 || is_nonpipeline_insn (insn)
27428 || is_microcoded_insn (insn))
27429 return 1;
27431 return 0;
27434 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
27435 and return true. */
27437 static bool
27438 find_mem_ref (rtx pat, rtx *mem_ref)
27440 const char * fmt;
27441 int i, j;
27443 /* stack_tie does not produce any real memory traffic. */
27444 if (tie_operand (pat, VOIDmode))
27445 return false;
27447 if (GET_CODE (pat) == MEM)
27449 *mem_ref = pat;
27450 return true;
27453 /* Recursively process the pattern. */
27454 fmt = GET_RTX_FORMAT (GET_CODE (pat));
27456 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
27458 if (fmt[i] == 'e')
27460 if (find_mem_ref (XEXP (pat, i), mem_ref))
27461 return true;
27463 else if (fmt[i] == 'E')
27464 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
27466 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
27467 return true;
27471 return false;
27474 /* Determine if PAT is a PATTERN of a load insn. */
27476 static bool
27477 is_load_insn1 (rtx pat, rtx *load_mem)
27479 if (!pat)
27480 return false;
27482 if (GET_CODE (pat) == SET)
27483 return find_mem_ref (SET_SRC (pat), load_mem);
27485 if (GET_CODE (pat) == PARALLEL)
27487 int i;
27489 for (i = 0; i < XVECLEN (pat, 0); i++)
27490 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
27491 return true;
27494 return false;
27497 /* Determine if INSN loads from memory. */
27499 static bool
27500 is_load_insn (rtx insn, rtx *load_mem)
27502 if (!insn || !INSN_P (insn))
27503 return false;
27505 if (CALL_P (insn))
27506 return false;
27508 return is_load_insn1 (PATTERN (insn), load_mem);
27511 /* Determine if PAT is a PATTERN of a store insn. */
27513 static bool
27514 is_store_insn1 (rtx pat, rtx *str_mem)
27516 if (!pat)
27517 return false;
27519 if (GET_CODE (pat) == SET)
27520 return find_mem_ref (SET_DEST (pat), str_mem);
27522 if (GET_CODE (pat) == PARALLEL)
27524 int i;
27526 for (i = 0; i < XVECLEN (pat, 0); i++)
27527 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
27528 return true;
27531 return false;
27534 /* Determine if INSN stores to memory. */
27536 static bool
27537 is_store_insn (rtx insn, rtx *str_mem)
27539 if (!insn || !INSN_P (insn))
27540 return false;
27542 return is_store_insn1 (PATTERN (insn), str_mem);
27545 /* Returns whether the dependence between INSN and NEXT is considered
27546 costly by the given target. */
27548 static bool
27549 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
27551 rtx insn;
27552 rtx next;
27553 rtx load_mem, str_mem;
27555 /* If the flag is not enabled, no dependence is considered costly;
27556 allow all dependent insns in the same group.
27557 This is the most aggressive option. */
27558 if (rs6000_sched_costly_dep == no_dep_costly)
27559 return false;
27561 /* If the flag is set to 1, a dependence is always considered costly;
27562 do not allow dependent instructions in the same group.
27563 This is the most conservative option. */
27564 if (rs6000_sched_costly_dep == all_deps_costly)
27565 return true;
27567 insn = DEP_PRO (dep);
27568 next = DEP_CON (dep);
27570 if (rs6000_sched_costly_dep == store_to_load_dep_costly
27571 && is_load_insn (next, &load_mem)
27572 && is_store_insn (insn, &str_mem))
27573 /* Prevent load after store in the same group. */
27574 return true;
27576 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
27577 && is_load_insn (next, &load_mem)
27578 && is_store_insn (insn, &str_mem)
27579 && DEP_TYPE (dep) == REG_DEP_TRUE
27580 && mem_locations_overlap(str_mem, load_mem))
27581 /* Prevent load after store in the same group if it is a true
27582 dependence. */
27583 return true;
27585 /* The flag is set to X; dependences with latency >= X are considered costly,
27586 and will not be scheduled in the same group. */
27587 if (rs6000_sched_costly_dep <= max_dep_latency
27588 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
27589 return true;
27591 return false;
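/* Worked example (editorial, illustrative values): with a numeric
   setting such as -msched-costly-dep=5 (i.e. <= max_dep_latency),
   a dependence with COST 6 at DISTANCE 0 gives 6 - 0 >= 5 and is
   treated as costly, so the two insns are kept in separate dispatch
   groups; at DISTANCE 3 the value 6 - 3 = 3 < 5 and the insns may
   share a group.  */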
27594 /* Return the next insn after INSN that is found before TAIL is reached,
27595 skipping any "non-active" insns, i.e. insns that will not actually occupy
27596 an issue slot. Return NULL_RTX if such an insn is not found. */
27598 static rtx_insn *
27599 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
27601 if (insn == NULL_RTX || insn == tail)
27602 return NULL;
27604 while (1)
27606 insn = NEXT_INSN (insn);
27607 if (insn == NULL_RTX || insn == tail)
27608 return NULL;
27610 if (CALL_P (insn)
27611 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
27612 || (NONJUMP_INSN_P (insn)
27613 && GET_CODE (PATTERN (insn)) != USE
27614 && GET_CODE (PATTERN (insn)) != CLOBBER
27615 && INSN_CODE (insn) != CODE_FOR_stack_tie))
27616 break;
27618 return insn;
27621 /* We are about to begin issuing insns for this clock cycle. */
27623 static int
27624 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
27625 rtx_insn **ready ATTRIBUTE_UNUSED,
27626 int *pn_ready ATTRIBUTE_UNUSED,
27627 int clock_var ATTRIBUTE_UNUSED)
27629 int n_ready = *pn_ready;
27631 if (sched_verbose)
27632 fprintf (dump, "// rs6000_sched_reorder :\n");
27634 /* Reorder the ready list if the next insn to issue (the last
27635 entry on the ready list) is a nonpipelined insn. */
27636 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
27638 if (is_nonpipeline_insn (ready[n_ready - 1])
27639 && (recog_memoized (ready[n_ready - 2]) > 0))
27640 /* Simply swap first two insns. */
27641 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
27644 if (rs6000_cpu == PROCESSOR_POWER6)
27645 load_store_pendulum = 0;
27647 return rs6000_issue_rate ();
27650 /* Like rs6000_sched_reorder, but called after issuing each insn. */
27652 static int
27653 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
27654 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
27656 if (sched_verbose)
27657 fprintf (dump, "// rs6000_sched_reorder2 :\n");
27659 /* For Power6, we need to handle some special cases to try and keep the
27660 store queue from overflowing and triggering expensive flushes.
27662 This code monitors how load and store instructions are being issued
27663 and skews the ready list one way or the other to increase the likelihood
27664 that a desired instruction is issued at the proper time.
27666 A couple of things are done. First, we maintain a "load_store_pendulum"
27667 to track the current state of load/store issue.
27669 - If the pendulum is at zero, then no loads or stores have been
27670 issued in the current cycle so we do nothing.
27672 - If the pendulum is 1, then a single load has been issued in this
27673 cycle and we attempt to locate another load in the ready list to
27674 issue with it.
27676 - If the pendulum is -2, then two stores have already been
27677 issued in this cycle, so we increase the priority of the first load
27678 in the ready list to increase its likelihood of being chosen first
27679 in the next cycle.
27681 - If the pendulum is -1, then a single store has been issued in this
27682 cycle and we attempt to locate another store in the ready list to
27683 issue with it, preferring a store to an adjacent memory location to
27684 facilitate store pairing in the store queue.
27686 - If the pendulum is 2, then two loads have already been
27687 issued in this cycle, so we increase the priority of the first store
27688 in the ready list to increase its likelihood of being chosen first
27689 in the next cycle.
27691 - If the pendulum is < -2 or > 2, then do nothing.
27693 Note: This code covers the most common scenarios. There exist
27694 non-load/store instructions which make use of the LSU and which
27695 would need to be accounted for to strictly model the behavior
27696 of the machine. Those instructions are currently unaccounted
27697 for to help minimize the compile time overhead of this code. (A worked pendulum trace follows this function body.) */
27699 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
27701 int pos;
27702 int i;
27703 rtx_insn *tmp;
27704 rtx load_mem, str_mem;
27706 if (is_store_insn (last_scheduled_insn, &str_mem))
27707 /* Issuing a store, swing the load_store_pendulum to the left */
27708 load_store_pendulum--;
27709 else if (is_load_insn (last_scheduled_insn, &load_mem))
27710 /* Issuing a load, swing the load_store_pendulum to the right */
27711 load_store_pendulum++;
27712 else
27713 return cached_can_issue_more;
27715 /* If the pendulum is balanced, or there is only one instruction on
27716 the ready list, then all is well, so return. */
27717 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
27718 return cached_can_issue_more;
27720 if (load_store_pendulum == 1)
27722 /* A load has been issued in this cycle. Scan the ready list
27723 for another load to issue with it */
27724 pos = *pn_ready-1;
27726 while (pos >= 0)
27728 if (is_load_insn (ready[pos], &load_mem))
27730 /* Found a load. Move it to the head of the ready list,
27731 and adjust its priority so that it is more likely to
27732 stay there. */
27733 tmp = ready[pos];
27734 for (i=pos; i<*pn_ready-1; i++)
27735 ready[i] = ready[i + 1];
27736 ready[*pn_ready-1] = tmp;
27738 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27739 INSN_PRIORITY (tmp)++;
27740 break;
27742 pos--;
27745 else if (load_store_pendulum == -2)
27747 /* Two stores have been issued in this cycle. Increase the
27748 priority of the first load in the ready list to favor it for
27749 issuing in the next cycle. */
27750 pos = *pn_ready-1;
27752 while (pos >= 0)
27754 if (is_load_insn (ready[pos], &load_mem)
27755 && !sel_sched_p ()
27756 && INSN_PRIORITY_KNOWN (ready[pos]))
27758 INSN_PRIORITY (ready[pos])++;
27760 /* Adjust the pendulum to account for the fact that a load
27761 was found and increased in priority. This is to prevent
27762 increasing the priority of multiple loads */
27763 load_store_pendulum--;
27765 break;
27767 pos--;
27770 else if (load_store_pendulum == -1)
27772 /* A store has been issued in this cycle. Scan the ready list for
27773 another store to issue with it, preferring a store to an adjacent
27774 memory location */
27775 int first_store_pos = -1;
27777 pos = *pn_ready-1;
27779 while (pos >= 0)
27781 if (is_store_insn (ready[pos], &str_mem))
27783 rtx str_mem2;
27784 /* Maintain the index of the first store found on the
27785 list */
27786 if (first_store_pos == -1)
27787 first_store_pos = pos;
27789 if (is_store_insn (last_scheduled_insn, &str_mem2)
27790 && adjacent_mem_locations (str_mem, str_mem2))
27792 /* Found an adjacent store. Move it to the head of the
27793 ready list, and adjust its priority so that it is
27794 more likely to stay there. */
27795 tmp = ready[pos];
27796 for (i=pos; i<*pn_ready-1; i++)
27797 ready[i] = ready[i + 1];
27798 ready[*pn_ready-1] = tmp;
27800 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27801 INSN_PRIORITY (tmp)++;
27803 first_store_pos = -1;
27805 break;
27808 pos--;
27811 if (first_store_pos >= 0)
27813 /* An adjacent store wasn't found, but a non-adjacent store was,
27814 so move the non-adjacent store to the front of the ready
27815 list, and adjust its priority so that it is more likely to
27816 stay there. */
27817 tmp = ready[first_store_pos];
27818 for (i=first_store_pos; i<*pn_ready-1; i++)
27819 ready[i] = ready[i + 1];
27820 ready[*pn_ready-1] = tmp;
27821 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27822 INSN_PRIORITY (tmp)++;
27825 else if (load_store_pendulum == 2)
27827 /* Two loads have been issued in this cycle. Increase the priority
27828 of the first store in the ready list to favor it for issuing in
27829 the next cycle. */
27830 pos = *pn_ready-1;
27832 while (pos >= 0)
27834 if (is_store_insn (ready[pos], &str_mem)
27835 && !sel_sched_p ()
27836 && INSN_PRIORITY_KNOWN (ready[pos]))
27838 INSN_PRIORITY (ready[pos])++;
27840 /* Adjust the pendulum to account for the fact that a store
27841 was found and increased in priority. This is to prevent
27842 increasing the priority of multiple stores */
27843 load_store_pendulum++;
27845 break;
27847 pos--;
27852 return cached_can_issue_more;
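/* Worked example (editorial): a hypothetical POWER6 issue sequence
   within one dispatch cycle, starting from a balanced pendulum:

     pendulum  0   issue a store  ->  pendulum -1: scan the ready
                                      list for a second store,
                                      preferring one adjacent to the
                                      first;
     pendulum -1   issue a store  ->  pendulum -2: bump the priority
                                      of the first ready load so that
                                      a load is favored next cycle.

   rs6000_sched_reorder resets the pendulum to 0 at the start of each
   cycle.  */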
27855 /* Return whether the presence of INSN causes a dispatch group termination
27856 of group WHICH_GROUP.
27858 If WHICH_GROUP == current_group, this function will return true if INSN
27859 causes the termination of the current group (i.e., the dispatch group to
27860 which INSN belongs). This means that INSN will be the last insn in the
27861 group it belongs to.
27863 If WHICH_GROUP == previous_group, this function will return true if INSN
27864 causes the termination of the previous group (i.e., the dispatch group that
27865 precedes the group to which INSN belongs). This means that INSN will be
27866 the first insn in the group it belongs to. */
27868 static bool
27869 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
27871 bool first, last;
27873 if (! insn)
27874 return false;
27876 first = insn_must_be_first_in_group (insn);
27877 last = insn_must_be_last_in_group (insn);
27879 if (first && last)
27880 return true;
27882 if (which_group == current_group)
27883 return last;
27884 else if (which_group == previous_group)
27885 return first;
27887 return false;
27891 static bool
27892 insn_must_be_first_in_group (rtx_insn *insn)
27894 enum attr_type type;
27896 if (!insn
27897 || NOTE_P (insn)
27898 || DEBUG_INSN_P (insn)
27899 || GET_CODE (PATTERN (insn)) == USE
27900 || GET_CODE (PATTERN (insn)) == CLOBBER)
27901 return false;
27903 switch (rs6000_cpu)
27905 case PROCESSOR_POWER5:
27906 if (is_cracked_insn (insn))
27907 return true; /* FALLTHRU */
27908 case PROCESSOR_POWER4:
27909 if (is_microcoded_insn (insn))
27910 return true;
27912 if (!rs6000_sched_groups)
27913 return false;
27915 type = get_attr_type (insn);
27917 switch (type)
27919 case TYPE_MFCR:
27920 case TYPE_MFCRF:
27921 case TYPE_MTCR:
27922 case TYPE_DELAYED_CR:
27923 case TYPE_CR_LOGICAL:
27924 case TYPE_MTJMPR:
27925 case TYPE_MFJMPR:
27926 case TYPE_DIV:
27927 case TYPE_LOAD_L:
27928 case TYPE_STORE_C:
27929 case TYPE_ISYNC:
27930 case TYPE_SYNC:
27931 return true;
27932 default:
27933 break;
27935 break;
27936 case PROCESSOR_POWER6:
27937 type = get_attr_type (insn);
27939 switch (type)
27941 case TYPE_EXTS:
27942 case TYPE_CNTLZ:
27943 case TYPE_TRAP:
27944 case TYPE_MUL:
27945 case TYPE_INSERT:
27946 case TYPE_FPCOMPARE:
27947 case TYPE_MFCR:
27948 case TYPE_MTCR:
27949 case TYPE_MFJMPR:
27950 case TYPE_MTJMPR:
27951 case TYPE_ISYNC:
27952 case TYPE_SYNC:
27953 case TYPE_LOAD_L:
27954 case TYPE_STORE_C:
27955 return true;
27956 case TYPE_SHIFT:
27957 if (get_attr_dot (insn) == DOT_NO
27958 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
27959 return true;
27960 else
27961 break;
27962 case TYPE_DIV:
27963 if (get_attr_size (insn) == SIZE_32)
27964 return true;
27965 else
27966 break;
27967 case TYPE_LOAD:
27968 case TYPE_STORE:
27969 case TYPE_FPLOAD:
27970 case TYPE_FPSTORE:
27971 if (get_attr_update (insn) == UPDATE_YES)
27972 return true;
27973 else
27974 break;
27975 default:
27976 break;
27978 break;
27979 case PROCESSOR_POWER7:
27980 type = get_attr_type (insn);
27982 switch (type)
27984 case TYPE_CR_LOGICAL:
27985 case TYPE_MFCR:
27986 case TYPE_MFCRF:
27987 case TYPE_MTCR:
27988 case TYPE_DIV:
27989 case TYPE_ISYNC:
27990 case TYPE_LOAD_L:
27991 case TYPE_STORE_C:
27992 case TYPE_MFJMPR:
27993 case TYPE_MTJMPR:
27994 return true;
27995 case TYPE_MUL:
27996 case TYPE_SHIFT:
27997 case TYPE_EXTS:
27998 if (get_attr_dot (insn) == DOT_YES)
27999 return true;
28000 else
28001 break;
28002 case TYPE_LOAD:
28003 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28004 || get_attr_update (insn) == UPDATE_YES)
28005 return true;
28006 else
28007 break;
28008 case TYPE_STORE:
28009 case TYPE_FPLOAD:
28010 case TYPE_FPSTORE:
28011 if (get_attr_update (insn) == UPDATE_YES)
28012 return true;
28013 else
28014 break;
28015 default:
28016 break;
28018 break;
28019 case PROCESSOR_POWER8:
28020 type = get_attr_type (insn);
28022 switch (type)
28024 case TYPE_CR_LOGICAL:
28025 case TYPE_DELAYED_CR:
28026 case TYPE_MFCR:
28027 case TYPE_MFCRF:
28028 case TYPE_MTCR:
28029 case TYPE_SYNC:
28030 case TYPE_ISYNC:
28031 case TYPE_LOAD_L:
28032 case TYPE_STORE_C:
28033 case TYPE_VECSTORE:
28034 case TYPE_MFJMPR:
28035 case TYPE_MTJMPR:
28036 return true;
28037 case TYPE_SHIFT:
28038 case TYPE_EXTS:
28039 case TYPE_MUL:
28040 if (get_attr_dot (insn) == DOT_YES)
28041 return true;
28042 else
28043 break;
28044 case TYPE_LOAD:
28045 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28046 || get_attr_update (insn) == UPDATE_YES)
28047 return true;
28048 else
28049 break;
28050 case TYPE_STORE:
28051 if (get_attr_update (insn) == UPDATE_YES
28052 && get_attr_indexed (insn) == INDEXED_YES)
28053 return true;
28054 else
28055 break;
28056 default:
28057 break;
28059 break;
28060 default:
28061 break;
28064 return false;
28067 static bool
28068 insn_must_be_last_in_group (rtx_insn *insn)
28070 enum attr_type type;
28072 if (!insn
28073 || NOTE_P (insn)
28074 || DEBUG_INSN_P (insn)
28075 || GET_CODE (PATTERN (insn)) == USE
28076 || GET_CODE (PATTERN (insn)) == CLOBBER)
28077 return false;
28079 switch (rs6000_cpu) {
28080 case PROCESSOR_POWER4:
28081 case PROCESSOR_POWER5:
28082 if (is_microcoded_insn (insn))
28083 return true;
28085 if (is_branch_slot_insn (insn))
28086 return true;
28088 break;
28089 case PROCESSOR_POWER6:
28090 type = get_attr_type (insn);
28092 switch (type)
28094 case TYPE_EXTS:
28095 case TYPE_CNTLZ:
28096 case TYPE_TRAP:
28097 case TYPE_MUL:
28098 case TYPE_FPCOMPARE:
28099 case TYPE_MFCR:
28100 case TYPE_MTCR:
28101 case TYPE_MFJMPR:
28102 case TYPE_MTJMPR:
28103 case TYPE_ISYNC:
28104 case TYPE_SYNC:
28105 case TYPE_LOAD_L:
28106 case TYPE_STORE_C:
28107 return true;
28108 case TYPE_SHIFT:
28109 if (get_attr_dot (insn) == DOT_NO
28110 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
28111 return true;
28112 else
28113 break;
28114 case TYPE_DIV:
28115 if (get_attr_size (insn) == SIZE_32)
28116 return true;
28117 else
28118 break;
28119 default:
28120 break;
28122 break;
28123 case PROCESSOR_POWER7:
28124 type = get_attr_type (insn);
28126 switch (type)
28128 case TYPE_ISYNC:
28129 case TYPE_SYNC:
28130 case TYPE_LOAD_L:
28131 case TYPE_STORE_C:
28132 return true;
28133 case TYPE_LOAD:
28134 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28135 && get_attr_update (insn) == UPDATE_YES)
28136 return true;
28137 else
28138 break;
28139 case TYPE_STORE:
28140 if (get_attr_update (insn) == UPDATE_YES
28141 && get_attr_indexed (insn) == INDEXED_YES)
28142 return true;
28143 else
28144 break;
28145 default:
28146 break;
28148 break;
28149 case PROCESSOR_POWER8:
28150 type = get_attr_type (insn);
28152 switch (type)
28154 case TYPE_MFCR:
28155 case TYPE_MTCR:
28156 case TYPE_ISYNC:
28157 case TYPE_SYNC:
28158 case TYPE_LOAD_L:
28159 case TYPE_STORE_C:
28160 return true;
28161 case TYPE_LOAD:
28162 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28163 && get_attr_update (insn) == UPDATE_YES)
28164 return true;
28165 else
28166 break;
28167 case TYPE_STORE:
28168 if (get_attr_update (insn) == UPDATE_YES
28169 && get_attr_indexed (insn) == INDEXED_YES)
28170 return true;
28171 else
28172 break;
28173 default:
28174 break;
28176 break;
28177 default:
28178 break;
28181 return false;
28184 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
28185 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
28187 static bool
28188 is_costly_group (rtx *group_insns, rtx next_insn)
28190 int i;
28191 int issue_rate = rs6000_issue_rate ();
28193 for (i = 0; i < issue_rate; i++)
28195 sd_iterator_def sd_it;
28196 dep_t dep;
28197 rtx insn = group_insns[i];
28199 if (!insn)
28200 continue;
28202 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
28204 rtx next = DEP_CON (dep);
28206 if (next == next_insn
28207 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
28208 return true;
28212 return false;
28215 /* Utility of the function redefine_groups.
28216 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
28217 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
28218 to keep it "far" (in a separate group) from GROUP_INSNS, following
28219 one of the following schemes, depending on the value of the flag
28220 -minsert_sched_nops = X:
28221 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
28222 in order to force NEXT_INSN into a separate group.
28223 (2) X < sched_finish_regroup_exact: insert exactly X nops.
28224 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
28225 insertion (has a group just ended, how many vacant issue slots remain in the
28226 last group, and how many dispatch groups were encountered so far). */
28228 static int
28229 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
28230 rtx_insn *next_insn, bool *group_end, int can_issue_more,
28231 int *group_count)
28233 rtx nop;
28234 bool force;
28235 int issue_rate = rs6000_issue_rate ();
28236 bool end = *group_end;
28237 int i;
28239 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
28240 return can_issue_more;
28242 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
28243 return can_issue_more;
28245 force = is_costly_group (group_insns, next_insn);
28246 if (!force)
28247 return can_issue_more;
28249 if (sched_verbose > 6)
28250 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
28251 *group_count ,can_issue_more);
28253 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
28255 if (*group_end)
28256 can_issue_more = 0;
28258 /* Since only a branch can be issued in the last issue_slot, it is
28259 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
28260 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
28261 in this case the last nop will start a new group and the branch
28262 will be forced to the new group. */
28263 if (can_issue_more && !is_branch_slot_insn (next_insn))
28264 can_issue_more--;
28266 /* Do we have a special group ending nop? */
28267 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
28268 || rs6000_cpu_attr == CPU_POWER8)
28270 nop = gen_group_ending_nop ();
28271 emit_insn_before (nop, next_insn);
28272 can_issue_more = 0;
28274 else
28275 while (can_issue_more > 0)
28277 nop = gen_nop ();
28278 emit_insn_before (nop, next_insn);
28279 can_issue_more--;
28282 *group_end = true;
28283 return 0;
28286 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
28288 int n_nops = rs6000_sched_insert_nops;
28290 /* Nops can't be issued from the branch slot, so the effective
28291 issue_rate for nops is 'issue_rate - 1'. */
28292 if (can_issue_more == 0)
28293 can_issue_more = issue_rate;
28294 can_issue_more--;
28295 if (can_issue_more == 0)
28297 can_issue_more = issue_rate - 1;
28298 (*group_count)++;
28299 end = true;
28300 for (i = 0; i < issue_rate; i++)
28302 group_insns[i] = 0;
28306 while (n_nops > 0)
28308 nop = gen_nop ();
28309 emit_insn_before (nop, next_insn);
28310 if (can_issue_more == issue_rate - 1) /* new group begins */
28311 end = false;
28312 can_issue_more--;
28313 if (can_issue_more == 0)
28315 can_issue_more = issue_rate - 1;
28316 (*group_count)++;
28317 end = true;
28318 for (i = 0; i < issue_rate; i++)
28320 group_insns[i] = 0;
28323 n_nops--;
28326 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
28327 can_issue_more++;
28329 /* Is next_insn going to start a new group? */
28330 *group_end
28331 = (end
28332 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28333 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28334 || (can_issue_more < issue_rate &&
28335 insn_terminates_group_p (next_insn, previous_group)));
28336 if (*group_end && end)
28337 (*group_count)--;
28339 if (sched_verbose > 6)
28340 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
28341 *group_count, can_issue_more);
28342 return can_issue_more;
28345 return can_issue_more;
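/* Worked example (editorial, illustrative values): on a POWER4-class
   target with -minsert-sched-nops=regroup_exact, suppose a costly
   dependence is detected while can_issue_more == 3 and NEXT_INSN is
   not a branch.  Since only a branch can occupy the last issue slot,
   can_issue_more - 1 == 2 ordinary nops suffice to fill the group
   and force NEXT_INSN into a new one.  On POWER6/7/8 a single
   group-ending nop is emitted instead.  */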
28348 /* This function tries to synchronize the dispatch groups that the compiler "sees"
28349 with the dispatch groups that the processor dispatcher is expected to
28350 form in practice. It tries to achieve this synchronization by forcing the
28351 estimated processor grouping on the compiler (as opposed to the function
28352 'pad_groups' which tries to force the scheduler's grouping on the processor).
28354 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
28355 examines the (estimated) dispatch groups that will be formed by the processor
28356 dispatcher. It marks these group boundaries to reflect the estimated
28357 processor grouping, overriding the grouping that the scheduler had marked.
28358 Depending on the value of the flag '-minsert-sched-nops' this function can
28359 force certain insns into separate groups or force a certain distance between
28360 them by inserting nops, for example, if there exists a "costly dependence"
28361 between the insns.
28363 The function estimates the group boundaries that the processor will form as
28364 follows: It keeps track of how many vacant issue slots are available after
28365 each insn. A subsequent insn will start a new group if one of the following
28366 4 cases applies:
28367 - no more vacant issue slots remain in the current dispatch group.
28368 - only the last issue slot, which is the branch slot, is vacant, but the next
28369 insn is not a branch.
28370 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
28371 which means that a cracked insn (which occupies two issue slots) can't be
28372 issued in this group.
28373 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
28374 start a new group. */
28376 static int
28377 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28378 rtx_insn *tail)
28380 rtx_insn *insn, *next_insn;
28381 int issue_rate;
28382 int can_issue_more;
28383 int slot, i;
28384 bool group_end;
28385 int group_count = 0;
28386 rtx *group_insns;
28388 /* Initialize. */
28389 issue_rate = rs6000_issue_rate ();
28390 group_insns = XALLOCAVEC (rtx, issue_rate);
28391 for (i = 0; i < issue_rate; i++)
28393 group_insns[i] = 0;
28395 can_issue_more = issue_rate;
28396 slot = 0;
28397 insn = get_next_active_insn (prev_head_insn, tail);
28398 group_end = false;
28400 while (insn != NULL_RTX)
28402 slot = (issue_rate - can_issue_more);
28403 group_insns[slot] = insn;
28404 can_issue_more =
28405 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28406 if (insn_terminates_group_p (insn, current_group))
28407 can_issue_more = 0;
28409 next_insn = get_next_active_insn (insn, tail);
28410 if (next_insn == NULL_RTX)
28411 return group_count + 1;
28413 /* Is next_insn going to start a new group? */
28414 group_end
28415 = (can_issue_more == 0
28416 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28417 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28418 || (can_issue_more < issue_rate &&
28419 insn_terminates_group_p (next_insn, previous_group)));
28421 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
28422 next_insn, &group_end, can_issue_more,
28423 &group_count);
28425 if (group_end)
28427 group_count++;
28428 can_issue_more = 0;
28429 for (i = 0; i < issue_rate; i++)
28431 group_insns[i] = 0;
28435 if (GET_MODE (next_insn) == TImode && can_issue_more)
28436 PUT_MODE (next_insn, VOIDmode);
28437 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
28438 PUT_MODE (next_insn, TImode);
28440 insn = next_insn;
28441 if (can_issue_more == 0)
28442 can_issue_more = issue_rate;
28443 } /* while */
28445 return group_count;
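/* Worked example (editorial, illustrative values): with
   issue_rate == 5, after four insns have been placed
   can_issue_more == 1, so only the branch slot is vacant.  If the
   next active insn is not a branch, a group boundary is assumed
   there; had can_issue_more been 2 and the next insn a cracked one
   (needing two slots), the boundary would be assumed as well.  The
   boundary itself is recorded by giving the first insn of the new
   group TImode, as done above.  */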
28448 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
28449 dispatch group boundaries that the scheduler had marked. Pad with nops
28450 any dispatch groups which have vacant issue slots, in order to force the
28451 scheduler's grouping on the processor dispatcher. The function
28452 returns the number of dispatch groups found. */
28454 static int
28455 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28456 rtx_insn *tail)
28458 rtx_insn *insn, *next_insn;
28459 rtx nop;
28460 int issue_rate;
28461 int can_issue_more;
28462 int group_end;
28463 int group_count = 0;
28465 /* Initialize issue_rate. */
28466 issue_rate = rs6000_issue_rate ();
28467 can_issue_more = issue_rate;
28469 insn = get_next_active_insn (prev_head_insn, tail);
28470 next_insn = get_next_active_insn (insn, tail);
28472 while (insn != NULL_RTX)
28474 can_issue_more =
28475 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28477 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
28479 if (next_insn == NULL_RTX)
28480 break;
28482 if (group_end)
28484 /* If the scheduler had marked group termination at this location
28485 (between insn and next_insn), and neither insn nor next_insn will
28486 force group termination, pad the group with nops to force group
28487 termination. */
28488 if (can_issue_more
28489 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
28490 && !insn_terminates_group_p (insn, current_group)
28491 && !insn_terminates_group_p (next_insn, previous_group))
28493 if (!is_branch_slot_insn (next_insn))
28494 can_issue_more--;
28496 while (can_issue_more)
28498 nop = gen_nop ();
28499 emit_insn_before (nop, next_insn);
28500 can_issue_more--;
28504 can_issue_more = issue_rate;
28505 group_count++;
28508 insn = next_insn;
28509 next_insn = get_next_active_insn (insn, tail);
28512 return group_count;
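/* Worked example (editorial, illustrative values): if the scheduler
   marked a group boundary (the next insn carries TImode) while two
   issue slots are still vacant and the next insn is not a branch,
   one slot is reserved as the branch slot and a single nop is
   emitted to pad the other, so the processor dispatcher is forced to
   end the group where the scheduler did.  */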
28515 /* We're beginning a new block. Initialize data structures as necessary. */
28517 static void
28518 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
28519 int sched_verbose ATTRIBUTE_UNUSED,
28520 int max_ready ATTRIBUTE_UNUSED)
28522 last_scheduled_insn = NULL_RTX;
28523 load_store_pendulum = 0;
28526 /* The following function is called at the end of scheduling BB.
28527 After reload, it inserts nops to enforce insn group bundling. */
28529 static void
28530 rs6000_sched_finish (FILE *dump, int sched_verbose)
28532 int n_groups;
28534 if (sched_verbose)
28535 fprintf (dump, "=== Finishing schedule.\n");
28537 if (reload_completed && rs6000_sched_groups)
28539 /* Do not run sched_finish hook when selective scheduling enabled. */
28540 if (sel_sched_p ())
28541 return;
28543 if (rs6000_sched_insert_nops == sched_finish_none)
28544 return;
28546 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
28547 n_groups = pad_groups (dump, sched_verbose,
28548 current_sched_info->prev_head,
28549 current_sched_info->next_tail);
28550 else
28551 n_groups = redefine_groups (dump, sched_verbose,
28552 current_sched_info->prev_head,
28553 current_sched_info->next_tail);
28555 if (sched_verbose >= 6)
28557 fprintf (dump, "ngroups = %d\n", n_groups);
28558 print_rtl (dump, current_sched_info->prev_head);
28559 fprintf (dump, "Done finish_sched\n");
28564 struct _rs6000_sched_context
28566 short cached_can_issue_more;
28567 rtx last_scheduled_insn;
28568 int load_store_pendulum;
28571 typedef struct _rs6000_sched_context rs6000_sched_context_def;
28572 typedef rs6000_sched_context_def *rs6000_sched_context_t;
28574 /* Allocate storage for a new scheduling context. */
28575 static void *
28576 rs6000_alloc_sched_context (void)
28578 return xmalloc (sizeof (rs6000_sched_context_def));
28581 /* If CLEAN_P is true, initialize _SC with clean data;
28582 otherwise initialize it from the global context. */
28583 static void
28584 rs6000_init_sched_context (void *_sc, bool clean_p)
28586 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28588 if (clean_p)
28590 sc->cached_can_issue_more = 0;
28591 sc->last_scheduled_insn = NULL_RTX;
28592 sc->load_store_pendulum = 0;
28594 else
28596 sc->cached_can_issue_more = cached_can_issue_more;
28597 sc->last_scheduled_insn = last_scheduled_insn;
28598 sc->load_store_pendulum = load_store_pendulum;
28602 /* Sets the global scheduling context to the one pointed to by _SC. */
28603 static void
28604 rs6000_set_sched_context (void *_sc)
28606 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28608 gcc_assert (sc != NULL);
28610 cached_can_issue_more = sc->cached_can_issue_more;
28611 last_scheduled_insn = sc->last_scheduled_insn;
28612 load_store_pendulum = sc->load_store_pendulum;
28615 /* Free _SC. */
28616 static void
28617 rs6000_free_sched_context (void *_sc)
28619 gcc_assert (_sc != NULL);
28621 free (_sc);
28625 /* Length in bytes of the trampoline for entering a nested function. */
28628 rs6000_trampoline_size (void)
28630 int ret = 0;
28632 switch (DEFAULT_ABI)
28634 default:
28635 gcc_unreachable ();
28637 case ABI_AIX:
28638 ret = (TARGET_32BIT) ? 12 : 24;
28639 break;
28641 case ABI_ELFv2:
28642 gcc_assert (!TARGET_32BIT);
28643 ret = 32;
28644 break;
28646 case ABI_DARWIN:
28647 case ABI_V4:
28648 ret = (TARGET_32BIT) ? 40 : 48;
28649 break;
28652 return ret;
28655 /* Emit RTL insns to initialize the variable parts of a trampoline.
28656 FNADDR is an RTX for the address of the function's pure code.
28657 CXT is an RTX for the static chain value for the function. */
28659 static void
28660 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
28662 int regsize = (TARGET_32BIT) ? 4 : 8;
28663 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
28664 rtx ctx_reg = force_reg (Pmode, cxt);
28665 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
28667 switch (DEFAULT_ABI)
28669 default:
28670 gcc_unreachable ();
28672 /* Under AIX, just build the 3-word function descriptor. */
28673 case ABI_AIX:
28675 rtx fnmem, fn_reg, toc_reg;
28677 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
28678 error ("you cannot take the address of a nested function if you use "
28679 "the -mno-pointers-to-nested-functions option");
28681 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
28682 fn_reg = gen_reg_rtx (Pmode);
28683 toc_reg = gen_reg_rtx (Pmode);
28685 /* Macro to shorten the code expansions below. */
28686 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
28688 m_tramp = replace_equiv_address (m_tramp, addr);
28690 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
28691 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
28692 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
28693 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
28694 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
28696 # undef MEM_PLUS
28698 break;
28700 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
28701 case ABI_ELFv2:
28702 case ABI_DARWIN:
28703 case ABI_V4:
28704 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
28705 LCT_NORMAL, VOIDmode, 4,
28706 addr, Pmode,
28707 GEN_INT (rs6000_trampoline_size ()), SImode,
28708 fnaddr, Pmode,
28709 ctx_reg, Pmode);
28710 break;
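/* Layout sketch (editorial): under the 64-bit AIX ABI the 24-byte
   trampoline initialized above is three doublewords forming a
   complete function descriptor:

       offset  0:  code (entry-point) address, copied from the
                   target function's descriptor
       offset  8:  TOC pointer, copied from the same descriptor
       offset 16:  static chain value (CXT)

   For 32-bit AIX the three slots are words and the size is 12.  */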
28715 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
28716 identifier as an argument, so the front end shouldn't look it up. */
28718 static bool
28719 rs6000_attribute_takes_identifier_p (const_tree attr_id)
28721 return is_attribute_p ("altivec", attr_id);
28724 /* Handle the "altivec" attribute. The attribute may have
28725 arguments as follows:
28727 __attribute__((altivec(vector__)))
28728 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
28729 __attribute__((altivec(bool__))) (always followed by 'unsigned')
28731 and may appear more than once (e.g., 'vector bool char') in a
28732 given declaration. */
28734 static tree
28735 rs6000_handle_altivec_attribute (tree *node,
28736 tree name ATTRIBUTE_UNUSED,
28737 tree args,
28738 int flags ATTRIBUTE_UNUSED,
28739 bool *no_add_attrs)
28741 tree type = *node, result = NULL_TREE;
28742 machine_mode mode;
28743 int unsigned_p;
28744 char altivec_type
28745 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
28746 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
28747 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
28748 : '?');
28750 while (POINTER_TYPE_P (type)
28751 || TREE_CODE (type) == FUNCTION_TYPE
28752 || TREE_CODE (type) == METHOD_TYPE
28753 || TREE_CODE (type) == ARRAY_TYPE)
28754 type = TREE_TYPE (type);
28756 mode = TYPE_MODE (type);
28758 /* Check for invalid AltiVec type qualifiers. */
28759 if (type == long_double_type_node)
28760 error ("use of %<long double%> in AltiVec types is invalid");
28761 else if (type == boolean_type_node)
28762 error ("use of boolean types in AltiVec types is invalid");
28763 else if (TREE_CODE (type) == COMPLEX_TYPE)
28764 error ("use of %<complex%> in AltiVec types is invalid");
28765 else if (DECIMAL_FLOAT_MODE_P (mode))
28766 error ("use of decimal floating point types in AltiVec types is invalid");
28767 else if (!TARGET_VSX)
28769 if (type == long_unsigned_type_node || type == long_integer_type_node)
28771 if (TARGET_64BIT)
28772 error ("use of %<long%> in AltiVec types is invalid for "
28773 "64-bit code without -mvsx");
28774 else if (rs6000_warn_altivec_long)
28775 warning (0, "use of %<long%> in AltiVec types is deprecated; "
28776 "use %<int%>");
28778 else if (type == long_long_unsigned_type_node
28779 || type == long_long_integer_type_node)
28780 error ("use of %<long long%> in AltiVec types is invalid without "
28781 "-mvsx");
28782 else if (type == double_type_node)
28783 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
28786 switch (altivec_type)
28788 case 'v':
28789 unsigned_p = TYPE_UNSIGNED (type);
28790 switch (mode)
28792 case TImode:
28793 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
28794 break;
28795 case DImode:
28796 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
28797 break;
28798 case SImode:
28799 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
28800 break;
28801 case HImode:
28802 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
28803 break;
28804 case QImode:
28805 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
28806 break;
28807 case SFmode: result = V4SF_type_node; break;
28808 case DFmode: result = V2DF_type_node; break;
28809 /* If the user says 'vector int bool', we may be handed the 'bool'
28810 attribute _before_ the 'vector' attribute, and so select the
28811 proper type in the 'b' case below. */
28812 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
28813 case V2DImode: case V2DFmode:
28814 result = type;
28815 default: break;
28817 break;
28818 case 'b':
28819 switch (mode)
28821 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
28822 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
28823 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
28824 case QImode: case V16QImode: result = bool_V16QI_type_node;
28825 default: break;
28827 break;
28828 case 'p':
28829 switch (mode)
28831 case V8HImode: result = pixel_V8HI_type_node;
28832 default: break;
28834 default: break;
28837 /* Propagate qualifiers attached to the element type
28838 onto the vector type. */
28839 if (result && result != type && TYPE_QUALS (type))
28840 result = build_qualified_type (result, TYPE_QUALS (type));
28842 *no_add_attrs = true; /* No need to hang on to the attribute. */
28844 if (result)
28845 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
28847 return NULL_TREE;
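/* Usage sketch (editorial): the AltiVec keywords are lowered by the
   front end into this attribute, e.g. (hypothetical declarations)

       __attribute__((altivec(vector__))) int vi;         -> V4SI
       __attribute__((altivec(bool__)))
         __attribute__((altivec(vector__))) unsigned vb;  -> bool V4SI

   Because the attributes may arrive in either order, the 'b' and 'p'
   cases above accept both the scalar modes and the vector modes that
   an earlier 'vector' attribute may already have produced.  */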
28850 /* AltiVec defines four built-in scalar types that serve as vector
28851 elements; we must teach the compiler how to mangle them. */
28853 static const char *
28854 rs6000_mangle_type (const_tree type)
28856 type = TYPE_MAIN_VARIANT (type);
28858 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28859 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28860 return NULL;
28862 if (type == bool_char_type_node) return "U6__boolc";
28863 if (type == bool_short_type_node) return "U6__bools";
28864 if (type == pixel_type_node) return "u7__pixel";
28865 if (type == bool_int_type_node) return "U6__booli";
28866 if (type == bool_long_type_node) return "U6__booll";
28868 /* Mangle IBM extended float long double as `g' (__float128) on
28869 powerpc*-linux where long-double-64 previously was the default. */
28870 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
28871 && TARGET_ELF
28872 && TARGET_LONG_DOUBLE_128
28873 && !TARGET_IEEEQUAD)
28874 return "g";
28876 /* For all other types, use normal C++ mangling. */
28877 return NULL;
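/* Worked example (editorial, assuming the usual Itanium C++ ABI
   composition of vendor-extended builtin type names): the element
   type '__pixel' mangles as u7__pixel, so a hypothetical
   'void f (__pixel)' would be emitted as _Z1fu7__pixel.  */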
28880 /* Handle a "longcall" or "shortcall" attribute; arguments as in
28881 struct attribute_spec.handler. */
28883 static tree
28884 rs6000_handle_longcall_attribute (tree *node, tree name,
28885 tree args ATTRIBUTE_UNUSED,
28886 int flags ATTRIBUTE_UNUSED,
28887 bool *no_add_attrs)
28889 if (TREE_CODE (*node) != FUNCTION_TYPE
28890 && TREE_CODE (*node) != FIELD_DECL
28891 && TREE_CODE (*node) != TYPE_DECL)
28893 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28894 name);
28895 *no_add_attrs = true;
28898 return NULL_TREE;
28901 /* Set longcall attributes on all functions declared when
28902 rs6000_default_long_calls is true. */
28903 static void
28904 rs6000_set_default_type_attributes (tree type)
28906 if (rs6000_default_long_calls
28907 && (TREE_CODE (type) == FUNCTION_TYPE
28908 || TREE_CODE (type) == METHOD_TYPE))
28909 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
28910 NULL_TREE,
28911 TYPE_ATTRIBUTES (type));
28913 #if TARGET_MACHO
28914 darwin_set_default_type_attributes (type);
28915 #endif
28918 /* Return a reference suitable for calling a function with the
28919 longcall attribute. */
28922 rs6000_longcall_ref (rtx call_ref)
28924 const char *call_name;
28925 tree node;
28927 if (GET_CODE (call_ref) != SYMBOL_REF)
28928 return call_ref;
28930 /* System V adds '.' to the internal name, so skip them. */
28931 call_name = XSTR (call_ref, 0);
28932 if (*call_name == '.')
28934 while (*call_name == '.')
28935 call_name++;
28937 node = get_identifier (call_name);
28938 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
28941 return force_reg (Pmode, call_ref);
28944 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
28945 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
28946 #endif
28948 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
28949 struct attribute_spec.handler. */
28950 static tree
28951 rs6000_handle_struct_attribute (tree *node, tree name,
28952 tree args ATTRIBUTE_UNUSED,
28953 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28955 tree *type = NULL;
28956 if (DECL_P (*node))
28958 if (TREE_CODE (*node) == TYPE_DECL)
28959 type = &TREE_TYPE (*node);
28961 else
28962 type = node;
28964 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
28965 || TREE_CODE (*type) == UNION_TYPE)))
28967 warning (OPT_Wattributes, "%qE attribute ignored", name);
28968 *no_add_attrs = true;
28971 else if ((is_attribute_p ("ms_struct", name)
28972 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
28973 || ((is_attribute_p ("gcc_struct", name)
28974 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
28976 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
28977 name);
28978 *no_add_attrs = true;
28981 return NULL_TREE;
28984 static bool
28985 rs6000_ms_bitfield_layout_p (const_tree record_type)
28987 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
28988 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
28989 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
28992 #ifdef USING_ELFOS_H
28994 /* A get_unnamed_section callback, used for switching to toc_section. */
28996 static void
28997 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
28999 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29000 && TARGET_MINIMAL_TOC
29001 && !TARGET_RELOCATABLE)
29003 if (!toc_initialized)
29005 toc_initialized = 1;
29006 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
29007 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
29008 fprintf (asm_out_file, "\t.tc ");
29009 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
29010 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
29011 fprintf (asm_out_file, "\n");
29013 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29014 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
29015 fprintf (asm_out_file, " = .+32768\n");
29017 else
29018 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29020 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29021 && !TARGET_RELOCATABLE)
29022 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
29023 else
29025 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29026 if (!toc_initialized)
29028 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
29029 fprintf (asm_out_file, " = .+32768\n");
29030 toc_initialized = 1;
29035 /* Implement TARGET_ASM_INIT_SECTIONS. */
29037 static void
29038 rs6000_elf_asm_init_sections (void)
29040 toc_section
29041 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
29043 sdata2_section
29044 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
29045 SDATA2_SECTION_ASM_OP);
29048 /* Implement TARGET_SELECT_RTX_SECTION. */
29050 static section *
29051 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
29052 unsigned HOST_WIDE_INT align)
29054 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29055 return toc_section;
29056 else
29057 return default_elf_select_rtx_section (mode, x, align);
29060 /* For a SYMBOL_REF, set generic flags and then perform some
29061 target-specific processing.
29063 When the AIX ABI is requested on a non-AIX system, replace the
29064 function name with the real name (with a leading .) rather than the
29065 function descriptor name. This saves a lot of overriding code to
29066 read the prefixes. */
29068 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
29069 static void
29070 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
29072 default_encode_section_info (decl, rtl, first);
29074 if (first
29075 && TREE_CODE (decl) == FUNCTION_DECL
29076 && !TARGET_AIX
29077 && DEFAULT_ABI == ABI_AIX)
29079 rtx sym_ref = XEXP (rtl, 0);
29080 size_t len = strlen (XSTR (sym_ref, 0));
29081 char *str = XALLOCAVEC (char, len + 2);
29082 str[0] = '.';
29083 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
29084 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
29088 static inline bool
29089 compare_section_name (const char *section, const char *templ)
29091 int len;
29093 len = strlen (templ);
29094 return (strncmp (section, templ, len) == 0
29095 && (section[len] == 0 || section[len] == '.'));
29098 bool
29099 rs6000_elf_in_small_data_p (const_tree decl)
29101 if (rs6000_sdata == SDATA_NONE)
29102 return false;
29104 /* We want to merge strings, so we never consider them small data. */
29105 if (TREE_CODE (decl) == STRING_CST)
29106 return false;
29108 /* Functions are never in the small data area. */
29109 if (TREE_CODE (decl) == FUNCTION_DECL)
29110 return false;
29112 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
29114 const char *section = DECL_SECTION_NAME (decl);
29115 if (compare_section_name (section, ".sdata")
29116 || compare_section_name (section, ".sdata2")
29117 || compare_section_name (section, ".gnu.linkonce.s")
29118 || compare_section_name (section, ".sbss")
29119 || compare_section_name (section, ".sbss2")
29120 || compare_section_name (section, ".gnu.linkonce.sb")
29121 || strcmp (section, ".PPC.EMB.sdata0") == 0
29122 || strcmp (section, ".PPC.EMB.sbss0") == 0)
29123 return true;
29125 else
29127 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
29129 if (size > 0
29130 && size <= g_switch_value
29131 /* If it's not public, and we're not going to reference it there,
29132 there's no need to put it in the small data section. */
29133 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
29134 return true;
29137 return false;
29140 #endif /* USING_ELFOS_H */
29142 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
29144 static bool
29145 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
29147 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
29150 /* Do not place thread-local symbols refs in the object blocks. */
29152 static bool
29153 rs6000_use_blocks_for_decl_p (const_tree decl)
29155 return !DECL_THREAD_LOCAL_P (decl);
29158 /* Return a REG that occurs in ADDR with coefficient 1.
29159 ADDR can be effectively incremented by incrementing REG.
29161 r0 is special and we must not select it as an address
29162 register by this routine since our caller will try to
29163 increment the returned register via an "la" instruction. */
29166 find_addr_reg (rtx addr)
29168 while (GET_CODE (addr) == PLUS)
29170 if (GET_CODE (XEXP (addr, 0)) == REG
29171 && REGNO (XEXP (addr, 0)) != 0)
29172 addr = XEXP (addr, 0);
29173 else if (GET_CODE (XEXP (addr, 1)) == REG
29174 && REGNO (XEXP (addr, 1)) != 0)
29175 addr = XEXP (addr, 1);
29176 else if (CONSTANT_P (XEXP (addr, 0)))
29177 addr = XEXP (addr, 1);
29178 else if (CONSTANT_P (XEXP (addr, 1)))
29179 addr = XEXP (addr, 0);
29180 else
29181 gcc_unreachable ();
29183 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
29184 return addr;
29187 void
29188 rs6000_fatal_bad_address (rtx op)
29190 fatal_insn ("bad address", op);
29193 #if TARGET_MACHO
29195 typedef struct branch_island_d {
29196 tree function_name;
29197 tree label_name;
29198 int line_number;
29199 } branch_island;
29202 static vec<branch_island, va_gc> *branch_islands;
29204 /* Remember to generate a branch island for far calls to the given
29205 function. */
29207 static void
29208 add_compiler_branch_island (tree label_name, tree function_name,
29209 int line_number)
29211 branch_island bi = {function_name, label_name, line_number};
29212 vec_safe_push (branch_islands, bi);
29215 /* Generate far-jump branch islands for everything recorded in
29216 branch_islands. Invoked immediately after the last instruction of
29217 the epilogue has been emitted; the branch islands must be appended
29218 to, and contiguous with, the function body. Mach-O stubs are
29219 generated in machopic_output_stub(). */
29221 static void
29222 macho_branch_islands (void)
29224 char tmp_buf[512];
29226 while (!vec_safe_is_empty (branch_islands))
29228 branch_island *bi = &branch_islands->last ();
29229 const char *label = IDENTIFIER_POINTER (bi->label_name);
29230 const char *name = IDENTIFIER_POINTER (bi->function_name);
29231 char name_buf[512];
29232 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
29233 if (name[0] == '*' || name[0] == '&')
29234 strcpy (name_buf, name+1);
29235 else
29237 name_buf[0] = '_';
29238 strcpy (name_buf+1, name);
29240 strcpy (tmp_buf, "\n");
29241 strcat (tmp_buf, label);
29242 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29243 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29244 dbxout_stabd (N_SLINE, bi->line_number);
29245 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29246 if (flag_pic)
29248 if (TARGET_LINK_STACK)
29250 char name[32];
29251 get_ppc476_thunk_name (name);
29252 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
29253 strcat (tmp_buf, name);
29254 strcat (tmp_buf, "\n");
29255 strcat (tmp_buf, label);
29256 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29258 else
29260 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
29261 strcat (tmp_buf, label);
29262 strcat (tmp_buf, "_pic\n");
29263 strcat (tmp_buf, label);
29264 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29267 strcat (tmp_buf, "\taddis r11,r11,ha16(");
29268 strcat (tmp_buf, name_buf);
29269 strcat (tmp_buf, " - ");
29270 strcat (tmp_buf, label);
29271 strcat (tmp_buf, "_pic)\n");
29273 strcat (tmp_buf, "\tmtlr r0\n");
29275 strcat (tmp_buf, "\taddi r12,r11,lo16(");
29276 strcat (tmp_buf, name_buf);
29277 strcat (tmp_buf, " - ");
29278 strcat (tmp_buf, label);
29279 strcat (tmp_buf, "_pic)\n");
29281 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
29283 else
29285 strcat (tmp_buf, ":\nlis r12,hi16(");
29286 strcat (tmp_buf, name_buf);
29287 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
29288 strcat (tmp_buf, name_buf);
29289 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
29291 output_asm_insn (tmp_buf, 0);
29292 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29293 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29294 dbxout_stabd (N_SLINE, bi->line_number);
29295 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29296 branch_islands->pop ();
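/* Emitted-code sketch (editorial): for a PIC island labelled L42
   targeting _foo (without the link stack), the strcat sequence above
   produces assembly of the form

       L42:
               mflr r0
               bcl 20,31,L42_pic
       L42_pic:
               mflr r11
               addis r11,r11,ha16(_foo - L42_pic)
               mtlr r0
               addi r12,r11,lo16(_foo - L42_pic)
               mtctr r12
               bctr

   Label and symbol names are illustrative.  */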
29300 /* NO_PREVIOUS_DEF checks whether the function name is already in
29301 the branch island list. */
29303 static int
29304 no_previous_def (tree function_name)
29306 branch_island *bi;
29307 unsigned ix;
29309 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29310 if (function_name == bi->function_name)
29311 return 0;
29312 return 1;
29315 /* GET_PREV_LABEL gets the label name from the previous definition of
29316 the function. */
29318 static tree
29319 get_prev_label (tree function_name)
29321 branch_island *bi;
29322 unsigned ix;
29324 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29325 if (function_name == bi->function_name)
29326 return bi->label_name;
29327 return NULL_TREE;
29330 /* INSN is either a function call or a millicode call. It may have an
29331 unconditional jump in its delay slot.
29333 CALL_DEST is the routine we are calling. */
29335 char *
29336 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
29337 int cookie_operand_number)
29339 static char buf[256];
29340 if (darwin_emit_branch_islands
29341 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
29342 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
29344 tree labelname;
29345 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
29347 if (no_previous_def (funname))
29349 rtx label_rtx = gen_label_rtx ();
29350 char *label_buf, temp_buf[256];
29351 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
29352 CODE_LABEL_NUMBER (label_rtx));
29353 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
29354 labelname = get_identifier (label_buf);
29355 add_compiler_branch_island (labelname, funname, insn_line (insn));
29357 else
29358 labelname = get_prev_label (funname);
29360 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
29361 instruction will reach 'foo', otherwise link as 'bl L42'".
29362 "L42" should be a 'branch island', that will do a far jump to
29363 'foo'. Branch islands are generated in
29364 macho_branch_islands(). */
29365 sprintf (buf, "jbsr %%z%d,%.246s",
29366 dest_operand_number, IDENTIFIER_POINTER (labelname));
29368 else
29369 sprintf (buf, "bl %%z%d", dest_operand_number);
29370 return buf;
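/* Illustrative sketch only: for a long call to a routine `foo', the
   jbsr built above pairs with a branch island such as

       L42: lis r12,hi16(_foo)
            ori r12,r12,lo16(_foo)
            mtctr r12
            bctr

   (the non-PIC island shape emitted by macho_branch_islands () above),
   letting the linker keep `bl _foo' when it reaches and fall back to
   `bl L42' when it does not. */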
29373 /* Generate PIC and indirect symbol stubs. */
29375 void
29376 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29378 unsigned int length;
29379 char *symbol_name, *lazy_ptr_name;
29380 char *local_label_0;
29381 static int label = 0;
29383 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29384 symb = (*targetm.strip_name_encoding) (symb);
29387 length = strlen (symb);
29388 symbol_name = XALLOCAVEC (char, length + 32);
29389 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29391 lazy_ptr_name = XALLOCAVEC (char, length + 32);
29392 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
29394 if (flag_pic == 2)
29395 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
29396 else
29397 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
29399 if (flag_pic == 2)
29401 fprintf (file, "\t.align 5\n");
29403 fprintf (file, "%s:\n", stub);
29404 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29406 label++;
29407 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
29408 sprintf (local_label_0, "\"L%011d$spb\"", label);
29410 fprintf (file, "\tmflr r0\n");
29411 if (TARGET_LINK_STACK)
29413 char name[32];
29414 get_ppc476_thunk_name (name);
29415 fprintf (file, "\tbl %s\n", name);
29416 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29418 else
29420 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
29421 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29423 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
29424 lazy_ptr_name, local_label_0);
29425 fprintf (file, "\tmtlr r0\n");
29426 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
29427 (TARGET_64BIT ? "ldu" : "lwzu"),
29428 lazy_ptr_name, local_label_0);
29429 fprintf (file, "\tmtctr r12\n");
29430 fprintf (file, "\tbctr\n");
29432 else
29434 fprintf (file, "\t.align 4\n");
29436 fprintf (file, "%s:\n", stub);
29437 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29439 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
29440 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
29441 (TARGET_64BIT ? "ldu" : "lwzu"),
29442 lazy_ptr_name);
29443 fprintf (file, "\tmtctr r12\n");
29444 fprintf (file, "\tbctr\n");
29447 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29448 fprintf (file, "%s:\n", lazy_ptr_name);
29449 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29450 fprintf (file, "%sdyld_stub_binding_helper\n",
29451 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
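/* Rough sketch of the output, for orientation only (label spellings
   come from GEN_SYMBOL_NAME_FOR_SYMBOL / GEN_LAZY_PTR_NAME_FOR_SYMBOL;
   `bar' is a hypothetical symbol), for the 32-bit non-PIC branch above:

       bar_stub:
           .indirect_symbol _bar
           lis r11,ha16(bar_lazy_ptr)
           lwzu r12,lo16(bar_lazy_ptr)(r11)
           mtctr r12
           bctr
       bar_lazy_ptr:
           .indirect_symbol _bar
           .long dyld_stub_binding_helper

   The stub jumps through the lazy pointer, which initially targets
   dyld_stub_binding_helper and is rebound by dyld on first use. */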
29454 /* Legitimize PIC addresses. If the address is already
29455 position-independent, we return ORIG. Newly generated
29456 position-independent addresses go into a reg. This is REG if non
29457 zero, otherwise we allocate register(s) as necessary. */
29459 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
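/* For example: INTVAL -0x8000 biases to 0 and passes, 0x7fff biases to
   0xffff and passes, while 0x8000 biases to 0x10000 and is rejected --
   i.e. SMALL_INT accepts exactly the signed 16-bit immediates that fit
   an addi D field. */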
29461 rtx
29462 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
29463 rtx reg)
29465 rtx base, offset;
29467 if (reg == NULL && ! reload_in_progress && ! reload_completed)
29468 reg = gen_reg_rtx (Pmode);
29470 if (GET_CODE (orig) == CONST)
29472 rtx reg_temp;
29474 if (GET_CODE (XEXP (orig, 0)) == PLUS
29475 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
29476 return orig;
29478 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
29480 /* Use a different reg for the intermediate value, as
29481 it will be marked UNCHANGING. */
29482 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
29483 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
29484 Pmode, reg_temp);
29485 offset =
29486 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
29487 Pmode, reg);
29489 if (GET_CODE (offset) == CONST_INT)
29491 if (SMALL_INT (offset))
29492 return plus_constant (Pmode, base, INTVAL (offset));
29493 else if (! reload_in_progress && ! reload_completed)
29494 offset = force_reg (Pmode, offset);
29495 else
29497 rtx mem = force_const_mem (Pmode, orig);
29498 return machopic_legitimize_pic_address (mem, Pmode, reg);
29501 return gen_rtx_PLUS (Pmode, base, offset);
29504 /* Fall back on generic machopic code. */
29505 return machopic_legitimize_pic_address (orig, mode, reg);
29508 /* Output a .machine directive for the Darwin assembler, and call
29509 the generic start_file routine. */
29511 static void
29512 rs6000_darwin_file_start (void)
29514 static const struct
29516 const char *arg;
29517 const char *name;
29518 HOST_WIDE_INT if_set;
29519 } mapping[] = {
29520 { "ppc64", "ppc64", MASK_64BIT },
29521 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
29522 { "power4", "ppc970", 0 },
29523 { "G5", "ppc970", 0 },
29524 { "7450", "ppc7450", 0 },
29525 { "7400", "ppc7400", MASK_ALTIVEC },
29526 { "G4", "ppc7400", 0 },
29527 { "750", "ppc750", 0 },
29528 { "740", "ppc750", 0 },
29529 { "G3", "ppc750", 0 },
29530 { "604e", "ppc604e", 0 },
29531 { "604", "ppc604", 0 },
29532 { "603e", "ppc603", 0 },
29533 { "603", "ppc603", 0 },
29534 { "601", "ppc601", 0 },
29535 { NULL, "ppc", 0 } };
29536 const char *cpu_id = "";
29537 size_t i;
29539 rs6000_file_start ();
29540 darwin_file_start ();
29542 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
29544 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
29545 cpu_id = rs6000_default_cpu;
29547 if (global_options_set.x_rs6000_cpu_index)
29548 cpu_id = processor_target_table[rs6000_cpu_index].name;
29550 /* Look through the mapping array. Pick the first name that either
29551 matches the argument, has a bit set in IF_SET that is also set
29552 in the target flags, or has a NULL name. */
29554 i = 0;
29555 while (mapping[i].arg != NULL
29556 && strcmp (mapping[i].arg, cpu_id) != 0
29557 && (mapping[i].if_set & rs6000_isa_flags) == 0)
29558 i++;
29560 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
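/* For instance, with -mcpu=G5 the scan stops at the { "G5", "ppc970", 0 }
   entry and emits "\t.machine ppc970"; with no -mcpu and no 64-bit or
   AltiVec ISA flags set, it falls through to the terminating
   { NULL, "ppc", 0 } entry and emits "\t.machine ppc". */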
29563 #endif /* TARGET_MACHO */
29565 #if TARGET_ELF
29566 static int
29567 rs6000_elf_reloc_rw_mask (void)
29569 if (flag_pic)
29570 return 3;
29571 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29572 return 2;
29573 else
29574 return 0;
29577 /* Record an element in the table of global constructors. SYMBOL is
29578 a SYMBOL_REF of the function to be called; PRIORITY is a number
29579 between 0 and MAX_INIT_PRIORITY.
29581 This differs from default_named_section_asm_out_constructor in
29582 that we have special handling for -mrelocatable. */
29584 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
29585 static void
29586 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
29588 const char *section = ".ctors";
29589 char buf[16];
29591 if (priority != DEFAULT_INIT_PRIORITY)
29593 sprintf (buf, ".ctors.%.5u",
29594 /* Invert the numbering so the linker puts us in the proper
29595 order; constructors are run from right to left, and the
29596 linker sorts in increasing order. */
29597 MAX_INIT_PRIORITY - priority);
29598 section = buf;
29601 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29602 assemble_align (POINTER_SIZE);
29604 if (TARGET_RELOCATABLE)
29606 fputs ("\t.long (", asm_out_file);
29607 output_addr_const (asm_out_file, symbol);
29608 fputs (")@fixup\n", asm_out_file);
29610 else
29611 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
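/* Worked example: with MAX_INIT_PRIORITY == 65535, a constructor of
   priority 65400 is placed in ".ctors.00135".  Since the linker sorts
   section names in increasing order and .ctors entries execute from
   last to first, the inverted numbering yields the intended execution
   order. */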
29614 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
29615 static void
29616 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
29618 const char *section = ".dtors";
29619 char buf[16];
29621 if (priority != DEFAULT_INIT_PRIORITY)
29623 sprintf (buf, ".dtors.%.5u",
29624 /* Invert the numbering so the linker puts us in the proper
29625 order; destructors are run from right to left, and the
29626 linker sorts in increasing order. */
29627 MAX_INIT_PRIORITY - priority);
29628 section = buf;
29631 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29632 assemble_align (POINTER_SIZE);
29634 if (TARGET_RELOCATABLE)
29636 fputs ("\t.long (", asm_out_file);
29637 output_addr_const (asm_out_file, symbol);
29638 fputs (")@fixup\n", asm_out_file);
29640 else
29641 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29644 void
29645 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
29647 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
29649 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
29650 ASM_OUTPUT_LABEL (file, name);
29651 fputs (DOUBLE_INT_ASM_OP, file);
29652 rs6000_output_function_entry (file, name);
29653 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
29654 if (DOT_SYMBOLS)
29656 fputs ("\t.size\t", file);
29657 assemble_name (file, name);
29658 fputs (",24\n\t.type\t.", file);
29659 assemble_name (file, name);
29660 fputs (",@function\n", file);
29661 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
29663 fputs ("\t.globl\t.", file);
29664 assemble_name (file, name);
29665 putc ('\n', file);
29668 else
29669 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29670 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29671 rs6000_output_function_entry (file, name);
29672 fputs (":\n", file);
29673 return;
29676 if (TARGET_RELOCATABLE
29677 && !TARGET_SECURE_PLT
29678 && (get_pool_size () != 0 || crtl->profile)
29679 && uses_TOC ())
29681 char buf[256];
29683 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
29685 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
29686 fprintf (file, "\t.long ");
29687 assemble_name (file, buf);
29688 putc ('-', file);
29689 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29690 assemble_name (file, buf);
29691 putc ('\n', file);
29694 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29695 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29697 if (DEFAULT_ABI == ABI_AIX)
29699 const char *desc_name, *orig_name;
29701 orig_name = (*targetm.strip_name_encoding) (name);
29702 desc_name = orig_name;
29703 while (*desc_name == '.')
29704 desc_name++;
29706 if (TREE_PUBLIC (decl))
29707 fprintf (file, "\t.globl %s\n", desc_name);
29709 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29710 fprintf (file, "%s:\n", desc_name);
29711 fprintf (file, "\t.long %s\n", orig_name);
29712 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
29713 fputs ("\t.long 0\n", file);
29714 fprintf (file, "\t.previous\n");
29716 ASM_OUTPUT_LABEL (file, name);
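/* A rough sketch, for orientation only, of what the 64-bit ELFv1
   branch above emits for a function `foo' (entry-point spelling per
   rs6000_output_function_entry; `.L.foo' assumes !DOT_SYMBOLS):

       .section ".opd","aw"
       .align 3
   foo:
       .quad .L.foo,.TOC.@tocbase,0
       .previous
       .type foo,@function
   .L.foo:

   i.e. `foo' names the three-doubleword function descriptor while the
   code itself starts at the local entry label. */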
29719 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
29720 static void
29721 rs6000_elf_file_end (void)
29723 #ifdef HAVE_AS_GNU_ATTRIBUTE
29724 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
29726 if (rs6000_passes_float)
29727 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
29728 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
29729 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
29730 : 2));
29731 if (rs6000_passes_vector)
29732 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
29733 (TARGET_ALTIVEC_ABI ? 2
29734 : TARGET_SPE_ABI ? 3
29735 : 1));
29736 if (rs6000_returns_struct)
29737 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
29738 aix_struct_return ? 2 : 1);
29740 #endif
29741 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29742 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
29743 file_end_indicate_exec_stack ();
29744 #endif
29746 #endif
29748 #if TARGET_XCOFF
29749 static void
29750 rs6000_xcoff_asm_output_anchor (rtx symbol)
29752 char buffer[100];
29754 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
29755 SYMBOL_REF_BLOCK_OFFSET (symbol));
29756 fprintf (asm_out_file, "%s", SET_ASM_OP);
29757 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
29758 fprintf (asm_out_file, ",");
29759 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
29760 fprintf (asm_out_file, "\n");
29763 static void
29764 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
29766 fputs (GLOBAL_ASM_OP, stream);
29767 RS6000_OUTPUT_BASENAME (stream, name);
29768 putc ('\n', stream);
29771 /* A get_unnamed_section callback, used for read-only sections.
29772 DIRECTIVE points to the section string variable. */
29774 static void
29775 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
29777 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
29778 *(const char *const *) directive,
29779 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29782 /* Likewise for read-write sections. */
29784 static void
29785 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
29787 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
29788 *(const char *const *) directive,
29789 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29792 static void
29793 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
29795 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
29796 *(const char *const *) directive,
29797 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29800 /* A get_unnamed_section callback, used for switching to toc_section. */
29802 static void
29803 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
29805 if (TARGET_MINIMAL_TOC)
29807 /* toc_section is always selected at least once from
29808 rs6000_xcoff_file_start, so this is guaranteed to
29809 always be defined once and only once in each file. */
29810 if (!toc_initialized)
29812 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
29813 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
29814 toc_initialized = 1;
29816 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
29817 (TARGET_32BIT ? "" : ",3"));
29819 else
29820 fputs ("\t.toc\n", asm_out_file);
29823 /* Implement TARGET_ASM_INIT_SECTIONS. */
29825 static void
29826 rs6000_xcoff_asm_init_sections (void)
29828 read_only_data_section
29829 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29830 &xcoff_read_only_section_name);
29832 private_data_section
29833 = get_unnamed_section (SECTION_WRITE,
29834 rs6000_xcoff_output_readwrite_section_asm_op,
29835 &xcoff_private_data_section_name);
29837 tls_data_section
29838 = get_unnamed_section (SECTION_TLS,
29839 rs6000_xcoff_output_tls_section_asm_op,
29840 &xcoff_tls_data_section_name);
29842 tls_private_data_section
29843 = get_unnamed_section (SECTION_TLS,
29844 rs6000_xcoff_output_tls_section_asm_op,
29845 &xcoff_private_data_section_name);
29847 read_only_private_data_section
29848 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29849 &xcoff_private_data_section_name);
29851 toc_section
29852 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
29854 readonly_data_section = read_only_data_section;
29855 exception_section = data_section;
29858 static int
29859 rs6000_xcoff_reloc_rw_mask (void)
29861 return 3;
29864 static void
29865 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
29866 tree decl ATTRIBUTE_UNUSED)
29868 int smclass;
29869 static const char * const suffix[4] = { "PR", "RO", "RW", "TL" };
29871 if (flags & SECTION_CODE)
29872 smclass = 0;
29873 else if (flags & SECTION_TLS)
29874 smclass = 3;
29875 else if (flags & SECTION_WRITE)
29876 smclass = 2;
29877 else
29878 smclass = 1;
29880 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
29881 (flags & SECTION_CODE) ? "." : "",
29882 name, suffix[smclass], flags & SECTION_ENTSIZE);
29885 #define IN_NAMED_SECTION(DECL) \
29886 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
29887 && DECL_SECTION_NAME (DECL) != NULL)
29889 static section *
29890 rs6000_xcoff_select_section (tree decl, int reloc,
29891 unsigned HOST_WIDE_INT align)
29893 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
29894 a named section. */
29895 if (align > BIGGEST_ALIGNMENT)
29897 resolve_unique_section (decl, reloc, true);
29898 if (IN_NAMED_SECTION (decl))
29899 return get_named_section (decl, NULL, reloc);
29902 if (decl_readonly_section (decl, reloc))
29904 if (TREE_PUBLIC (decl))
29905 return read_only_data_section;
29906 else
29907 return read_only_private_data_section;
29909 else
29911 #if HAVE_AS_TLS
29912 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
29914 if (TREE_PUBLIC (decl))
29915 return tls_data_section;
29916 else if (bss_initializer_p (decl))
29918 /* Convert to COMMON to emit in BSS. */
29919 DECL_COMMON (decl) = 1;
29920 return tls_comm_section;
29922 else
29923 return tls_private_data_section;
29925 else
29926 #endif
29927 if (TREE_PUBLIC (decl))
29928 return data_section;
29929 else
29930 return private_data_section;
29934 static void
29935 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
29937 const char *name;
29939 /* Use select_section for private data and uninitialized data with
29940 alignment <= BIGGEST_ALIGNMENT. */
29941 if (!TREE_PUBLIC (decl)
29942 || DECL_COMMON (decl)
29943 || (DECL_INITIAL (decl) == NULL_TREE
29944 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
29945 || DECL_INITIAL (decl) == error_mark_node
29946 || (flag_zero_initialized_in_bss
29947 && initializer_zerop (DECL_INITIAL (decl))))
29948 return;
29950 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
29951 name = (*targetm.strip_name_encoding) (name);
29952 set_decl_section_name (decl, name);
29955 /* Select section for constant in constant pool.
29957 On RS/6000, all constants are in the private read-only data area.
29958 However, if this is being placed in the TOC it must be output as a
29959 toc entry. */
29961 static section *
29962 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
29963 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
29965 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29966 return toc_section;
29967 else
29968 return read_only_private_data_section;
29971 /* Remove any trailing [DS] or the like from the symbol name. */
29973 static const char *
29974 rs6000_xcoff_strip_name_encoding (const char *name)
29976 size_t len;
29977 if (*name == '*')
29978 name++;
29979 len = strlen (name);
29980 if (name[len - 1] == ']')
29981 return ggc_alloc_string (name, len - 4);
29982 else
29983 return name;
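/* E.g. "foo[DS]" comes back as "foo"; the len - 4 above assumes the
   trailing mapping-class suffix is exactly four characters ("[DS]",
   "[RW]", etc.), and a leading '*' is dropped first. */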
29986 /* Section attributes. AIX is always PIC. */
29988 static unsigned int
29989 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
29991 unsigned int align;
29992 unsigned int flags = default_section_type_flags (decl, name, reloc);
29994 /* Align to at least UNIT size. */
29995 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
29996 align = MIN_UNITS_PER_WORD;
29997 else
29998 /* Increase alignment of large objects if not already stricter. */
29999 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
30000 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
30001 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
30003 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
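/* Illustrative arithmetic: a decl aligned to 16 bytes and larger than
   a word gives align == 16, and exact_log2 (16) == 4 is stored in the
   SECTION_ENTSIZE bits, which rs6000_xcoff_asm_named_section above
   prints back as the .csect alignment operand. */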
30006 /* Output at beginning of assembler file.
30008 Initialize the section names for the RS/6000 at this point.
30010 Specify filename, including full path, to assembler.
30012 We want to go into the TOC section so at least one .toc will be emitted.
30013 Also, in order to output proper .bs/.es pairs, we need at least one static
30014 [RW] section emitted.
30016 Finally, declare mcount when profiling to make the assembler happy. */
30018 static void
30019 rs6000_xcoff_file_start (void)
30021 rs6000_gen_section_name (&xcoff_bss_section_name,
30022 main_input_filename, ".bss_");
30023 rs6000_gen_section_name (&xcoff_private_data_section_name,
30024 main_input_filename, ".rw_");
30025 rs6000_gen_section_name (&xcoff_read_only_section_name,
30026 main_input_filename, ".ro_");
30027 rs6000_gen_section_name (&xcoff_tls_data_section_name,
30028 main_input_filename, ".tls_");
30029 rs6000_gen_section_name (&xcoff_tbss_section_name,
30030 main_input_filename, ".tbss_[UL]");
30032 fputs ("\t.file\t", asm_out_file);
30033 output_quoted_string (asm_out_file, main_input_filename);
30034 fputc ('\n', asm_out_file);
30035 if (write_symbols != NO_DEBUG)
30036 switch_to_section (private_data_section);
30037 switch_to_section (text_section);
30038 if (profile_flag)
30039 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
30040 rs6000_file_start ();
30043 /* Output at end of assembler file.
30044 On the RS/6000, referencing data should automatically pull in text. */
30046 static void
30047 rs6000_xcoff_file_end (void)
30049 switch_to_section (text_section);
30050 fputs ("_section_.text:\n", asm_out_file);
30051 switch_to_section (data_section);
30052 fputs (TARGET_32BIT
30053 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
30054 asm_out_file);
30057 struct declare_alias_data
30059 FILE *file;
30060 bool function_descriptor;
30063 /* Declare alias N. A helper function for for_node_and_aliases. */
30065 static bool
30066 rs6000_declare_alias (struct symtab_node *n, void *d)
30068 struct declare_alias_data *data = (struct declare_alias_data *)d;
30069 /* Main symbol is output specially, because varasm machinery does part of
30070 the job for us - we do not need to declare .globl/lglobs and such. */
30071 if (!n->alias || n->weakref)
30072 return false;
30074 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
30075 return false;
30077 /* Prevent assemble_alias from trying to use .set pseudo operation
30078 that does not behave as expected by the middle-end. */
30079 TREE_ASM_WRITTEN (n->decl) = true;
30081 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
30082 char *buffer = (char *) alloca (strlen (name) + 2);
30083 char *p;
30084 int dollar_inside = 0;
30086 strcpy (buffer, name);
30087 p = strchr (buffer, '$');
30088 while (p) {
30089 *p = '_';
30090 dollar_inside++;
30091 p = strchr (p + 1, '$');
30093 if (TREE_PUBLIC (n->decl))
30095 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
30097 if (dollar_inside) {
30098 if (data->function_descriptor)
30099 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
30100 else
30101 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
30103 if (data->function_descriptor)
30104 fputs ("\t.globl .", data->file);
30105 else
30106 fputs ("\t.globl ", data->file);
30107 RS6000_OUTPUT_BASENAME (data->file, buffer);
30108 putc ('\n', data->file);
30110 #ifdef ASM_WEAKEN_DECL
30111 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
30112 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
30113 #endif
30115 else
30117 if (dollar_inside)
30119 if (data->function_descriptor)
30120 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
30121 else
30122 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
30124 if (data->function_descriptor)
30125 fputs ("\t.lglobl .", data->file);
30126 else
30127 fputs ("\t.lglobl ", data->file);
30128 RS6000_OUTPUT_BASENAME (data->file, buffer);
30129 putc ('\n', data->file);
30131 if (data->function_descriptor)
30132 fputs (".", data->file);
30133 RS6000_OUTPUT_BASENAME (data->file, buffer);
30134 fputs (":\n", data->file);
30135 return false;
30138 /* This macro produces the initial definition of a function name.
30139 On the RS/6000, we need to place an extra '.' in the function name and
30140 output the function descriptor.
30141 Dollar signs are converted to underscores.
30143 The csect for the function will have already been created when
30144 text_section was selected. We do have to go back to that csect, however.
30146 The third and fourth parameters to the .function pseudo-op (16 and 044)
30147 are placeholders which no longer have any use.
30149 Because AIX assembler's .set command has unexpected semantics, we output
30150 all aliases as alternative labels in front of the definition. */
30152 void
30153 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
30155 char *buffer = (char *) alloca (strlen (name) + 1);
30156 char *p;
30157 int dollar_inside = 0;
30158 struct declare_alias_data data = {file, false};
30160 strcpy (buffer, name);
30161 p = strchr (buffer, '$');
30162 while (p) {
30163 *p = '_';
30164 dollar_inside++;
30165 p = strchr (p + 1, '$');
30167 if (TREE_PUBLIC (decl))
30169 if (!RS6000_WEAK || !DECL_WEAK (decl))
30171 if (dollar_inside) {
30172 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30173 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30175 fputs ("\t.globl .", file);
30176 RS6000_OUTPUT_BASENAME (file, buffer);
30177 putc ('\n', file);
30180 else
30182 if (dollar_inside) {
30183 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30184 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30186 fputs ("\t.lglobl .", file);
30187 RS6000_OUTPUT_BASENAME (file, buffer);
30188 putc ('\n', file);
30190 fputs ("\t.csect ", file);
30191 RS6000_OUTPUT_BASENAME (file, buffer);
30192 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
30193 RS6000_OUTPUT_BASENAME (file, buffer);
30194 fputs (":\n", file);
30195 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30196 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
30197 RS6000_OUTPUT_BASENAME (file, buffer);
30198 fputs (", TOC[tc0], 0\n", file);
30199 in_section = NULL;
30200 switch_to_section (function_section (decl));
30201 putc ('.', file);
30202 RS6000_OUTPUT_BASENAME (file, buffer);
30203 fputs (":\n", file);
30204 data.function_descriptor = true;
30205 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30206 if (write_symbols != NO_DEBUG && !DECL_IGNORED_P (decl))
30207 xcoffout_declare_function (file, decl, buffer);
30208 return;
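/* Sketch of the result for a public, non-weak 32-bit function `foo'
   (the code csect name depends on function_section; .text[PR] is shown
   as a typical case):

       .globl .foo
       .csect foo[DS]
   foo:
       .long .foo, TOC[tc0], 0
       .csect .text[PR]
   .foo:

   The descriptor csect foo[DS] carries the entry address and TOC
   anchor; the instructions live at the dot-prefixed label. */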
30211 /* This macro produces the initial definition of an object (variable) name.
30212 Because AIX assembler's .set command has unexpected semantics, we output
30213 all aliases as alternative labels in front of the definition. */
30215 void
30216 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
30218 struct declare_alias_data data = {file, false};
30219 RS6000_OUTPUT_BASENAME (file, name);
30220 fputs (":\n", file);
30221 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30224 #ifdef HAVE_AS_TLS
30225 static void
30226 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
30228 rtx symbol;
30229 int flags;
30231 default_encode_section_info (decl, rtl, first);
30233 /* Careful not to prod global register variables. */
30234 if (!MEM_P (rtl))
30235 return;
30236 symbol = XEXP (rtl, 0);
30237 if (GET_CODE (symbol) != SYMBOL_REF)
30238 return;
30240 flags = SYMBOL_REF_FLAGS (symbol);
30242 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
30243 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
30245 SYMBOL_REF_FLAGS (symbol) = flags;
30247 #endif /* HAVE_AS_TLS */
30248 #endif /* TARGET_XCOFF */
30250 /* Compute a (partial) cost for rtx X. Return true if the complete
30251 cost has been computed, and false if subexpressions should be
30252 scanned. In either case, *TOTAL contains the cost result. */
30254 static bool
30255 rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
30256 int *total, bool speed)
30258 machine_mode mode = GET_MODE (x);
30260 switch (code)
30262 /* On the RS/6000, if it is valid in the insn, it is free. */
30263 case CONST_INT:
30264 if (((outer_code == SET
30265 || outer_code == PLUS
30266 || outer_code == MINUS)
30267 && (satisfies_constraint_I (x)
30268 || satisfies_constraint_L (x)))
30269 || (outer_code == AND
30270 && (satisfies_constraint_K (x)
30271 || (mode == SImode
30272 ? satisfies_constraint_L (x)
30273 : satisfies_constraint_J (x))
30274 || mask_operand (x, mode)
30275 || (mode == DImode
30276 && mask64_operand (x, DImode))))
30277 || ((outer_code == IOR || outer_code == XOR)
30278 && (satisfies_constraint_K (x)
30279 || (mode == SImode
30280 ? satisfies_constraint_L (x)
30281 : satisfies_constraint_J (x))))
30282 || outer_code == ASHIFT
30283 || outer_code == ASHIFTRT
30284 || outer_code == LSHIFTRT
30285 || outer_code == ROTATE
30286 || outer_code == ROTATERT
30287 || outer_code == ZERO_EXTRACT
30288 || (outer_code == MULT
30289 && satisfies_constraint_I (x))
30290 || ((outer_code == DIV || outer_code == UDIV
30291 || outer_code == MOD || outer_code == UMOD)
30292 && exact_log2 (INTVAL (x)) >= 0)
30293 || (outer_code == COMPARE
30294 && (satisfies_constraint_I (x)
30295 || satisfies_constraint_K (x)))
30296 || ((outer_code == EQ || outer_code == NE)
30297 && (satisfies_constraint_I (x)
30298 || satisfies_constraint_K (x)
30299 || (mode == SImode
30300 ? satisfies_constraint_L (x)
30301 : satisfies_constraint_J (x))))
30302 || (outer_code == GTU
30303 && satisfies_constraint_I (x))
30304 || (outer_code == LTU
30305 && satisfies_constraint_P (x)))
30307 *total = 0;
30308 return true;
30310 else if ((outer_code == PLUS
30311 && reg_or_add_cint_operand (x, VOIDmode))
30312 || (outer_code == MINUS
30313 && reg_or_sub_cint_operand (x, VOIDmode))
30314 || ((outer_code == SET
30315 || outer_code == IOR
30316 || outer_code == XOR)
30317 && (INTVAL (x)
30318 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
30320 *total = COSTS_N_INSNS (1);
30321 return true;
30323 /* FALLTHRU */
30325 case CONST_DOUBLE:
30326 case CONST_WIDE_INT:
30327 case CONST:
30328 case HIGH:
30329 case SYMBOL_REF:
30330 case MEM:
30331 /* When optimizing for size, MEM should be slightly more expensive
30332 than generating the address, e.g., (plus (reg) (const)).
30333 L1 cache latency is about two instructions. */
30334 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
30335 return true;
30337 case LABEL_REF:
30338 *total = 0;
30339 return true;
30341 case PLUS:
30342 case MINUS:
30343 if (FLOAT_MODE_P (mode))
30344 *total = rs6000_cost->fp;
30345 else
30346 *total = COSTS_N_INSNS (1);
30347 return false;
30349 case MULT:
30350 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30351 && satisfies_constraint_I (XEXP (x, 1)))
30353 if (INTVAL (XEXP (x, 1)) >= -256
30354 && INTVAL (XEXP (x, 1)) <= 255)
30355 *total = rs6000_cost->mulsi_const9;
30356 else
30357 *total = rs6000_cost->mulsi_const;
30359 else if (mode == SFmode)
30360 *total = rs6000_cost->fp;
30361 else if (FLOAT_MODE_P (mode))
30362 *total = rs6000_cost->dmul;
30363 else if (mode == DImode)
30364 *total = rs6000_cost->muldi;
30365 else
30366 *total = rs6000_cost->mulsi;
30367 return false;
30369 case FMA:
30370 if (mode == SFmode)
30371 *total = rs6000_cost->fp;
30372 else
30373 *total = rs6000_cost->dmul;
30374 break;
30376 case DIV:
30377 case MOD:
30378 if (FLOAT_MODE_P (mode))
30380 *total = mode == DFmode ? rs6000_cost->ddiv
30381 : rs6000_cost->sdiv;
30382 return false;
30384 /* FALLTHRU */
30386 case UDIV:
30387 case UMOD:
30388 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30389 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
30391 if (code == DIV || code == MOD)
30392 /* Shift, addze */
30393 *total = COSTS_N_INSNS (2);
30394 else
30395 /* Shift */
30396 *total = COSTS_N_INSNS (1);
30398 else
30400 if (GET_MODE (XEXP (x, 1)) == DImode)
30401 *total = rs6000_cost->divdi;
30402 else
30403 *total = rs6000_cost->divsi;
30405 /* Add in shift and subtract for MOD. */
30406 if (code == MOD || code == UMOD)
30407 *total += COSTS_N_INSNS (2);
30408 return false;
30410 case CTZ:
30411 case FFS:
30412 *total = COSTS_N_INSNS (4);
30413 return false;
30415 case POPCOUNT:
30416 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
30417 return false;
30419 case PARITY:
30420 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
30421 return false;
30423 case NOT:
30424 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
30426 *total = 0;
30427 return false;
30429 /* FALLTHRU */
30431 case AND:
30432 case CLZ:
30433 case IOR:
30434 case XOR:
30435 case ZERO_EXTRACT:
30436 *total = COSTS_N_INSNS (1);
30437 return false;
30439 case ASHIFT:
30440 case ASHIFTRT:
30441 case LSHIFTRT:
30442 case ROTATE:
30443 case ROTATERT:
30444 /* Handle mul_highpart. */
30445 if (outer_code == TRUNCATE
30446 && GET_CODE (XEXP (x, 0)) == MULT)
30448 if (mode == DImode)
30449 *total = rs6000_cost->muldi;
30450 else
30451 *total = rs6000_cost->mulsi;
30452 return true;
30454 else if (outer_code == AND)
30455 *total = 0;
30456 else
30457 *total = COSTS_N_INSNS (1);
30458 return false;
30460 case SIGN_EXTEND:
30461 case ZERO_EXTEND:
30462 if (GET_CODE (XEXP (x, 0)) == MEM)
30463 *total = 0;
30464 else
30465 *total = COSTS_N_INSNS (1);
30466 return false;
30468 case COMPARE:
30469 case NEG:
30470 case ABS:
30471 if (!FLOAT_MODE_P (mode))
30473 *total = COSTS_N_INSNS (1);
30474 return false;
30476 /* FALLTHRU */
30478 case FLOAT:
30479 case UNSIGNED_FLOAT:
30480 case FIX:
30481 case UNSIGNED_FIX:
30482 case FLOAT_TRUNCATE:
30483 *total = rs6000_cost->fp;
30484 return false;
30486 case FLOAT_EXTEND:
30487 if (mode == DFmode)
30488 *total = 0;
30489 else
30490 *total = rs6000_cost->fp;
30491 return false;
30493 case UNSPEC:
30494 switch (XINT (x, 1))
30496 case UNSPEC_FRSP:
30497 *total = rs6000_cost->fp;
30498 return true;
30500 default:
30501 break;
30503 break;
30505 case CALL:
30506 case IF_THEN_ELSE:
30507 if (!speed)
30509 *total = COSTS_N_INSNS (1);
30510 return true;
30512 else if (FLOAT_MODE_P (mode)
30513 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
30515 *total = rs6000_cost->fp;
30516 return false;
30518 break;
30520 case NE:
30521 case EQ:
30522 case GTU:
30523 case LTU:
30524 /* Carry bit requires mode == Pmode.
30525 NEG or PLUS already counted so only add one. */
30526 if (mode == Pmode
30527 && (outer_code == NEG || outer_code == PLUS))
30529 *total = COSTS_N_INSNS (1);
30530 return true;
30532 if (outer_code == SET)
30534 if (XEXP (x, 1) == const0_rtx)
30536 if (TARGET_ISEL && !TARGET_MFCRF)
30537 *total = COSTS_N_INSNS (8);
30538 else
30539 *total = COSTS_N_INSNS (2);
30540 return true;
30542 else if (mode == Pmode)
30544 *total = COSTS_N_INSNS (3);
30545 return false;
30548 /* FALLTHRU */
30550 case GT:
30551 case LT:
30552 case UNORDERED:
30553 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
30555 if (TARGET_ISEL && !TARGET_MFCRF)
30556 *total = COSTS_N_INSNS (8);
30557 else
30558 *total = COSTS_N_INSNS (2);
30559 return true;
30561 /* CC COMPARE. */
30562 if (outer_code == COMPARE)
30564 *total = 0;
30565 return true;
30567 break;
30569 default:
30570 break;
30573 return false;
30576 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
30578 static bool
30579 rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
30580 bool speed)
30582 bool ret = rs6000_rtx_costs (x, code, outer_code, opno, total, speed);
30584 fprintf (stderr,
30585 "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
30586 "opno = %d, total = %d, speed = %s, x:\n",
30587 ret ? "complete" : "scan inner",
30588 GET_RTX_NAME (code),
30589 GET_RTX_NAME (outer_code),
30590 opno,
30591 *total,
30592 speed ? "true" : "false");
30594 debug_rtx (x);
30596 return ret;
30599 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
30601 static int
30602 rs6000_debug_address_cost (rtx x, machine_mode mode,
30603 addr_space_t as, bool speed)
30605 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
30607 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
30608 ret, speed ? "true" : "false");
30609 debug_rtx (x);
30611 return ret;
30615 /* A C expression returning the cost of moving data from a register of class
30616 CLASS1 to one of CLASS2. */
30618 static int
30619 rs6000_register_move_cost (machine_mode mode,
30620 reg_class_t from, reg_class_t to)
30622 int ret;
30624 if (TARGET_DEBUG_COST)
30625 dbg_cost_ctrl++;
30627 /* Moves from/to GENERAL_REGS. */
30628 if (reg_classes_intersect_p (to, GENERAL_REGS)
30629 || reg_classes_intersect_p (from, GENERAL_REGS))
30631 reg_class_t rclass = from;
30633 if (! reg_classes_intersect_p (to, GENERAL_REGS))
30634 rclass = to;
30636 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
30637 ret = (rs6000_memory_move_cost (mode, rclass, false)
30638 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
30640 /* It's more expensive to move CR_REGS than CR0_REGS because of the
30641 shift. */
30642 else if (rclass == CR_REGS)
30643 ret = 4;
30645 /* For those processors that have slow LR/CTR moves, make them more
30646 expensive than memory in order to bias spills to memory. */
30647 else if ((rs6000_cpu == PROCESSOR_POWER6
30648 || rs6000_cpu == PROCESSOR_POWER7
30649 || rs6000_cpu == PROCESSOR_POWER8)
30650 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
30651 ret = 6 * hard_regno_nregs[0][mode];
30653 else
30654 /* A move will cost one instruction per GPR moved. */
30655 ret = 2 * hard_regno_nregs[0][mode];
30658 /* If we have VSX, we can easily move between FPR or Altivec registers. */
30659 else if (VECTOR_MEM_VSX_P (mode)
30660 && reg_classes_intersect_p (to, VSX_REGS)
30661 && reg_classes_intersect_p (from, VSX_REGS))
30662 ret = 2 * hard_regno_nregs[32][mode];
30664 /* Moving between two similar registers is just one instruction. */
30665 else if (reg_classes_intersect_p (to, from))
30666 ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
30668 /* Everything else has to go through GENERAL_REGS. */
30669 else
30670 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
30671 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
30673 if (TARGET_DEBUG_COST)
30675 if (dbg_cost_ctrl == 1)
30676 fprintf (stderr,
30677 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
30678 ret, GET_MODE_NAME (mode), reg_class_names[from],
30679 reg_class_names[to]);
30680 dbg_cost_ctrl--;
30683 return ret;
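/* For example, on a 64-bit POWER7, moving a DImode value between a GPR
   and CTR costs 6 (hard_regno_nregs[0][DImode] == 1) versus 2 for a
   GPR-GPR copy, deliberately making an LR/CTR shuttle look worse than
   a spill to memory. */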
30686 /* A C expression returning the cost of moving data of MODE from a register to
30687 or from memory. */
30689 static int
30690 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
30691 bool in ATTRIBUTE_UNUSED)
30693 int ret;
30695 if (TARGET_DEBUG_COST)
30696 dbg_cost_ctrl++;
30698 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
30699 ret = 4 * hard_regno_nregs[0][mode];
30700 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
30701 || reg_classes_intersect_p (rclass, VSX_REGS)))
30702 ret = 4 * hard_regno_nregs[32][mode];
30703 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
30704 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
30705 else
30706 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
30708 if (TARGET_DEBUG_COST)
30710 if (dbg_cost_ctrl == 1)
30711 fprintf (stderr,
30712 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
30713 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
30714 dbg_cost_ctrl--;
30717 return ret;
30720 /* Returns a code for a target-specific builtin that implements
30721 reciprocal of the function, or NULL_TREE if not available. */
30723 static tree
30724 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
30725 bool sqrt ATTRIBUTE_UNUSED)
30727 if (optimize_insn_for_size_p ())
30728 return NULL_TREE;
30730 if (md_fn)
30731 switch (fn)
30733 case VSX_BUILTIN_XVSQRTDP:
30734 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
30735 return NULL_TREE;
30737 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
30739 case VSX_BUILTIN_XVSQRTSP:
30740 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
30741 return NULL_TREE;
30743 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
30745 default:
30746 return NULL_TREE;
30749 else
30750 switch (fn)
30752 case BUILT_IN_SQRT:
30753 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
30754 return NULL_TREE;
30756 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
30758 case BUILT_IN_SQRTF:
30759 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
30760 return NULL_TREE;
30762 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
30764 default:
30765 return NULL_TREE;
30769 /* Load up a constant. If the mode is a vector mode, splat the value across
30770 all of the vector elements. */
30772 static rtx
30773 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
30775 rtx reg;
30777 if (mode == SFmode || mode == DFmode)
30779 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
30780 reg = force_reg (mode, d);
30782 else if (mode == V4SFmode)
30784 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
30785 rtvec v = gen_rtvec (4, d, d, d, d);
30786 reg = gen_reg_rtx (mode);
30787 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30789 else if (mode == V2DFmode)
30791 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
30792 rtvec v = gen_rtvec (2, d, d);
30793 reg = gen_reg_rtx (mode);
30794 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30796 else
30797 gcc_unreachable ();
30799 return reg;
30802 /* Generate an FMA instruction. */
30804 static void
30805 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
30807 machine_mode mode = GET_MODE (target);
30808 rtx dst;
30810 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30811 gcc_assert (dst != NULL);
30813 if (dst != target)
30814 emit_move_insn (target, dst);
30817 /* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
30819 static void
30820 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
30822 machine_mode mode = GET_MODE (target);
30823 rtx dst;
30825 /* Altivec does not support fms directly;
30826 generate in terms of fma in that case. */
30827 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
30828 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
30829 else
30831 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
30832 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30834 gcc_assert (dst != NULL);
30836 if (dst != target)
30837 emit_move_insn (target, dst);
30840 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
30842 static void
30843 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
30845 machine_mode mode = GET_MODE (dst);
30846 rtx r;
30848 /* This is a tad more complicated, since the fnma_optab is for
30849 a different expression: fma(-m1, m2, a), which is the same
30850 thing except in the case of signed zeros.
30852 Fortunately we know that if FMA is supported that FNMSUB is
30853 also supported in the ISA. Just expand it directly. */
30855 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
30857 r = gen_rtx_NEG (mode, a);
30858 r = gen_rtx_FMA (mode, m1, m2, r);
30859 r = gen_rtx_NEG (mode, r);
30860 emit_insn (gen_rtx_SET (VOIDmode, dst, r));
30863 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
30864 add a reg_note saying that this was a division. Support both scalar and
30865 vector divide. Assumes no trapping math and finite arguments. */
30867 void
30868 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
30870 machine_mode mode = GET_MODE (dst);
30871 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
30872 int i;
30874 /* Low precision estimates guarantee 5 bits of accuracy. High
30875 precision estimates guarantee 14 bits of accuracy. SFmode
30876 requires 23 bits of accuracy. DFmode requires 52 bits of
30877 accuracy. Each pass at least doubles the accuracy, leading
30878 to the following. */
30879 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30880 if (mode == DFmode || mode == V2DFmode)
30881 passes++;
30883 enum insn_code code = optab_handler (smul_optab, mode);
30884 insn_gen_fn gen_mul = GEN_FCN (code);
30886 gcc_assert (code != CODE_FOR_nothing);
30888 one = rs6000_load_constant_and_splat (mode, dconst1);
30890 /* x0 = 1./d estimate */
30891 x0 = gen_reg_rtx (mode);
30892 emit_insn (gen_rtx_SET (VOIDmode, x0,
30893 gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
30894 UNSPEC_FRES)));
30896 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
30897 if (passes > 1) {
30899 /* e0 = 1. - d * x0 */
30900 e0 = gen_reg_rtx (mode);
30901 rs6000_emit_nmsub (e0, d, x0, one);
30903 /* x1 = x0 + e0 * x0 */
30904 x1 = gen_reg_rtx (mode);
30905 rs6000_emit_madd (x1, e0, x0, x0);
30907 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
30908 ++i, xprev = xnext, eprev = enext) {
30910 /* enext = eprev * eprev */
30911 enext = gen_reg_rtx (mode);
30912 emit_insn (gen_mul (enext, eprev, eprev));
30914 /* xnext = xprev + enext * xprev */
30915 xnext = gen_reg_rtx (mode);
30916 rs6000_emit_madd (xnext, enext, xprev, xprev);
30919 } else
30920 xprev = x0;
30922 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
30924 /* u = n * xprev */
30925 u = gen_reg_rtx (mode);
30926 emit_insn (gen_mul (u, n, xprev));
30928 /* v = n - (d * u) */
30929 v = gen_reg_rtx (mode);
30930 rs6000_emit_nmsub (v, d, u, n);
30932 /* dst = (v * xprev) + u */
30933 rs6000_emit_madd (dst, v, xprev, u);
30935 if (note_p)
30936 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
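/* Sketch of the accuracy argument above: a 14-bit estimate
   (TARGET_RECIP_PRECISION) doubles to ~28 bits after one refinement,
   covering SFmode's 23; DFmode's 52 bits needs a second pass.  With
   only a 5-bit estimate the doubling chain 5 -> 10 -> 20 -> 40 -> 80
   gives the 3 (SFmode) and 4 (DFmode) passes selected above. */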
30939 /* Newton-Raphson approximation of single/double-precision floating point
30940 rsqrt. Assumes no trapping math and finite arguments. */
30942 void
30943 rs6000_emit_swrsqrt (rtx dst, rtx src)
30945 machine_mode mode = GET_MODE (src);
30946 rtx x0 = gen_reg_rtx (mode);
30947 rtx y = gen_reg_rtx (mode);
30949 /* Low precision estimates guarantee 5 bits of accuracy. High
30950 precision estimates guarantee 14 bits of accuracy. SFmode
30951 requires 23 bits of accuracy. DFmode requires 52 bits of
30952 accuracy. Each pass at least doubles the accuracy, leading
30953 to the following. */
30954 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30955 if (mode == DFmode || mode == V2DFmode)
30956 passes++;
30958 REAL_VALUE_TYPE dconst3_2;
30959 int i;
30960 rtx halfthree;
30961 enum insn_code code = optab_handler (smul_optab, mode);
30962 insn_gen_fn gen_mul = GEN_FCN (code);
30964 gcc_assert (code != CODE_FOR_nothing);
30966 /* Load up the constant 1.5 either as a scalar, or as a vector. */
30967 real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
30968 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
30970 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
30972 /* x0 = rsqrt estimate */
30973 emit_insn (gen_rtx_SET (VOIDmode, x0,
30974 gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
30975 UNSPEC_RSQRT)));
30977 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
30978 rs6000_emit_msub (y, src, halfthree, src);
30980 for (i = 0; i < passes; i++)
30982 rtx x1 = gen_reg_rtx (mode);
30983 rtx u = gen_reg_rtx (mode);
30984 rtx v = gen_reg_rtx (mode);
30986 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
30987 emit_insn (gen_mul (u, x0, x0));
30988 rs6000_emit_nmsub (v, y, u, halfthree);
30989 emit_insn (gen_mul (x1, x0, v));
30990 x0 = x1;
30993 emit_move_insn (dst, x0);
30994 return;
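/* The loop body is the standard Newton step for f(x) = 1/x**2 - src:
   x1 = x0 * (1.5 - (0.5 * src) * x0 * x0).  Precomputing y = 0.5 * src
   as 1.5 * src - src means only the single constant 1.5 ever has to be
   materialized. */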
30997 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
30998 (Power7) targets. DST is the target, and SRC is the argument operand. */
31000 void
31001 rs6000_emit_popcount (rtx dst, rtx src)
31003 machine_mode mode = GET_MODE (dst);
31004 rtx tmp1, tmp2;
31006 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
31007 if (TARGET_POPCNTD)
31009 if (mode == SImode)
31010 emit_insn (gen_popcntdsi2 (dst, src));
31011 else
31012 emit_insn (gen_popcntddi2 (dst, src));
31013 return;
31016 tmp1 = gen_reg_rtx (mode);
31018 if (mode == SImode)
31020 emit_insn (gen_popcntbsi2 (tmp1, src));
31021 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
31022 NULL_RTX, 0);
31023 tmp2 = force_reg (SImode, tmp2);
31024 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
31026 else
31028 emit_insn (gen_popcntbdi2 (tmp1, src));
31029 tmp2 = expand_mult (DImode, tmp1,
31030 GEN_INT ((HOST_WIDE_INT)
31031 0x01010101 << 32 | 0x01010101),
31032 NULL_RTX, 0);
31033 tmp2 = force_reg (DImode, tmp2);
31034 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
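/* The multiply trick: popcntb leaves each byte holding the bit count of
   the corresponding source byte, and multiplying by the replicated-ones
   constant accumulates all byte counts into the most significant byte,
   which the final shift (24 for SImode, 56 for DImode) extracts.  For
   example, SImode src 0x01020304 gives popcntb 0x01010201, and the
   product's top byte is 1 + 1 + 2 + 1 = 5 == popcount (0x01020304). */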
31039 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
31040 target, and SRC is the argument operand. */
31042 void
31043 rs6000_emit_parity (rtx dst, rtx src)
31045 machine_mode mode = GET_MODE (dst);
31046 rtx tmp;
31048 tmp = gen_reg_rtx (mode);
31050 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
31051 if (TARGET_CMPB)
31053 if (mode == SImode)
31055 emit_insn (gen_popcntbsi2 (tmp, src));
31056 emit_insn (gen_paritysi2_cmpb (dst, tmp));
31058 else
31060 emit_insn (gen_popcntbdi2 (tmp, src));
31061 emit_insn (gen_paritydi2_cmpb (dst, tmp));
31063 return;
31066 if (mode == SImode)
31068 /* Is mult+shift >= shift+xor+shift+xor? */
31069 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
31071 rtx tmp1, tmp2, tmp3, tmp4;
31073 tmp1 = gen_reg_rtx (SImode);
31074 emit_insn (gen_popcntbsi2 (tmp1, src));
31076 tmp2 = gen_reg_rtx (SImode);
31077 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
31078 tmp3 = gen_reg_rtx (SImode);
31079 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
31081 tmp4 = gen_reg_rtx (SImode);
31082 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
31083 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
31085 else
31086 rs6000_emit_popcount (tmp, src);
31087 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
31089 else
31091 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
31092 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
31094 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
31096 tmp1 = gen_reg_rtx (DImode);
31097 emit_insn (gen_popcntbdi2 (tmp1, src));
31099 tmp2 = gen_reg_rtx (DImode);
31100 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
31101 tmp3 = gen_reg_rtx (DImode);
31102 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
31104 tmp4 = gen_reg_rtx (DImode);
31105 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
31106 tmp5 = gen_reg_rtx (DImode);
31107 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
31109 tmp6 = gen_reg_rtx (DImode);
31110 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
31111 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
31113 else
31114 rs6000_emit_popcount (tmp, src);
31115 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
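/* The fallback folds parity by halving XORs: XORing the popcntb result
   with itself shifted right by 16 and then by 8 (plus a 32-bit fold
   first for DImode) leaves the parity of all the byte counts in bit 0,
   which the final AND with 1 extracts. */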
31119 /* Expand an Altivec constant permutation for little endian mode.
31120 There are two issues: First, the two input operands must be
31121 swapped so that together they form a double-wide array in LE
31122 order. Second, the vperm instruction has surprising behavior
31123 in LE mode: it interprets the elements of the source vectors
31124 in BE mode ("left to right") and interprets the elements of
31125 the destination vector in LE mode ("right to left"). To
31126 correct for this, we must subtract each element of the permute
31127 control vector from 31.
31129 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
31130 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
31131 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
31132 serve as the permute control vector. Then, in BE mode,
31134 vperm 9,10,11,12
31136 places the desired result in vr9. However, in LE mode the
31137 vector contents will be
31139 vr10 = 00000003 00000002 00000001 00000000
31140 vr11 = 00000007 00000006 00000005 00000004
31142 The result of the vperm using the same permute control vector is
31144 vr9 = 05000000 07000000 01000000 03000000
31146 That is, the leftmost 4 bytes of vr10 are interpreted as the
31147 source for the rightmost 4 bytes of vr9, and so on.
31149 If we change the permute control vector to
31151 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
31153 and issue
31155 vperm 9,11,10,12
31157 we get the desired
31159 vr9 = 00000006 00000004 00000002 00000000. */
31161 void
31162 altivec_expand_vec_perm_const_le (rtx operands[4])
31164 unsigned int i;
31165 rtx perm[16];
31166 rtx constv, unspec;
31167 rtx target = operands[0];
31168 rtx op0 = operands[1];
31169 rtx op1 = operands[2];
31170 rtx sel = operands[3];
31172 /* Unpack and adjust the constant selector. */
31173 for (i = 0; i < 16; ++i)
31175 rtx e = XVECEXP (sel, 0, i);
31176 unsigned int elt = 31 - (INTVAL (e) & 31);
31177 perm[i] = GEN_INT (elt);
31180 /* Expand to a permute, swapping the inputs and using the
31181 adjusted selector. */
31182 if (!REG_P (op0))
31183 op0 = force_reg (V16QImode, op0);
31184 if (!REG_P (op1))
31185 op1 = force_reg (V16QImode, op1);
31187 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
31188 constv = force_reg (V16QImode, constv);
31189 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
31190 UNSPEC_VPERM);
31191 if (!REG_P (target))
31193 rtx tmp = gen_reg_rtx (V16QImode);
31194 emit_move_insn (tmp, unspec);
31195 unspec = tmp;
31198 emit_move_insn (target, unspec);
31201 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
31202 permute control vector. But here it's not a constant, so we must
31203 generate a vector NAND or NOR to do the adjustment. */
31205 void
31206 altivec_expand_vec_perm_le (rtx operands[4])
31208 rtx notx, iorx, unspec;
31209 rtx target = operands[0];
31210 rtx op0 = operands[1];
31211 rtx op1 = operands[2];
31212 rtx sel = operands[3];
31213 rtx tmp = target;
31214 rtx norreg = gen_reg_rtx (V16QImode);
31215 machine_mode mode = GET_MODE (target);
31217 /* Get everything in regs so the pattern matches. */
31218 if (!REG_P (op0))
31219 op0 = force_reg (mode, op0);
31220 if (!REG_P (op1))
31221 op1 = force_reg (mode, op1);
31222 if (!REG_P (sel))
31223 sel = force_reg (V16QImode, sel);
31224 if (!REG_P (target))
31225 tmp = gen_reg_rtx (mode);
31227 /* Invert the selector with a VNAND if available, else a VNOR.
31228 The VNAND is preferred for future fusion opportunities. */
31229 notx = gen_rtx_NOT (V16QImode, sel);
31230 iorx = (TARGET_P8_VECTOR
31231 ? gen_rtx_IOR (V16QImode, notx, notx)
31232 : gen_rtx_AND (V16QImode, notx, notx));
31233 emit_insn (gen_rtx_SET (VOIDmode, norreg, iorx));
31235 /* Permute with operands reversed and adjusted selector. */
31236 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
31237 UNSPEC_VPERM);
31239 /* Copy into target, possibly by way of a register. */
31240 if (!REG_P (target))
31242 emit_move_insn (tmp, unspec);
31243 unspec = tmp;
31246 emit_move_insn (target, unspec);
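/* The byte-wise NOT is exact here because selector bytes are only used
   modulo 32, and for 5-bit values ~x == 31 - x; inverting the whole
   vector is thus the run-time counterpart of the 31 - elt adjustment
   applied to constant selectors above. */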
31249 /* Expand an Altivec constant permutation. Return true if we match
31250 an efficient implementation; false to fall back to VPERM. */
31252 bool
31253 altivec_expand_vec_perm_const (rtx operands[4])
31255 struct altivec_perm_insn {
31256 HOST_WIDE_INT mask;
31257 enum insn_code impl;
31258 unsigned char perm[16];
31260 static const struct altivec_perm_insn patterns[] = {
31261 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
31262 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
31263 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
31264 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
31265 { OPTION_MASK_ALTIVEC,
31266 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
31267 : CODE_FOR_altivec_vmrglb_direct),
31268 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
31269 { OPTION_MASK_ALTIVEC,
31270 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
31271 : CODE_FOR_altivec_vmrglh_direct),
31272 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
31273 { OPTION_MASK_ALTIVEC,
31274 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
31275 : CODE_FOR_altivec_vmrglw_direct),
31276 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
31277 { OPTION_MASK_ALTIVEC,
31278 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
31279 : CODE_FOR_altivec_vmrghb_direct),
31280 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
31281 { OPTION_MASK_ALTIVEC,
31282 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
31283 : CODE_FOR_altivec_vmrghh_direct),
31284 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
31285 { OPTION_MASK_ALTIVEC,
31286 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
31287 : CODE_FOR_altivec_vmrghw_direct),
31288 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
31289 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
31290 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
31291 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
31292 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
31295 unsigned int i, j, elt, which;
31296 unsigned char perm[16];
31297 rtx target, op0, op1, sel, x;
31298 bool one_vec;
31300 target = operands[0];
31301 op0 = operands[1];
31302 op1 = operands[2];
31303 sel = operands[3];
31305 /* Unpack the constant selector. */
31306 for (i = which = 0; i < 16; ++i)
31308 rtx e = XVECEXP (sel, 0, i);
31309 elt = INTVAL (e) & 31;
31310 which |= (elt < 16 ? 1 : 2);
31311 perm[i] = elt;
31314 /* Simplify the constant selector based on operands. */
31315 switch (which)
31317 default:
31318 gcc_unreachable ();
31320 case 3:
31321 one_vec = false;
31322 if (!rtx_equal_p (op0, op1))
31323 break;
31324 /* FALLTHRU */
31326 case 2:
31327 for (i = 0; i < 16; ++i)
31328 perm[i] &= 15;
31329 op0 = op1;
31330 one_vec = true;
31331 break;
31333 case 1:
31334 op1 = op0;
31335 one_vec = true;
31336 break;
31339 /* Look for splat patterns. */
31340 if (one_vec)
31342 elt = perm[0];
31344 for (i = 0; i < 16; ++i)
31345 if (perm[i] != elt)
31346 break;
31347 if (i == 16)
31349 if (!BYTES_BIG_ENDIAN)
31350 elt = 15 - elt;
31351 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
31352 return true;
31355 if (elt % 2 == 0)
31357 for (i = 0; i < 16; i += 2)
31358 if (perm[i] != elt || perm[i + 1] != elt + 1)
31359 break;
31360 if (i == 16)
31362 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
31363 x = gen_reg_rtx (V8HImode);
31364 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
31365 GEN_INT (field)));
31366 emit_move_insn (target, gen_lowpart (V16QImode, x));
31367 return true;
31371 if (elt % 4 == 0)
31373 for (i = 0; i < 16; i += 4)
31374 if (perm[i] != elt
31375 || perm[i + 1] != elt + 1
31376 || perm[i + 2] != elt + 2
31377 || perm[i + 3] != elt + 3)
31378 break;
31379 if (i == 16)
31381 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
31382 x = gen_reg_rtx (V4SImode);
31383 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
31384 GEN_INT (field)));
31385 emit_move_insn (target, gen_lowpart (V16QImode, x));
31386 return true;
31391 /* Look for merge and pack patterns. */
31392 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
31394 bool swapped;
31396 if ((patterns[j].mask & rs6000_isa_flags) == 0)
31397 continue;
31399 elt = patterns[j].perm[0];
31400 if (perm[0] == elt)
31401 swapped = false;
31402 else if (perm[0] == elt + 16)
31403 swapped = true;
31404 else
31405 continue;
31406 for (i = 1; i < 16; ++i)
31408 elt = patterns[j].perm[i];
31409 if (swapped)
31410 elt = (elt >= 16 ? elt - 16 : elt + 16);
31411 else if (one_vec && elt >= 16)
31412 elt -= 16;
31413 if (perm[i] != elt)
31414 break;
31416 if (i == 16)
31418 enum insn_code icode = patterns[j].impl;
31419 machine_mode omode = insn_data[icode].operand[0].mode;
31420 machine_mode imode = insn_data[icode].operand[1].mode;
31422 /* For little-endian, don't use vpkuwum and vpkuhum if the
31423 underlying vector type is not V4SI and V8HI, respectively.
31424 For example, using vpkuwum with a V8HI picks up the even
31425 halfwords (BE numbering) when the even halfwords (LE
31426 numbering) are what we need. */
31427 if (!BYTES_BIG_ENDIAN
31428 && icode == CODE_FOR_altivec_vpkuwum_direct
31429 && ((GET_CODE (op0) == REG
31430 && GET_MODE (op0) != V4SImode)
31431 || (GET_CODE (op0) == SUBREG
31432 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
31433 continue;
31434 if (!BYTES_BIG_ENDIAN
31435 && icode == CODE_FOR_altivec_vpkuhum_direct
31436 && ((GET_CODE (op0) == REG
31437 && GET_MODE (op0) != V8HImode)
31438 || (GET_CODE (op0) == SUBREG
31439 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
31440 continue;
31442 /* For little-endian, the two input operands must be swapped
31443 (or swapped back) to ensure proper right-to-left numbering
31444 from 0 to 2N-1. */
31445 if (swapped ^ !BYTES_BIG_ENDIAN)
31446 std::swap (op0, op1);
31447 if (imode != V16QImode)
31449 op0 = gen_lowpart (imode, op0);
31450 op1 = gen_lowpart (imode, op1);
31452 if (omode == V16QImode)
31453 x = target;
31454 else
31455 x = gen_reg_rtx (omode);
31456 emit_insn (GEN_FCN (icode) (x, op0, op1));
31457 if (omode != V16QImode)
31458 emit_move_insn (target, gen_lowpart (V16QImode, x));
31459 return true;
31463 if (!BYTES_BIG_ENDIAN)
31465 altivec_expand_vec_perm_const_le (operands);
31466 return true;
31469 return false;
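/* Illustrative sketch (editor's addition, not from the GCC sources): the
   first byte-merge pattern in the table above corresponds to this GNU C
   shuffle; on a big-endian Power target the expander should match it to a
   single vmrghb instead of falling back to a generic vperm.  */
#if 0
typedef unsigned char v16qi __attribute__ ((vector_size (16)));
static v16qi
merge_high_bytes (v16qi a, v16qi b)
{
  const v16qi sel = { 0, 16, 1, 17, 2, 18, 3, 19,
		      4, 20, 5, 21, 6, 22, 7, 23 };
  return __builtin_shuffle (a, b, sel);	/* expected: vmrghb on BE */
}
#endif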
31472 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
31473 Return true if we match an efficient implementation. */
31475 static bool
31476 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
31477 unsigned char perm0, unsigned char perm1)
31479 rtx x;
31481 /* If both selectors come from the same operand, fold to single op. */
31482 if ((perm0 & 2) == (perm1 & 2))
31484 if (perm0 & 2)
31485 op0 = op1;
31486 else
31487 op1 = op0;
31489 /* If both operands are equal, fold to simpler permutation. */
31490 if (rtx_equal_p (op0, op1))
31492 perm0 = perm0 & 1;
31493 perm1 = (perm1 & 1) + 2;
31495 /* If the first selector comes from the second operand, swap. */
31496 else if (perm0 & 2)
31498 if (perm1 & 2)
31499 return false;
31500 perm0 -= 2;
31501 perm1 += 2;
31502 std::swap (op0, op1);
31504 /* If the second selector does not come from the second operand, fail. */
31505 else if ((perm1 & 2) == 0)
31506 return false;
31508 /* Success! */
31509 if (target != NULL)
31511 machine_mode vmode, dmode;
31512 rtvec v;
31514 vmode = GET_MODE (target);
31515 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
31516 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
31517 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
31518 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
31519 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
31520 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31522 return true;
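/* Editor's note (not from the GCC sources): worked examples of the folding
   above for a two-element permute, where selector values 0..1 pick from
   op0 and 2..3 pick from op1:

     perm = {3, 0}: first from op1, second from op0
       -> perm0 -= 2, perm1 += 2, swap operands: perm becomes {1, 2}.
     perm = {2, 2}: both from op1
       -> fold to one operand; perm becomes {0, 2}.

   The emitted RTL then has the shape

     (set target (vec_select:<vmode>
		   (vec_concat:<dmode> op0 op1)
		   (parallel [(const_int perm0) (const_int perm1)])))  */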
31525 bool
31526 rs6000_expand_vec_perm_const (rtx operands[4])
31528 rtx target, op0, op1, sel;
31529 unsigned char perm0, perm1;
31531 target = operands[0];
31532 op0 = operands[1];
31533 op1 = operands[2];
31534 sel = operands[3];
31536 /* Unpack the constant selector. */
31537 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
31538 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
31540 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
31543 /* Test whether a constant permutation is supported. */
31545 static bool
31546 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
31547 const unsigned char *sel)
31549 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
31550 if (TARGET_ALTIVEC)
31551 return true;
31553 /* Check for ps_merge* or evmerge* insns. */
31554 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
31555 || (TARGET_SPE && vmode == V2SImode))
31557 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
31558 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
31559 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
31562 return false;
31565 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
31567 static void
31568 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
31569 machine_mode vmode, unsigned nelt, rtx perm[])
31571 machine_mode imode;
31572 rtx x;
31574 imode = vmode;
31575 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
31577 imode = GET_MODE_INNER (vmode);
31578 imode = mode_for_size (GET_MODE_BITSIZE (imode), MODE_INT, 0);
31579 imode = mode_for_vector (imode, nelt);
31582 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
31583 x = expand_vec_perm (vmode, op0, op1, x, target);
31584 if (x != target)
31585 emit_move_insn (target, x);
31588 /* Expand an extract even operation. */
31590 void
31591 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
31593 machine_mode vmode = GET_MODE (target);
31594 unsigned i, nelt = GET_MODE_NUNITS (vmode);
31595 rtx perm[16];
31597 for (i = 0; i < nelt; i++)
31598 perm[i] = GEN_INT (i * 2);
31600 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
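/* Editor's note (not from the GCC sources): for V4SImode this builds the
   selector {0, 2, 4, 6} over the 8-element concatenation of op0 and op1,
   i.e. result = { op0[0], op0[2], op1[0], op1[2] }.  */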
31603 /* Expand a vector interleave operation. */
31605 void
31606 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
31608 machine_mode vmode = GET_MODE (target);
31609 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
31610 rtx perm[16];
31612 high = (highp ? 0 : nelt / 2);
31613 for (i = 0; i < nelt / 2; i++)
31615 perm[i * 2] = GEN_INT (i + high);
31616 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
31619 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
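/* Editor's note (not from the GCC sources): for V4SImode, highp == true
   gives high == 0 and the selector {0, 4, 1, 5}, i.e.
   { op0[0], op1[0], op0[1], op1[1] }; highp == false gives high == 2 and
   {2, 6, 3, 7}.  "High" here is in big-endian element order.  */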
31622 /* Scale a V2DF vector SRC by two raised to the power SCALE, and place the result in TGT. */
31623 void
31624 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
31626 HOST_WIDE_INT hwi_scale (scale);
31627 REAL_VALUE_TYPE r_pow;
31628 rtvec v = rtvec_alloc (2);
31629 rtx elt;
31630 rtx scale_vec = gen_reg_rtx (V2DFmode);
31631 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
31632 elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
31633 RTVEC_ELT (v, 0) = elt;
31634 RTVEC_ELT (v, 1) = elt;
31635 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
31636 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
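/* Illustrative sketch (editor's addition, not from the GCC sources): a
   scalar model of the operation above, which multiplies each lane of SRC
   by 2**scale via the constant vector built from real_powi.  */
#if 0
#include <math.h>
static double
scale_lane (double x, int scale)
{
  return ldexp (x, scale);	/* x * 2**scale, same as the vector path */
}
#endif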
31639 /* Return an RTX representing where to find the function value of a
31640 function returning MODE. */
31641 static rtx
31642 rs6000_complex_function_value (machine_mode mode)
31644 unsigned int regno;
31645 rtx r1, r2;
31646 machine_mode inner = GET_MODE_INNER (mode);
31647 unsigned int inner_bytes = GET_MODE_SIZE (inner);
31649 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31650 regno = FP_ARG_RETURN;
31651 else
31653 regno = GP_ARG_RETURN;
31655 /* 32-bit is OK since it'll go in r3/r4. */
31656 if (TARGET_32BIT && inner_bytes >= 4)
31657 return gen_rtx_REG (mode, regno);
31660 if (inner_bytes >= 8)
31661 return gen_rtx_REG (mode, regno);
31663 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
31664 const0_rtx);
31665 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
31666 GEN_INT (inner_bytes));
31667 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
31670 /* Return an rtx describing a return value of MODE as a PARALLEL
31671 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
31672 stride REG_STRIDE. */
31674 static rtx
31675 rs6000_parallel_return (machine_mode mode,
31676 int n_elts, machine_mode elt_mode,
31677 unsigned int regno, unsigned int reg_stride)
31679 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
31681 int i;
31682 for (i = 0; i < n_elts; i++)
31684 rtx r = gen_rtx_REG (elt_mode, regno);
31685 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
31686 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
31687 regno += reg_stride;
31690 return par;
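/* Editor's note (not from the GCC sources): for example,
   rs6000_parallel_return (DImode, 2, SImode, GP_ARG_RETURN, 1) describes
   a long long split across r3/r4:

     (parallel:DI [(expr_list (reg:SI 3) (const_int 0))
		   (expr_list (reg:SI 4) (const_int 4))])  */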
31693 /* Target hook for TARGET_FUNCTION_VALUE.
31695 On the SPE, both FPs and vectors are returned in r3.
31697 On RS/6000 an integer value is in r3 and a floating-point value is in
31698 fp1, unless -msoft-float. */
31700 static rtx
31701 rs6000_function_value (const_tree valtype,
31702 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
31703 bool outgoing ATTRIBUTE_UNUSED)
31705 machine_mode mode;
31706 unsigned int regno;
31707 machine_mode elt_mode;
31708 int n_elts;
31710 /* Special handling for structs in darwin64. */
31711 if (TARGET_MACHO
31712 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
31714 CUMULATIVE_ARGS valcum;
31715 rtx valret;
31717 valcum.words = 0;
31718 valcum.fregno = FP_ARG_MIN_REG;
31719 valcum.vregno = ALTIVEC_ARG_MIN_REG;
31720 /* Do a trial code generation as if this were going to be passed as
31721 an argument; if any part goes in memory, we return NULL. */
31722 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
31723 if (valret)
31724 return valret;
31725 /* Otherwise fall through to standard ABI rules. */
31728 mode = TYPE_MODE (valtype);
31730 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
31731 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
31733 int first_reg, n_regs;
31735 if (SCALAR_FLOAT_MODE_P (elt_mode))
31737 /* _Decimal128 must use even/odd register pairs. */
31738 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31739 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
31741 else
31743 first_reg = ALTIVEC_ARG_RETURN;
31744 n_regs = 1;
31747 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
31750 /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI. */
31751 if (TARGET_32BIT && TARGET_POWERPC64)
31752 switch (mode)
31754 default:
31755 break;
31756 case DImode:
31757 case SCmode:
31758 case DCmode:
31759 case TCmode:
31760 int count = GET_MODE_SIZE (mode) / 4;
31761 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
31764 if ((INTEGRAL_TYPE_P (valtype)
31765 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
31766 || (POINTER_TYPE_P (valtype) && !upc_shared_type_p (TREE_TYPE (valtype))))
31767 mode = TARGET_32BIT ? SImode : DImode;
31769 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31770 /* _Decimal128 must use an even/odd register pair. */
31771 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31772 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
31773 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
31774 regno = FP_ARG_RETURN;
31775 else if (TREE_CODE (valtype) == COMPLEX_TYPE
31776 && targetm.calls.split_complex_arg)
31777 return rs6000_complex_function_value (mode);
31778 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31779 return register is used in both cases, and we won't see V2DImode/V2DFmode
31780 for pure altivec, combine the two cases. */
31781 else if (TREE_CODE (valtype) == VECTOR_TYPE
31782 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
31783 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
31784 regno = ALTIVEC_ARG_RETURN;
31785 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31786 && (mode == DFmode || mode == DCmode
31787 || mode == TFmode || mode == TCmode))
31788 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31789 else
31790 regno = GP_ARG_RETURN;
31792 return gen_rtx_REG (mode, regno);
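/* Editor's note (not from the GCC sources): under the ELFv2 ABI the
   homogeneous-aggregate path above returns, e.g.,
     struct hfa { double a, b, c; };
   in fp1..fp3 via rs6000_parallel_return rather than in GPRs or memory.  */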
31795 /* Define how to find the value returned by a library function
31796 assuming the value has mode MODE. */
31797 rtx
31798 rs6000_libcall_value (machine_mode mode)
31800 unsigned int regno;
31802 /* A long long return value needs to be split in the -mpowerpc64, 32-bit ABI. */
31803 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
31804 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
31806 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31807 /* _Decimal128 must use an even/odd register pair. */
31808 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31809 else if (SCALAR_FLOAT_MODE_P (mode)
31810 && TARGET_HARD_FLOAT && TARGET_FPRS
31811 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
31812 regno = FP_ARG_RETURN;
31813 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31814 return register is used in both cases, and we won't see V2DImode/V2DFmode
31815 for pure altivec, combine the two cases. */
31816 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
31817 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
31818 regno = ALTIVEC_ARG_RETURN;
31819 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
31820 return rs6000_complex_function_value (mode);
31821 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31822 && (mode == DFmode || mode == DCmode
31823 || mode == TFmode || mode == TCmode))
31824 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31825 else
31826 regno = GP_ARG_RETURN;
31828 return gen_rtx_REG (mode, regno);
31832 /* Return true if we use LRA instead of the reload pass. */
31833 static bool
31834 rs6000_lra_p (void)
31836 return rs6000_lra_flag;
31839 /* Given FROM and TO register numbers, say whether this elimination is allowed.
31840 Frame pointer elimination is automatically handled.
31842 For the RS/6000, if frame pointer elimination is being done, we would like
31843 to convert ap into fp, not sp.
31845 We need r30 if -mminimal-toc was specified, and there are constant pool
31846 references. */
31848 static bool
31849 rs6000_can_eliminate (const int from, const int to)
31851 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
31852 ? ! frame_pointer_needed
31853 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
31854 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
31855 : true);
31858 /* Define the offset between two registers, FROM to be eliminated and its
31859 replacement TO, at the start of a routine. */
31860 HOST_WIDE_INT
31861 rs6000_initial_elimination_offset (int from, int to)
31863 rs6000_stack_t *info = rs6000_stack_info ();
31864 HOST_WIDE_INT offset;
31866 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31867 offset = info->push_p ? 0 : -info->total_size;
31868 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31870 offset = info->push_p ? 0 : -info->total_size;
31871 if (FRAME_GROWS_DOWNWARD)
31872 offset += info->fixed_size + info->vars_size + info->parm_size;
31874 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31875 offset = FRAME_GROWS_DOWNWARD
31876 ? info->fixed_size + info->vars_size + info->parm_size
31877 : 0;
31878 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31879 offset = info->total_size;
31880 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31881 offset = info->push_p ? info->total_size : 0;
31882 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
31883 offset = 0;
31884 else
31885 gcc_unreachable ();
31887 return offset;
31890 static rtx
31891 rs6000_dwarf_register_span (rtx reg)
31893 rtx parts[8];
31894 int i, words;
31895 unsigned regno = REGNO (reg);
31896 machine_mode mode = GET_MODE (reg);
31898 if (TARGET_SPE
31899 && regno < 32
31900 && (SPE_VECTOR_MODE (GET_MODE (reg))
31901 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
31902 && mode != SFmode && mode != SDmode && mode != SCmode)))
31903 ;
31904 else
31905 return NULL_RTX;
31907 regno = REGNO (reg);
31909 /* The duality of the SPE register size wreaks all kinds of havoc.
31910 This is a way of distinguishing r0 in 32-bits from r0 in
31911 64-bits. */
31912 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
31913 gcc_assert (words <= 4);
31914 for (i = 0; i < words; i++, regno++)
31916 if (BYTES_BIG_ENDIAN)
31918 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31919 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
31921 else
31923 parts[2 * i] = gen_rtx_REG (SImode, regno);
31924 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31928 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
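/* Editor's note (not from the GCC sources): e.g. a DFmode value in r5 on
   a big-endian E500 target gives words == 1 and the span

     (parallel [(reg:SI <r5 high half>) (reg:SI 5)])

   so the unwinder sees both 32-bit halves of the 64-bit SPE register.  */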
31931 /* Fill in sizes for SPE register high parts in table used by unwinder. */
31933 static void
31934 rs6000_init_dwarf_reg_sizes_extra (tree address)
31936 if (TARGET_SPE)
31938 int i;
31939 machine_mode mode = TYPE_MODE (char_type_node);
31940 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31941 rtx mem = gen_rtx_MEM (BLKmode, addr);
31942 rtx value = gen_int_mode (4, mode);
31944 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
31946 int column = DWARF_REG_TO_UNWIND_COLUMN
31947 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31948 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31950 emit_move_insn (adjust_address (mem, mode, offset), value);
31954 if (TARGET_MACHO && ! TARGET_ALTIVEC)
31956 int i;
31957 machine_mode mode = TYPE_MODE (char_type_node);
31958 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31959 rtx mem = gen_rtx_MEM (BLKmode, addr);
31960 rtx value = gen_int_mode (16, mode);
31962 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
31963 The unwinder still needs to know the size of Altivec registers. */
31965 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
31967 int column = DWARF_REG_TO_UNWIND_COLUMN
31968 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31969 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31971 emit_move_insn (adjust_address (mem, mode, offset), value);
31976 /* Map internal gcc register numbers to debug format register numbers.
31977 FORMAT specifies the type of debug register number to use:
31978 0 -- debug information, except for frame-related sections
31979 1 -- DWARF .debug_frame section
31980 2 -- DWARF .eh_frame section */
31982 unsigned int
31983 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
31985 /* We never use the GCC internal number for SPE high registers.
31986 Those are mapped to the 1200..1231 range for all debug formats. */
31987 if (SPE_HIGH_REGNO_P (regno))
31988 return regno - FIRST_SPE_HIGH_REGNO + 1200;
31990 /* Except for the above, we use the internal number for non-DWARF
31991 debug information, and also for .eh_frame. */
31992 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
31993 return regno;
31995 /* On some platforms, we use the standard DWARF register
31996 numbering for .debug_info and .debug_frame. */
31997 #ifdef RS6000_USE_DWARF_NUMBERING
31998 if (regno <= 63)
31999 return regno;
32000 if (regno == LR_REGNO)
32001 return 108;
32002 if (regno == CTR_REGNO)
32003 return 109;
32004 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
32005 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
32006 The actual code emitted saves the whole of CR, so we map CR2_REGNO
32007 to the DWARF reg for CR. */
32008 if (format == 1 && regno == CR2_REGNO)
32009 return 64;
32010 if (CR_REGNO_P (regno))
32011 return regno - CR0_REGNO + 86;
32012 if (regno == CA_REGNO)
32013 return 101; /* XER */
32014 if (ALTIVEC_REGNO_P (regno))
32015 return regno - FIRST_ALTIVEC_REGNO + 1124;
32016 if (regno == VRSAVE_REGNO)
32017 return 356;
32018 if (regno == VSCR_REGNO)
32019 return 67;
32020 if (regno == SPE_ACC_REGNO)
32021 return 99;
32022 if (regno == SPEFSCR_REGNO)
32023 return 612;
32024 #endif
32025 return regno;
32028 /* target hook eh_return_filter_mode */
32029 static machine_mode
32030 rs6000_eh_return_filter_mode (void)
32032 return TARGET_32BIT ? SImode : word_mode;
32035 /* Target hook for scalar_mode_supported_p. */
32036 static bool
32037 rs6000_scalar_mode_supported_p (machine_mode mode)
32039 /* -m32 does not support TImode. This is the default, from
32040 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
32041 same ABI as for -m32. But default_scalar_mode_supported_p allows
32042 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
32043 for -mpowerpc64. */
32044 if (TARGET_32BIT && mode == TImode)
32045 return false;
32047 if (DECIMAL_FLOAT_MODE_P (mode))
32048 return default_decimal_float_supported_p ();
32049 else
32050 return default_scalar_mode_supported_p (mode);
32053 /* Target hook for vector_mode_supported_p. */
32054 static bool
32055 rs6000_vector_mode_supported_p (machine_mode mode)
32058 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
32059 return true;
32061 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
32062 return true;
32064 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
32065 return true;
32067 else
32068 return false;
32071 /* Target hook for invalid_arg_for_unprototyped_fn. */
32072 static const char *
32073 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
32075 return (!rs6000_darwin64_abi
32076 && typelist == 0
32077 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
32078 && (funcdecl == NULL_TREE
32079 || (TREE_CODE (funcdecl) == FUNCTION_DECL
32080 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
32081 ? N_("AltiVec argument passed to unprototyped function")
32082 : NULL;
32085 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
32086 setup by using __stack_chk_fail_local hidden function instead of
32087 calling __stack_chk_fail directly. Otherwise it is better to call
32088 __stack_chk_fail directly. */
32090 static tree ATTRIBUTE_UNUSED
32091 rs6000_stack_protect_fail (void)
32093 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
32094 ? default_hidden_stack_protect_fail ()
32095 : default_external_stack_protect_fail ();
32098 void
32099 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
32100 int num_operands ATTRIBUTE_UNUSED)
32102 if (rs6000_warn_cell_microcode)
32104 const char *temp;
32105 int insn_code_number = recog_memoized (insn);
32106 location_t location = INSN_LOCATION (insn);
32108 /* Punt on insns we cannot recognize. */
32109 if (insn_code_number < 0)
32110 return;
32112 temp = get_insn_template (insn_code_number, insn);
32114 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
32115 warning_at (location, OPT_mwarn_cell_microcode,
32116 "emitting microcode insn %s\t[%s] #%d",
32117 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
32118 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
32119 warning_at (location, OPT_mwarn_cell_microcode,
32120 "emitting conditional microcode insn %s\t[%s] #%d",
32121 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
32125 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32127 #if TARGET_ELF
32128 static unsigned HOST_WIDE_INT
32129 rs6000_asan_shadow_offset (void)
32131 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
32133 #endif
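/* Editor's note (not from the GCC sources): with the usual ASan mapping
   shadow = (addr >> 3) + offset, the value above is 0x20000000000 for
   -m64 and 0x20000000 for -m32.  */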
32135 /* Mask options that we want to support inside of attribute((target)) and
32136 #pragma GCC target operations. Note, we do not include things like
32137 64/32-bit, endianness, hard/soft floating point, etc. that would have
32138 different calling sequences. */
32140 struct rs6000_opt_mask {
32141 const char *name; /* option name */
32142 HOST_WIDE_INT mask; /* mask to set */
32143 bool invert; /* invert sense of mask */
32144 bool valid_target; /* option is a target option */
32147 static struct rs6000_opt_mask const rs6000_opt_masks[] =
32149 { "altivec", OPTION_MASK_ALTIVEC, false, true },
32150 { "cmpb", OPTION_MASK_CMPB, false, true },
32151 { "crypto", OPTION_MASK_CRYPTO, false, true },
32152 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
32153 { "dlmzb", OPTION_MASK_DLMZB, false, true },
32154 { "fprnd", OPTION_MASK_FPRND, false, true },
32155 { "hard-dfp", OPTION_MASK_DFP, false, true },
32156 { "htm", OPTION_MASK_HTM, false, true },
32157 { "isel", OPTION_MASK_ISEL, false, true },
32158 { "mfcrf", OPTION_MASK_MFCRF, false, true },
32159 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
32160 { "mulhw", OPTION_MASK_MULHW, false, true },
32161 { "multiple", OPTION_MASK_MULTIPLE, false, true },
32162 { "popcntb", OPTION_MASK_POPCNTB, false, true },
32163 { "popcntd", OPTION_MASK_POPCNTD, false, true },
32164 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
32165 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
32166 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
32167 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
32168 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
32169 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
32170 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
32171 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
32172 { "string", OPTION_MASK_STRING, false, true },
32173 { "update", OPTION_MASK_NO_UPDATE, true , true },
32174 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
32175 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
32176 { "vsx", OPTION_MASK_VSX, false, true },
32177 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
32178 #ifdef OPTION_MASK_64BIT
32179 #if TARGET_AIX_OS
32180 { "aix64", OPTION_MASK_64BIT, false, false },
32181 { "aix32", OPTION_MASK_64BIT, true, false },
32182 #else
32183 { "64", OPTION_MASK_64BIT, false, false },
32184 { "32", OPTION_MASK_64BIT, true, false },
32185 #endif
32186 #endif
32187 #ifdef OPTION_MASK_EABI
32188 { "eabi", OPTION_MASK_EABI, false, false },
32189 #endif
32190 #ifdef OPTION_MASK_LITTLE_ENDIAN
32191 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
32192 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
32193 #endif
32194 #ifdef OPTION_MASK_RELOCATABLE
32195 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
32196 #endif
32197 #ifdef OPTION_MASK_STRICT_ALIGN
32198 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
32199 #endif
32200 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
32201 { "string", OPTION_MASK_STRING, false, false },
32204 /* Builtin mask mapping for printing the flags. */
32205 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
32207 { "altivec", RS6000_BTM_ALTIVEC, false, false },
32208 { "vsx", RS6000_BTM_VSX, false, false },
32209 { "spe", RS6000_BTM_SPE, false, false },
32210 { "paired", RS6000_BTM_PAIRED, false, false },
32211 { "fre", RS6000_BTM_FRE, false, false },
32212 { "fres", RS6000_BTM_FRES, false, false },
32213 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
32214 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
32215 { "popcntd", RS6000_BTM_POPCNTD, false, false },
32216 { "cell", RS6000_BTM_CELL, false, false },
32217 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
32218 { "crypto", RS6000_BTM_CRYPTO, false, false },
32219 { "htm", RS6000_BTM_HTM, false, false },
32220 { "hard-dfp", RS6000_BTM_DFP, false, false },
32221 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
32222 { "long-double-128", RS6000_BTM_LDBL128, false, false },
32225 /* Option variables that we want to support inside attribute((target)) and
32226 #pragma GCC target operations. */
32228 struct rs6000_opt_var {
32229 const char *name; /* option name */
32230 size_t global_offset; /* offset of the option in global_options. */
32231 size_t target_offset; /* offset of the option in target options. */
32234 static struct rs6000_opt_var const rs6000_opt_vars[] =
32236 { "friz",
32237 offsetof (struct gcc_options, x_TARGET_FRIZ),
32238 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
32239 { "avoid-indexed-addresses",
32240 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
32241 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
32242 { "paired",
32243 offsetof (struct gcc_options, x_rs6000_paired_float),
32244 offsetof (struct cl_target_option, x_rs6000_paired_float), },
32245 { "longcall",
32246 offsetof (struct gcc_options, x_rs6000_default_long_calls),
32247 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
32250 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
32251 parsing. Return true if there were no errors. */
32253 static bool
32254 rs6000_inner_target_options (tree args, bool attr_p)
32256 bool ret = true;
32258 if (args == NULL_TREE)
32261 else if (TREE_CODE (args) == STRING_CST)
32263 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32264 char *q;
32266 while ((q = strtok (p, ",")) != NULL)
32268 bool error_p = false;
32269 bool not_valid_p = false;
32270 const char *cpu_opt = NULL;
32272 p = NULL;
32273 if (strncmp (q, "cpu=", 4) == 0)
32275 int cpu_index = rs6000_cpu_name_lookup (q+4);
32276 if (cpu_index >= 0)
32277 rs6000_cpu_index = cpu_index;
32278 else
32280 error_p = true;
32281 cpu_opt = q+4;
32284 else if (strncmp (q, "tune=", 5) == 0)
32286 int tune_index = rs6000_cpu_name_lookup (q+5);
32287 if (tune_index >= 0)
32288 rs6000_tune_index = tune_index;
32289 else
32291 error_p = true;
32292 cpu_opt = q+5;
32295 else
32297 size_t i;
32298 bool invert = false;
32299 char *r = q;
32301 error_p = true;
32302 if (strncmp (r, "no-", 3) == 0)
32304 invert = true;
32305 r += 3;
32308 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
32309 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
32311 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
32313 if (!rs6000_opt_masks[i].valid_target)
32314 not_valid_p = true;
32315 else
32317 error_p = false;
32318 rs6000_isa_flags_explicit |= mask;
32320 /* VSX needs altivec, so -mvsx automagically sets
32321 altivec. */
32322 if (mask == OPTION_MASK_VSX && !invert)
32323 mask |= OPTION_MASK_ALTIVEC;
32325 if (rs6000_opt_masks[i].invert)
32326 invert = !invert;
32328 if (invert)
32329 rs6000_isa_flags &= ~mask;
32330 else
32331 rs6000_isa_flags |= mask;
32333 break;
32336 if (error_p && !not_valid_p)
32338 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
32339 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
32341 size_t j = rs6000_opt_vars[i].global_offset;
32342 *((int *) ((char *)&global_options + j)) = !invert;
32343 error_p = false;
32344 break;
32349 if (error_p)
32351 const char *eprefix, *esuffix;
32353 ret = false;
32354 if (attr_p)
32356 eprefix = "__attribute__((__target__(";
32357 esuffix = ")))";
32359 else
32361 eprefix = "#pragma GCC target ";
32362 esuffix = "";
32365 if (cpu_opt)
32366 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
32367 q, esuffix);
32368 else if (not_valid_p)
32369 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
32370 else
32371 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
32376 else if (TREE_CODE (args) == TREE_LIST)
32380 tree value = TREE_VALUE (args);
32381 if (value)
32383 bool ret2 = rs6000_inner_target_options (value, attr_p);
32384 if (!ret2)
32385 ret = false;
32387 args = TREE_CHAIN (args);
32389 while (args != NULL_TREE);
32392 else
32393 gcc_unreachable ();
32395 return ret;
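/* Editor's note (not from the GCC sources): example input accepted by the
   parser above: attribute((target("cpu=power8,no-string,vsx"))).  Note
   that "vsx" also sets OPTION_MASK_ALTIVEC as a side effect.  */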
32398 /* Print out the target options as a list for -mdebug=target. */
32400 static void
32401 rs6000_debug_target_options (tree args, const char *prefix)
32403 if (args == NULL_TREE)
32404 fprintf (stderr, "%s<NULL>", prefix);
32406 else if (TREE_CODE (args) == STRING_CST)
32408 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32409 char *q;
32411 while ((q = strtok (p, ",")) != NULL)
32413 p = NULL;
32414 fprintf (stderr, "%s\"%s\"", prefix, q);
32415 prefix = ", ";
32419 else if (TREE_CODE (args) == TREE_LIST)
32423 tree value = TREE_VALUE (args);
32424 if (value)
32426 rs6000_debug_target_options (value, prefix);
32427 prefix = ", ";
32429 args = TREE_CHAIN (args);
32431 while (args != NULL_TREE);
32434 else
32435 gcc_unreachable ();
32437 return;
32441 /* Hook to validate attribute((target("..."))). */
32443 static bool
32444 rs6000_valid_attribute_p (tree fndecl,
32445 tree ARG_UNUSED (name),
32446 tree args,
32447 int flags)
32449 struct cl_target_option cur_target;
32450 bool ret;
32451 tree old_optimize = build_optimization_node (&global_options);
32452 tree new_target, new_optimize;
32453 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32455 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32457 if (TARGET_DEBUG_TARGET)
32459 tree tname = DECL_NAME (fndecl);
32460 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
32461 if (tname)
32462 fprintf (stderr, "function: %.*s\n",
32463 (int) IDENTIFIER_LENGTH (tname),
32464 IDENTIFIER_POINTER (tname));
32465 else
32466 fprintf (stderr, "function: unknown\n");
32468 fprintf (stderr, "args:");
32469 rs6000_debug_target_options (args, " ");
32470 fprintf (stderr, "\n");
32472 if (flags)
32473 fprintf (stderr, "flags: 0x%x\n", flags);
32475 fprintf (stderr, "--------------------\n");
32478 old_optimize = build_optimization_node (&global_options);
32479 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32481 /* If the function changed the optimization levels as well as setting target
32482 options, start with the optimizations specified. */
32483 if (func_optimize && func_optimize != old_optimize)
32484 cl_optimization_restore (&global_options,
32485 TREE_OPTIMIZATION (func_optimize));
32487 /* The target attributes may also change some optimization flags, so update
32488 the optimization options if necessary. */
32489 cl_target_option_save (&cur_target, &global_options);
32490 rs6000_cpu_index = rs6000_tune_index = -1;
32491 ret = rs6000_inner_target_options (args, true);
32493 /* Set up any additional state. */
32494 if (ret)
32496 ret = rs6000_option_override_internal (false);
32497 new_target = build_target_option_node (&global_options);
32499 else
32500 new_target = NULL;
32502 new_optimize = build_optimization_node (&global_options);
32504 if (!new_target)
32505 ret = false;
32507 else if (fndecl)
32509 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
32511 if (old_optimize != new_optimize)
32512 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32515 cl_target_option_restore (&global_options, &cur_target);
32517 if (old_optimize != new_optimize)
32518 cl_optimization_restore (&global_options,
32519 TREE_OPTIMIZATION (old_optimize));
32521 return ret;
32525 /* Hook to validate the current #pragma GCC target and set the state, and
32526 update the macros based on what was changed. If ARGS is NULL, then
32527 POP_TARGET is used to reset the options. */
32529 bool
32530 rs6000_pragma_target_parse (tree args, tree pop_target)
32532 tree prev_tree = build_target_option_node (&global_options);
32533 tree cur_tree;
32534 struct cl_target_option *prev_opt, *cur_opt;
32535 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
32536 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
32538 if (TARGET_DEBUG_TARGET)
32540 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
32541 fprintf (stderr, "args:");
32542 rs6000_debug_target_options (args, " ");
32543 fprintf (stderr, "\n");
32545 if (pop_target)
32547 fprintf (stderr, "pop_target:\n");
32548 debug_tree (pop_target);
32550 else
32551 fprintf (stderr, "pop_target: <NULL>\n");
32553 fprintf (stderr, "--------------------\n");
32556 if (! args)
32558 cur_tree = ((pop_target)
32559 ? pop_target
32560 : target_option_default_node);
32561 cl_target_option_restore (&global_options,
32562 TREE_TARGET_OPTION (cur_tree));
32564 else
32566 rs6000_cpu_index = rs6000_tune_index = -1;
32567 if (!rs6000_inner_target_options (args, false)
32568 || !rs6000_option_override_internal (false)
32569 || (cur_tree = build_target_option_node (&global_options))
32570 == NULL_TREE)
32572 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
32573 fprintf (stderr, "invalid pragma\n");
32575 return false;
32579 target_option_current_node = cur_tree;
32581 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
32582 change the macros that are defined. */
32583 if (rs6000_target_modify_macros_ptr)
32585 prev_opt = TREE_TARGET_OPTION (prev_tree);
32586 prev_bumask = prev_opt->x_rs6000_builtin_mask;
32587 prev_flags = prev_opt->x_rs6000_isa_flags;
32589 cur_opt = TREE_TARGET_OPTION (cur_tree);
32590 cur_flags = cur_opt->x_rs6000_isa_flags;
32591 cur_bumask = cur_opt->x_rs6000_builtin_mask;
32593 diff_bumask = (prev_bumask ^ cur_bumask);
32594 diff_flags = (prev_flags ^ cur_flags);
32596 if ((diff_flags != 0) || (diff_bumask != 0))
32598 /* Delete old macros. */
32599 rs6000_target_modify_macros_ptr (false,
32600 prev_flags & diff_flags,
32601 prev_bumask & diff_bumask);
32603 /* Define new macros. */
32604 rs6000_target_modify_macros_ptr (true,
32605 cur_flags & diff_flags,
32606 cur_bumask & diff_bumask);
32610 return true;
32614 /* Remember the last target of rs6000_set_current_function. */
32615 static GTY(()) tree rs6000_previous_fndecl;
32617 /* Establish appropriate back-end context for processing the function
32618 FNDECL. The argument might be NULL to indicate processing at top
32619 level, outside of any function scope. */
32620 static void
32621 rs6000_set_current_function (tree fndecl)
32623 tree old_tree = (rs6000_previous_fndecl
32624 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
32625 : NULL_TREE);
32627 tree new_tree = (fndecl
32628 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
32629 : NULL_TREE);
32631 if (TARGET_DEBUG_TARGET)
32633 bool print_final = false;
32634 fprintf (stderr, "\n==================== rs6000_set_current_function");
32636 if (fndecl)
32637 fprintf (stderr, ", fndecl %s (%p)",
32638 (DECL_NAME (fndecl)
32639 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
32640 : "<unknown>"), (void *)fndecl);
32642 if (rs6000_previous_fndecl)
32643 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
32645 fprintf (stderr, "\n");
32646 if (new_tree)
32648 fprintf (stderr, "\nnew fndecl target specific options:\n");
32649 debug_tree (new_tree);
32650 print_final = true;
32653 if (old_tree)
32655 fprintf (stderr, "\nold fndecl target specific options:\n");
32656 debug_tree (old_tree);
32657 print_final = true;
32660 if (print_final)
32661 fprintf (stderr, "--------------------\n");
32664 /* Only change the context if the function changes. This hook is called
32665 several times in the course of compiling a function, and we don't want to
32666 slow things down too much or call target_reinit when it isn't safe. */
32667 if (fndecl && fndecl != rs6000_previous_fndecl)
32669 rs6000_previous_fndecl = fndecl;
32670 if (old_tree == new_tree)
32673 else if (new_tree && new_tree != target_option_default_node)
32675 cl_target_option_restore (&global_options,
32676 TREE_TARGET_OPTION (new_tree));
32677 if (TREE_TARGET_GLOBALS (new_tree))
32678 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32679 else
32680 TREE_TARGET_GLOBALS (new_tree)
32681 = save_target_globals_default_opts ();
32684 else if (old_tree && old_tree != target_option_default_node)
32686 new_tree = target_option_current_node;
32687 cl_target_option_restore (&global_options,
32688 TREE_TARGET_OPTION (new_tree));
32689 if (TREE_TARGET_GLOBALS (new_tree))
32690 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32691 else if (new_tree == target_option_default_node)
32692 restore_target_globals (&default_target_globals);
32693 else
32694 TREE_TARGET_GLOBALS (new_tree)
32695 = save_target_globals_default_opts ();
32701 /* Save the current options */
32703 static void
32704 rs6000_function_specific_save (struct cl_target_option *ptr,
32705 struct gcc_options *opts)
32707 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
32708 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
32711 /* Restore the current options */
32713 static void
32714 rs6000_function_specific_restore (struct gcc_options *opts,
32715 struct cl_target_option *ptr)
32718 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
32719 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
32720 (void) rs6000_option_override_internal (false);
32723 /* Print the current options */
32725 static void
32726 rs6000_function_specific_print (FILE *file, int indent,
32727 struct cl_target_option *ptr)
32729 rs6000_print_isa_options (file, indent, "Isa options set",
32730 ptr->x_rs6000_isa_flags);
32732 rs6000_print_isa_options (file, indent, "Isa options explicit",
32733 ptr->x_rs6000_isa_flags_explicit);
32736 /* Helper function to print the current isa or misc options on a line. */
32738 static void
32739 rs6000_print_options_internal (FILE *file,
32740 int indent,
32741 const char *string,
32742 HOST_WIDE_INT flags,
32743 const char *prefix,
32744 const struct rs6000_opt_mask *opts,
32745 size_t num_elements)
32747 size_t i;
32748 size_t start_column = 0;
32749 size_t cur_column;
32750 size_t max_column = 76;
32751 const char *comma = "";
32753 if (indent)
32754 start_column += fprintf (file, "%*s", indent, "");
32756 if (!flags)
32758 fprintf (file, DEBUG_FMT_S, string, "<none>");
32759 return;
32762 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
32764 /* Print the various mask options. */
32765 cur_column = start_column;
32766 for (i = 0; i < num_elements; i++)
32768 if ((flags & opts[i].mask) != 0)
32770 const char *no_str = opts[i].invert ? "no-" : "";
32771 size_t len = (strlen (comma)
32772 + strlen (prefix)
32773 + strlen (no_str)
32774 + strlen (opts[i].name));
32776 cur_column += len;
32777 if (cur_column > max_column)
32779 fprintf (file, ", \\\n%*s", (int)start_column, "");
32780 cur_column = start_column + len;
32781 comma = "";
32784 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
32785 opts[i].name);
32786 flags &= ~ opts[i].mask;
32787 comma = ", ";
32791 fputs ("\n", file);
32794 /* Helper function to print the current isa options on a line. */
32796 static void
32797 rs6000_print_isa_options (FILE *file, int indent, const char *string,
32798 HOST_WIDE_INT flags)
32800 rs6000_print_options_internal (file, indent, string, flags, "-m",
32801 &rs6000_opt_masks[0],
32802 ARRAY_SIZE (rs6000_opt_masks));
32805 static void
32806 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
32807 HOST_WIDE_INT flags)
32809 rs6000_print_options_internal (file, indent, string, flags, "",
32810 &rs6000_builtin_mask_names[0],
32811 ARRAY_SIZE (rs6000_builtin_mask_names));
32815 /* Hook to determine if one function can safely inline another. */
32817 static bool
32818 rs6000_can_inline_p (tree caller, tree callee)
32820 bool ret = false;
32821 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32822 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32824 /* If callee has no option attributes, then it is ok to inline. */
32825 if (!callee_tree)
32826 ret = true;
32828 /* If caller has no option attributes, but callee does then it is not ok to
32829 inline. */
32830 else if (!caller_tree)
32831 ret = false;
32833 else
32835 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32836 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32838 /* Callee's options should be a subset of the caller's, i.e. a vsx function
32839 can inline an altivec function but a non-vsx function can't inline a
32840 vsx function. */
32841 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32842 == callee_opts->x_rs6000_isa_flags)
32843 ret = true;
32846 if (TARGET_DEBUG_TARGET)
32847 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
32848 (DECL_NAME (caller)
32849 ? IDENTIFIER_POINTER (DECL_NAME (caller))
32850 : "<unknown>"),
32851 (DECL_NAME (callee)
32852 ? IDENTIFIER_POINTER (DECL_NAME (callee))
32853 : "<unknown>"),
32854 (ret ? "can" : "cannot"));
32856 return ret;
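/* Illustrative sketch (editor's addition, not from the GCC sources): a
   scalar model of the subset test above; every ISA flag the callee
   requires must also be set for the caller.  */
#if 0
static int
flags_subset_p (unsigned long long caller_flags,
		unsigned long long callee_flags)
{
  return (caller_flags & callee_flags) == callee_flags;
}
#endif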
32859 /* Allocate a stack temp and fixup the address so it meets the particular
32860 memory requirements (either offsettable or REG+REG addressing). */
32862 rtx
32863 rs6000_allocate_stack_temp (machine_mode mode,
32864 bool offsettable_p,
32865 bool reg_reg_p)
32867 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32868 rtx addr = XEXP (stack, 0);
32869 int strict_p = (reload_in_progress || reload_completed);
32871 if (!legitimate_indirect_address_p (addr, strict_p))
32873 if (offsettable_p
32874 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32875 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32877 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32878 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32881 return stack;
32884 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
32885 to such a form to deal with memory reference instructions like STFIWX that
32886 only take reg+reg addressing. */
32888 rtx
32889 rs6000_address_for_fpconvert (rtx x)
32891 int strict_p = (reload_in_progress || reload_completed);
32892 rtx addr;
32894 gcc_assert (MEM_P (x));
32895 addr = XEXP (x, 0);
32896 if (! legitimate_indirect_address_p (addr, strict_p)
32897 && ! legitimate_indexed_address_p (addr, strict_p))
32899 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
32901 rtx reg = XEXP (addr, 0);
32902 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
32903 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
32904 gcc_assert (REG_P (reg));
32905 emit_insn (gen_add3_insn (reg, reg, size_rtx));
32906 addr = reg;
32908 else if (GET_CODE (addr) == PRE_MODIFY)
32910 rtx reg = XEXP (addr, 0);
32911 rtx expr = XEXP (addr, 1);
32912 gcc_assert (REG_P (reg));
32913 gcc_assert (GET_CODE (expr) == PLUS);
32914 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
32915 addr = reg;
32918 x = replace_equiv_address (x, copy_addr_to_reg (addr));
32921 return x;
32924 /* Given a memory reference, if it is not in the form for altivec memory
32925 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
32926 convert to the altivec format. */
32928 rtx
32929 rs6000_address_for_altivec (rtx x)
32931 gcc_assert (MEM_P (x));
32932 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
32934 rtx addr = XEXP (x, 0);
32935 int strict_p = (reload_in_progress || reload_completed);
32937 if (!legitimate_indexed_address_p (addr, strict_p)
32938 && !legitimate_indirect_address_p (addr, strict_p))
32939 addr = copy_to_mode_reg (Pmode, addr);
32941 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
32942 x = change_address (x, GET_MODE (x), addr);
32945 return x;
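/* Editor's note (not from the GCC sources): the explicit (and addr -16)
   mirrors what lvx/stvx do in hardware -- the low four bits of the
   effective address are ignored, so an access at sp+20 is forced down to
   the 16-byte boundary sp+16.  */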
32948 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
32950 On the RS/6000, all integer constants are acceptable, most won't be valid
32951 for particular insns, though. Only easy FP constants are acceptable. */
32953 static bool
32954 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
32956 if (TARGET_ELF && tls_referenced_p (x))
32957 return false;
32959 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
32960 || GET_MODE (x) == VOIDmode
32961 || (TARGET_POWERPC64 && mode == DImode)
32962 || easy_fp_constant (x, mode)
32963 || easy_vector_constant (x, mode));
32967 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
32969 static bool
32970 chain_already_loaded (rtx_insn *last)
32972 for (; last != NULL; last = PREV_INSN (last))
32974 if (NONJUMP_INSN_P (last))
32976 rtx patt = PATTERN (last);
32978 if (GET_CODE (patt) == SET)
32980 rtx lhs = XEXP (patt, 0);
32982 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
32983 return true;
32987 return false;
32990 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
32992 void
32993 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32995 const bool direct_call_p
32996 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
32997 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
32998 rtx toc_load = NULL_RTX;
32999 rtx toc_restore = NULL_RTX;
33000 rtx func_addr;
33001 rtx abi_reg = NULL_RTX;
33002 rtx call[4];
33003 int n_call;
33004 rtx insn;
33006 /* Handle longcall attributes. */
33007 if (INTVAL (cookie) & CALL_LONG)
33008 func_desc = rs6000_longcall_ref (func_desc);
33010 /* Handle indirect calls. */
33011 if (GET_CODE (func_desc) != SYMBOL_REF
33012 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
33014 /* Save the TOC into its reserved slot before the call,
33015 and prepare to restore it after the call. */
33016 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
33017 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
33018 rtx stack_toc_mem = gen_frame_mem (Pmode,
33019 gen_rtx_PLUS (Pmode, stack_ptr,
33020 stack_toc_offset));
33021 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
33022 gen_rtvec (1, stack_toc_offset),
33023 UNSPEC_TOCSLOT);
33024 toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_unspec);
33026 /* Can we optimize saving the TOC in the prologue or
33027 do we need to do it at every call? */
33028 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
33029 cfun->machine->save_toc_in_prologue = true;
33030 else
33032 MEM_VOLATILE_P (stack_toc_mem) = 1;
33033 emit_move_insn (stack_toc_mem, toc_reg);
33036 if (DEFAULT_ABI == ABI_ELFv2)
33038 /* A function pointer in the ELFv2 ABI is just a plain address, but
33039 the ABI requires it to be loaded into r12 before the call. */
33040 func_addr = gen_rtx_REG (Pmode, 12);
33041 emit_move_insn (func_addr, func_desc);
33042 abi_reg = func_addr;
33044 else
33046 /* A function pointer under AIX is a pointer to a data area whose
33047 first word contains the actual address of the function, whose
33048 second word contains a pointer to its TOC, and whose third word
33049 contains a value to place in the static chain register (r11).
33050 Note that if we load the static chain, our "trampoline" need
33051 not have any executable code. */
33053 /* Load up address of the actual function. */
33054 func_desc = force_reg (Pmode, func_desc);
33055 func_addr = gen_reg_rtx (Pmode);
33056 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
33058 /* Prepare to load the TOC of the called function. Note that the
33059 TOC load must happen immediately before the actual call so
33060 that unwinding the TOC registers works correctly. See the
33061 comment in frob_update_context. */
33062 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
33063 rtx func_toc_mem = gen_rtx_MEM (Pmode,
33064 gen_rtx_PLUS (Pmode, func_desc,
33065 func_toc_offset));
33066 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
33068 /* If we have a static chain, load it up. But, if the call was
33069 originally direct, the 3rd word has not been written since no
33070 trampoline has been built, so we ought not to load it, lest we
33071 override a static chain value. */
33072 if (!direct_call_p
33073 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
33074 && !chain_already_loaded (crtl->emit.sequence_stack->last))
33076 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
33077 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
33078 rtx func_sc_mem = gen_rtx_MEM (Pmode,
33079 gen_rtx_PLUS (Pmode, func_desc,
33080 func_sc_offset));
33081 emit_move_insn (sc_reg, func_sc_mem);
33082 abi_reg = sc_reg;
33086 else
33088 /* Direct calls use the TOC: for local calls, the callee will
33089 assume the TOC register is set; for non-local calls, the
33090 PLT stub needs the TOC register. */
33091 abi_reg = toc_reg;
33092 func_addr = func_desc;
33095 /* Create the call. */
33096 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
33097 if (value != NULL_RTX)
33098 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
33099 n_call = 1;
33101 if (toc_load)
33102 call[n_call++] = toc_load;
33103 if (toc_restore)
33104 call[n_call++] = toc_restore;
33106 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
33108 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
33109 insn = emit_call_insn (insn);
33111 /* Mention all registers defined by the ABI to hold information
33112 as uses in CALL_INSN_FUNCTION_USAGE. */
33113 if (abi_reg)
33114 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
33117 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
33119 void
33120 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
33122 rtx call[2];
33123 rtx insn;
33125 gcc_assert (INTVAL (cookie) == 0);
33127 /* Create the call. */
33128 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
33129 if (value != NULL_RTX)
33130 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
33132 call[1] = simple_return_rtx;
33134 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
33135 insn = emit_call_insn (insn);
33137 /* Note use of the TOC register. */
33138 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
33139 /* We need to also mark a use of the link register since the function we
33140 sibling-call to will use it to return to our caller. */
33141 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
33144 /* Return whether we need to always update the saved TOC pointer when we update
33145 the stack pointer. */
33147 static bool
33148 rs6000_save_toc_in_prologue_p (void)
33150 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
33153 #ifdef HAVE_GAS_HIDDEN
33154 # define USE_HIDDEN_LINKONCE 1
33155 #else
33156 # define USE_HIDDEN_LINKONCE 0
33157 #endif
33159 /* Fills in the label name that should be used for a 476 link stack thunk. */
33161 void
33162 get_ppc476_thunk_name (char name[32])
33164 gcc_assert (TARGET_LINK_STACK);
33166 if (USE_HIDDEN_LINKONCE)
33167 sprintf (name, "__ppc476.get_thunk");
33168 else
33169 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
33172 /* This function emits the simple thunk routine that is used to preserve
33173 the link stack on the 476 cpu. */
33175 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
33176 static void
33177 rs6000_code_end (void)
33179 char name[32];
33180 tree decl;
33182 if (!TARGET_LINK_STACK)
33183 return;
33185 get_ppc476_thunk_name (name);
33187 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
33188 build_function_type_list (void_type_node, NULL_TREE));
33189 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
33190 NULL_TREE, void_type_node);
33191 TREE_PUBLIC (decl) = 1;
33192 TREE_STATIC (decl) = 1;
33194 #if RS6000_WEAK
33195 if (USE_HIDDEN_LINKONCE)
33197 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
33198 targetm.asm_out.unique_section (decl, 0);
33199 switch_to_section (get_named_section (decl, NULL, 0));
33200 DECL_WEAK (decl) = 1;
33201 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
33202 targetm.asm_out.globalize_label (asm_out_file, name);
33203 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
33204 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
33206 else
33207 #endif
33209 switch_to_section (text_section);
33210 ASM_OUTPUT_LABEL (asm_out_file, name);
33213 DECL_INITIAL (decl) = make_node (BLOCK);
33214 current_function_decl = decl;
33215 init_function_start (decl);
33216 first_function_block_is_cold = false;
33217 /* Make sure unwind info is emitted for the thunk if needed. */
33218 final_start_function (emit_barrier (), asm_out_file, 1);
33220 fputs ("\tblr\n", asm_out_file);
33222 final_end_function ();
33223 init_insn_lengths ();
33224 free_after_compilation (cfun);
33225 set_cfun (NULL);
33226 current_function_decl = NULL;
33229 /* Add r30 to hard reg set if the prologue sets it up and it is not
33230 pic_offset_table_rtx. */
33232 static void
33233 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
33235 if (!TARGET_SINGLE_PIC_BASE
33236 && TARGET_TOC
33237 && TARGET_MINIMAL_TOC
33238 && get_pool_size () != 0)
33239 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
33243 /* Helper function for rs6000_split_logical to emit a logical instruction after
33244 splitting the operation into single GPR registers.
33246 DEST is the destination register.
33247 OP1 and OP2 are the input source registers.
33248 CODE is the base operation (AND, IOR, XOR, NOT).
33249 MODE is the machine mode.
33250 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33251 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33252 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
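/* An illustrative sketch of the constant folding performed below:

     (and X 0)             -> set DEST to 0
     (and X mask)          -> plain copy of X, when mask covers the mode
     (ior X 0), (xor X 0)  -> plain copy of X  */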
33254 static void
33255 rs6000_split_logical_inner (rtx dest,
33256 rtx op1,
33257 rtx op2,
33258 enum rtx_code code,
33259 machine_mode mode,
33260 bool complement_final_p,
33261 bool complement_op1_p,
33262 bool complement_op2_p)
33264 rtx bool_rtx;
33266 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
33267 if (op2 && GET_CODE (op2) == CONST_INT
33268 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
33269 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33271 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
33272 HOST_WIDE_INT value = INTVAL (op2) & mask;
33274 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
33275 if (code == AND)
33277 if (value == 0)
33279 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
33280 return;
33283 else if (value == mask)
33285 if (!rtx_equal_p (dest, op1))
33286 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33287 return;
33291 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
33292 into separate ORI/ORIS or XORI/XORIS instructions. */
33293 else if (code == IOR || code == XOR)
33295 if (value == 0)
33297 if (!rtx_equal_p (dest, op1))
33298 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33299 return;
33304 if (code == AND && mode == SImode
33305 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33307 emit_insn (gen_andsi3 (dest, op1, op2));
33308 return;
33311 if (complement_op1_p)
33312 op1 = gen_rtx_NOT (mode, op1);
33314 if (complement_op2_p)
33315 op2 = gen_rtx_NOT (mode, op2);
33317 /* For canonical RTL, if only one arm is inverted it is the first. */
33318 if (!complement_op1_p && complement_op2_p)
33319 std::swap (op1, op2);
33321 bool_rtx = ((code == NOT)
33322 ? gen_rtx_NOT (mode, op1)
33323 : gen_rtx_fmt_ee (code, mode, op1, op2));
33325 if (complement_final_p)
33326 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
33328 emit_insn (gen_rtx_SET (VOIDmode, dest, bool_rtx));
33331 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
33332 operations are split immediately during RTL generation to allow for more
33333 optimizations of the AND/IOR/XOR.
33335 OPERANDS is an array containing the destination and two input operands.
33336 CODE is the base operation (AND, IOR, XOR, NOT).
33338 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33339 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33340 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
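/* For example (an illustrative sketch), on a 32-bit target

     (set (reg:DI 3) (ior:DI (reg:DI 3) (const_int 0x12345678)))

   is split into SImode operations on the two word halves; the low-half
   IOR of 0x12345678 does not fit a single ORI field, so it is further
   split into an ORIS of the high 16 bits and an ORI of the low 16
   bits.  */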
33344 static void
33345 rs6000_split_logical_di (rtx operands[3],
33346 enum rtx_code code,
33347 bool complement_final_p,
33348 bool complement_op1_p,
33349 bool complement_op2_p)
33351 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
33352 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
33353 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
33354 enum hi_lo { hi = 0, lo = 1 };
33355 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
33356 size_t i;
33358 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
33359 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
33360 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
33361 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
33363 if (code == NOT)
33364 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
33365 else
33367 if (GET_CODE (operands[2]) != CONST_INT)
33369 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
33370 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
33372 else
33374 HOST_WIDE_INT value = INTVAL (operands[2]);
33375 HOST_WIDE_INT value_hi_lo[2];
33377 gcc_assert (!complement_final_p);
33378 gcc_assert (!complement_op1_p);
33379 gcc_assert (!complement_op2_p);
33381 value_hi_lo[hi] = value >> 32;
33382 value_hi_lo[lo] = value & lower_32bits;
33384 for (i = 0; i < 2; i++)
33386 HOST_WIDE_INT sub_value = value_hi_lo[i];
33388 if (sub_value & sign_bit)
33389 sub_value |= upper_32bits;
33391 op2_hi_lo[i] = GEN_INT (sub_value);
33393 /* If this is an AND instruction, check to see if we need to load
33394 the value into a register. */
33395 if (code == AND && sub_value != -1 && sub_value != 0
33396 && !and_operand (op2_hi_lo[i], SImode))
33397 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
33402 for (i = 0; i < 2; i++)
33404 /* Split large IOR/XOR operations. */
33405 if ((code == IOR || code == XOR)
33406 && GET_CODE (op2_hi_lo[i]) == CONST_INT
33407 && !complement_final_p
33408 && !complement_op1_p
33409 && !complement_op2_p
33410 && !logical_const_operand (op2_hi_lo[i], SImode))
33412 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
33413 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
33414 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
33415 rtx tmp = gen_reg_rtx (SImode);
33417 /* Make sure the constant is sign extended. */
33418 if ((hi_16bits & sign_bit) != 0)
33419 hi_16bits |= upper_32bits;
33421 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
33422 code, SImode, false, false, false);
33424 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
33425 code, SImode, false, false, false);
33427 else
33428 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
33429 code, SImode, complement_final_p,
33430 complement_op1_p, complement_op2_p);
33433 return;
33436 /* Split the insns that make up boolean operations operating on multiple GPR
33437 registers. The boolean MD patterns ensure that the inputs either are
33438 exactly the same as the output registers, or there is no overlap.
33440 OPERANDS is an array containing the destination and two input operands.
33441 CODE is the base operation (AND, IOR, XOR, NOT).
33442 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33443 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33444 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
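/* For example (illustrative), a TImode XOR on a 64-bit target is split
   into two DImode XORs, one for each doubleword register of the
   operands.  */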
33446 void
33447 rs6000_split_logical (rtx operands[3],
33448 enum rtx_code code,
33449 bool complement_final_p,
33450 bool complement_op1_p,
33451 bool complement_op2_p)
33453 machine_mode mode = GET_MODE (operands[0]);
33454 machine_mode sub_mode;
33455 rtx op0, op1, op2;
33456 int sub_size, regno0, regno1, nregs, i;
33458 /* If this is DImode, use the specialized version that can run before
33459 register allocation. */
33460 if (mode == DImode && !TARGET_POWERPC64)
33462 rs6000_split_logical_di (operands, code, complement_final_p,
33463 complement_op1_p, complement_op2_p);
33464 return;
33467 op0 = operands[0];
33468 op1 = operands[1];
33469 op2 = (code == NOT) ? NULL_RTX : operands[2];
33470 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
33471 sub_size = GET_MODE_SIZE (sub_mode);
33472 regno0 = REGNO (op0);
33473 regno1 = REGNO (op1);
33475 gcc_assert (reload_completed);
33476 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33477 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33479 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
33480 gcc_assert (nregs > 1);
33482 if (op2 && REG_P (op2))
33483 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
33485 for (i = 0; i < nregs; i++)
33487 int offset = i * sub_size;
33488 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
33489 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
33490 rtx sub_op2 = ((code == NOT)
33491 ? NULL_RTX
33492 : simplify_subreg (sub_mode, op2, mode, offset));
33494 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
33495 complement_final_p, complement_op1_p,
33496 complement_op2_p);
33499 return;
33503 /* Return true if the peephole2 pass can combine an addis instruction
33504 and a load with an offset into a single fused load on a power8. */
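/* The two-insn peephole candidate has this general shape
   (an illustrative sketch):

       addis  rA,rB,const@ha     # addis_reg = rA, addis_value
       lwz    rT,const@l(rA)     # target = rT, mem

   where either rA and rT are the same register, or rA is dead after
   the load and rT does not appear in the memory address.  */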
33507 bool
33508 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
33509 rtx addis_value, /* addis value. */
33510 rtx target, /* target register that is loaded. */
33511 rtx mem) /* bottom part of the memory addr. */
33513 rtx addr;
33514 rtx base_reg;
33516 /* Validate arguments. */
33517 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
33518 return false;
33520 if (!base_reg_operand (target, GET_MODE (target)))
33521 return false;
33523 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33524 return false;
33526 /* Allow sign/zero extension. */
33527 if (GET_CODE (mem) == ZERO_EXTEND
33528 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33529 mem = XEXP (mem, 0);
33531 if (!MEM_P (mem))
33532 return false;
33534 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33535 return false;
33537 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33538 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33539 return false;
33541 /* Validate that the register used to load the high value is either the
33542 register being loaded, or we can safely replace its use.
33544 This function is only called from the peephole2 pass and we assume that
33545 there are 2 instructions in the peephole (addis and load), so we
33546 check that the target register is not used in the memory address and
33547 that the register holding the addis result is dead after the peephole. */
33548 if (REGNO (addis_reg) != REGNO (target))
33550 if (reg_mentioned_p (target, mem))
33551 return false;
33553 if (!peep2_reg_dead_p (2, addis_reg))
33554 return false;
33556 /* If the target register being loaded is the stack pointer, we must
33557 avoid loading any other value into it, even temporarily. */
33558 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33559 return false;
33562 base_reg = XEXP (addr, 0);
33563 return REGNO (addis_reg) == REGNO (base_reg);
33566 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33567 sequence. We adjust the addis register to use the target register. If the
33568 load sign extends, we rewrite it as a zero-extending load followed by an
33569 explicit sign extension, since the fusion only covers zero-extending
33570 loads.
33572 The operands are:
33573 operands[0] register set with addis (to be replaced with target)
33574 operands[1] value set via addis
33575 operands[2] target register being loaded
33576 operands[3] D-form memory reference using operands[0]. */
33578 void
33579 expand_fusion_gpr_load (rtx *operands)
33581 rtx addis_value = operands[1];
33582 rtx target = operands[2];
33583 rtx orig_mem = operands[3];
33584 rtx new_addr, new_mem, orig_addr, offset;
33585 enum rtx_code plus_or_lo_sum;
33586 machine_mode target_mode = GET_MODE (target);
33587 machine_mode extend_mode = target_mode;
33588 machine_mode ptr_mode = Pmode;
33589 enum rtx_code extend = UNKNOWN;
33591 if (GET_CODE (orig_mem) == ZERO_EXTEND
33592 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
33594 extend = GET_CODE (orig_mem);
33595 orig_mem = XEXP (orig_mem, 0);
33596 target_mode = GET_MODE (orig_mem);
33599 gcc_assert (MEM_P (orig_mem));
33601 orig_addr = XEXP (orig_mem, 0);
33602 plus_or_lo_sum = GET_CODE (orig_addr);
33603 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
33605 offset = XEXP (orig_addr, 1);
33606 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
33607 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
33609 if (extend != UNKNOWN)
33610 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
33612 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
33613 UNSPEC_FUSION_GPR);
33614 emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
33616 if (extend == SIGN_EXTEND)
33618 int sub_off = ((BYTES_BIG_ENDIAN)
33619 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
33620 : 0);
33621 rtx sign_reg
33622 = simplify_subreg (target_mode, target, extend_mode, sub_off);
33624 emit_insn (gen_rtx_SET (VOIDmode, target,
33625 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
33628 return;
33631 /* Return a string to fuse an addis instruction with a GPR load into the
33632 same register that the addis instruction set up. The address used is
33633 the logical address that was formed during peephole2:
33634 (lo_sum (high) (low-part))
33636 The code is complicated, so we call output_asm_insn directly, and just
33637 return "". */
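/* For a TOC-relative SImode load on ELF, for example, the emitted
   sequence looks like this (illustrative; the trailing comment is
   produced via ASM_COMMENT_START):

       addis 9,2,sym@toc@ha     # gpr load fusion, type int
       lwz 9,sym@toc@l(9)
   */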
33639 const char *
33640 emit_fusion_gpr_load (rtx target, rtx mem)
33642 rtx addis_value;
33643 rtx fuse_ops[10];
33644 rtx addr;
33645 rtx load_offset;
33646 const char *addis_str = NULL;
33647 const char *load_str = NULL;
33648 const char *mode_name = NULL;
33649 char insn_template[80];
33650 machine_mode mode;
33651 const char *comment_str = ASM_COMMENT_START;
33653 if (GET_CODE (mem) == ZERO_EXTEND)
33654 mem = XEXP (mem, 0);
33656 gcc_assert (REG_P (target) && MEM_P (mem));
33658 if (*comment_str == ' ')
33659 comment_str++;
33661 addr = XEXP (mem, 0);
33662 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33663 gcc_unreachable ();
33665 addis_value = XEXP (addr, 0);
33666 load_offset = XEXP (addr, 1);
33668 /* Select the mnemonic for the load to the same register. */
33669 mode = GET_MODE (mem);
33670 switch (mode)
33672 case QImode:
33673 mode_name = "char";
33674 load_str = "lbz";
33675 break;
33677 case HImode:
33678 mode_name = "short";
33679 load_str = "lhz";
33680 break;
33682 case SImode:
33683 mode_name = "int";
33684 load_str = "lwz";
33685 break;
33687 case DImode:
33688 gcc_assert (TARGET_POWERPC64);
33689 mode_name = "long";
33690 load_str = "ld";
33691 break;
33693 default:
33694 gcc_unreachable ();
33697 /* Emit the addis instruction. */
33698 fuse_ops[0] = target;
33699 if (satisfies_constraint_L (addis_value))
33701 fuse_ops[1] = addis_value;
33702 addis_str = "lis %0,%v1";
33705 else if (GET_CODE (addis_value) == PLUS)
33707 rtx op0 = XEXP (addis_value, 0);
33708 rtx op1 = XEXP (addis_value, 1);
33710 if (REG_P (op0) && CONST_INT_P (op1)
33711 && satisfies_constraint_L (op1))
33713 fuse_ops[1] = op0;
33714 fuse_ops[2] = op1;
33715 addis_str = "addis %0,%1,%v2";
33719 else if (GET_CODE (addis_value) == HIGH)
33721 rtx value = XEXP (addis_value, 0);
33722 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
33724 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
33725 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
33726 if (TARGET_ELF)
33727 addis_str = "addis %0,%2,%1@toc@ha";
33729 else if (TARGET_XCOFF)
33730 addis_str = "addis %0,%1@u(%2)";
33732 else
33733 gcc_unreachable ();
33736 else if (GET_CODE (value) == PLUS)
33738 rtx op0 = XEXP (value, 0);
33739 rtx op1 = XEXP (value, 1);
33741 if (GET_CODE (op0) == UNSPEC
33742 && XINT (op0, 1) == UNSPEC_TOCREL
33743 && CONST_INT_P (op1))
33745 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
33746 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
33747 fuse_ops[3] = op1;
33748 if (TARGET_ELF)
33749 addis_str = "addis %0,%2,%1+%3@toc@ha";
33751 else if (TARGET_XCOFF)
33752 addis_str = "addis %0,%1+%3@u(%2)";
33754 else
33755 gcc_unreachable ();
33759 else if (satisfies_constraint_L (value))
33761 fuse_ops[1] = value;
33762 addis_str = "lis %0,%v1";
33765 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
33767 fuse_ops[1] = value;
33768 addis_str = "lis %0,%1@ha";
33772 if (!addis_str)
33773 fatal_insn ("Could not generate addis value for fusion", addis_value);
33775 sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
33776 comment_str, mode_name);
33777 output_asm_insn (insn_template, fuse_ops);
33779 /* Emit the D-form load instruction. */
33780 if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
33782 sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
33783 fuse_ops[1] = load_offset;
33784 output_asm_insn (insn_template, fuse_ops);
33787 else if (GET_CODE (load_offset) == UNSPEC
33788 && XINT (load_offset, 1) == UNSPEC_TOCREL)
33790 if (TARGET_ELF)
33791 sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
33793 else if (TARGET_XCOFF)
33794 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33796 else
33797 gcc_unreachable ();
33799 fuse_ops[1] = XVECEXP (load_offset, 0, 0);
33800 output_asm_insn (insn_template, fuse_ops);
33803 else if (GET_CODE (load_offset) == PLUS
33804 && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
33805 && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
33806 && CONST_INT_P (XEXP (load_offset, 1)))
33808 rtx tocrel_unspec = XEXP (load_offset, 0);
33809 if (TARGET_ELF)
33810 sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
33812 else if (TARGET_XCOFF)
33813 sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
33815 else
33816 gcc_unreachable ();
33818 fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
33819 fuse_ops[2] = XEXP (load_offset, 1);
33820 output_asm_insn (insn_template, fuse_ops);
33823 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
33825 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33827 fuse_ops[1] = load_offset;
33828 output_asm_insn (insn_template, fuse_ops);
33831 else
33832 fatal_insn ("Unable to generate load offset for fusion", load_offset);
33834 return "";
33837 /* Analyze vector computations and remove unnecessary doubleword
33838 swaps (xxswapdi instructions). This pass is performed only
33839 for little-endian VSX code generation.
33841 For this specific case, loads and stores of 4x32 and 2x64 vectors
33842 are inefficient. These are implemented using the lxvd2x and
33843 stxvd2x instructions, which invert the order of doublewords in
33844 a vector register. Thus the code generation inserts an xxswapdi
33845 after each such load, and prior to each such store. (For spill
33846 code after register assignment, an additional xxswapdi is inserted
33847 following each store in order to return a hard register to its
33848 unpermuted value.)
33850 The extra xxswapdi instructions reduce performance. This can be
33851 particularly bad for vectorized code. The purpose of this pass
33852 is to reduce the number of xxswapdi instructions required for
33853 correctness.
33855 The primary insight is that much code that operates on vectors
33856 does not care about the relative order of elements in a register,
33857 so long as the correct memory order is preserved. If we have
33858 a computation where all input values are provided by lxvd2x/xxswapdi
33859 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
33860 and all intermediate computations are pure SIMD (independent of
33861 element order), then all the xxswapdi's associated with the loads
33862 and stores may be removed.
33864 This pass uses some of the infrastructure and logical ideas from
33865 the "web" pass in web.c. We create maximal webs of computations
33866 fitting the description above using union-find. Each such web is
33867 then optimized by removing its unnecessary xxswapdi instructions.
33869 The pass is placed prior to global optimization so that we can
33870 perform the optimization in the safest and simplest way possible;
33871 that is, by replacing each xxswapdi insn with a register copy insn.
33872 Subsequent forward propagation will remove copies where possible.
33874 There are some element-order-sensitive operations that we can
33875 still allow, provided we modify them as needed.
33876 These include CONST_VECTORs, for which we must swap the first and
33877 second halves of the constant vector; and SUBREGs, for which we
33878 must adjust the byte offset to account for the swapped doublewords.
33879 A remaining opportunity would be non-immediate-form splats, for
33880 which we should adjust the selected lane of the input. We should
33881 also make code generation adjustments for sum-across operations,
33882 since this is a common vectorizer reduction.
33884 Because we run prior to the first split, we can see loads and stores
33885 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
33886 vector loads and stores that have not yet been split into a permuting
33887 load/store and a swap. (One way this can happen is with a builtin
33888 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
33889 than deleting a swap, we convert the load/store into a permuting
33890 load/store (which effectively removes the swap). */
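/* A minimal sketch of the pattern this pass cleans up, in
   little-endian pseudo-assembly (illustrative):

       lxvd2x 0,0,8      # load; doublewords arrive reversed
       xxswapd 0,0       # swap inserted to restore element order
       ...element-order-independent SIMD ops on vs0...
       xxswapd 0,0       # swap inserted before the store
       stxvd2x 0,0,9     # store; doublewords written reversed

   When every computation in the web is element-order independent, both
   xxswapd instructions can be replaced by copies and then removed.  */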
33892 /* Notes on Permutes
33894 We do not currently handle computations that contain permutes. There
33895 is a general transformation that can be performed correctly, but it
33896 may introduce more expensive code than it replaces. To handle these
33897 would require a cost model to determine when to perform the optimization.
33898 This commentary records how this could be done if desired.
33900 The most general permute is something like this (example for V16QI):
33902 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
33903 (parallel [(const_int a0) (const_int a1)
33905 (const_int a14) (const_int a15)]))
33907 where a0,...,a15 are in [0,31] and select which elements of op1 and
33908 op2 appear in the result.
33910 Regardless of mode, we can convert the PARALLEL to a mask of 16
33911 byte-element selectors. Let's call this M, with M[i] representing
33912 the ith byte-element selector value. Then if we swap doublewords
33913 throughout the computation, we can get correct behavior by replacing
33914 M with M' as follows:
33916 M'[i] = { M[i+8]+8 : i < 8,  M[i+8] in [0,7] U [16,23]
33917         { M[i+8]-8 : i < 8,  M[i+8] in [8,15] U [24,31]
33918         { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23]
33919         { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
33921 This seems promising at first, since we are just replacing one mask
33922 with another. But certain masks are preferable to others. If M
33923 is a mask that matches a vmrghh pattern, for example, M' certainly
33924 will not. Instead of a single vmrghh, we would generate a load of
33925 M' and a vperm. So we would need to know how many xxswapd's we can
33926 remove as a result of this transformation to determine if it's
33927 profitable; and preferably the logic would need to be aware of all
33928 the special preferable masks.
33930 Another form of permute is an UNSPEC_VPERM, in which the mask is
33931 already in a register. In some cases, this mask may be a constant
33932 that we can discover with ud-chains, in which case the above
33933 transformation is ok. However, the common usage here is for the
33934 mask to be produced by an UNSPEC_LVSL, in which case the mask
33935 cannot be known at compile time. In such a case we would have to
33936 generate several instructions to compute M' as above at run time,
33937 and a cost model is needed again. */
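/* As a quick sanity check of the M -> M' rewrite above: a splat of
   byte 0 (M[i] = 0 for all i) becomes M'[i] = 8 for all i, a splat of
   byte 8, which is exactly where the original byte 0 resides once the
   doublewords are swapped.  */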
33939 /* This is based on the union-find logic in web.c. web_entry_base is
33940 defined in df.h. */
33941 class swap_web_entry : public web_entry_base
33943 public:
33944 /* Pointer to the insn. */
33945 rtx_insn *insn;
33946 /* Set if insn contains a mention of a vector register. All other
33947 fields are undefined if this field is unset. */
33948 unsigned int is_relevant : 1;
33949 /* Set if insn is a load. */
33950 unsigned int is_load : 1;
33951 /* Set if insn is a store. */
33952 unsigned int is_store : 1;
33953 /* Set if insn is a doubleword swap. This can either be a register swap
33954 or a permuting load or store (test is_load and is_store for this). */
33955 unsigned int is_swap : 1;
33956 /* Set if the insn has a live-in use of a parameter register. */
33957 unsigned int is_live_in : 1;
33958 /* Set if the insn has a live-out def of a return register. */
33959 unsigned int is_live_out : 1;
33960 /* Set if the insn contains a subreg reference of a vector register. */
33961 unsigned int contains_subreg : 1;
33962 /* Set if the insn contains a 128-bit integer operand. */
33963 unsigned int is_128_int : 1;
33964 /* Set if this is a call-insn. */
33965 unsigned int is_call : 1;
33966 /* Set if this insn does not perform a vector operation for which
33967 element order matters, or if we know how to fix it up if it does.
33968 Undefined if is_swap is set. */
33969 unsigned int is_swappable : 1;
33970 /* A nonzero value indicates what kind of special handling for this
33971 insn is required if doublewords are swapped. Undefined if
33972 is_swappable is not set. */
33973 unsigned int special_handling : 3;
33974 /* Set if the web represented by this entry cannot be optimized. */
33975 unsigned int web_not_optimizable : 1;
33976 /* Set if this insn should be deleted. */
33977 unsigned int will_delete : 1;
33980 enum special_handling_values {
33981 SH_NONE = 0,
33982 SH_CONST_VECTOR,
33983 SH_SUBREG,
33984 SH_NOSWAP_LD,
33985 SH_NOSWAP_ST,
33986 SH_EXTRACT,
33987 SH_SPLAT
33990 /* Union INSN with all insns containing definitions that reach USE.
33991 Detect whether USE is live-in to the current function. */
33992 static void
33993 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
33995 struct df_link *link = DF_REF_CHAIN (use);
33997 if (!link)
33998 insn_entry[INSN_UID (insn)].is_live_in = 1;
34000 while (link)
34002 if (DF_REF_IS_ARTIFICIAL (link->ref))
34003 insn_entry[INSN_UID (insn)].is_live_in = 1;
34005 if (DF_REF_INSN_INFO (link->ref))
34007 rtx def_insn = DF_REF_INSN (link->ref);
34008 (void)unionfind_union (insn_entry + INSN_UID (insn),
34009 insn_entry + INSN_UID (def_insn));
34012 link = link->next;
34016 /* Union INSN with all insns containing uses reached from DEF.
34017 Detect whether DEF is live-out from the current function. */
34018 static void
34019 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
34021 struct df_link *link = DF_REF_CHAIN (def);
34023 if (!link)
34024 insn_entry[INSN_UID (insn)].is_live_out = 1;
34026 while (link)
34028 /* This could be an eh use or some other artificial use;
34029 we treat these all the same (killing the optimization). */
34030 if (DF_REF_IS_ARTIFICIAL (link->ref))
34031 insn_entry[INSN_UID (insn)].is_live_out = 1;
34033 if (DF_REF_INSN_INFO (link->ref))
34035 rtx use_insn = DF_REF_INSN (link->ref);
34036 (void)unionfind_union (insn_entry + INSN_UID (insn),
34037 insn_entry + INSN_UID (use_insn));
34040 link = link->next;
34044 /* Return 1 iff INSN is a load insn, including permuting loads that
34045 represent an lxvd2x instruction; else return 0. */
34046 static unsigned int
34047 insn_is_load_p (rtx insn)
34049 rtx body = PATTERN (insn);
34051 if (GET_CODE (body) == SET)
34053 if (GET_CODE (SET_SRC (body)) == MEM)
34054 return 1;
34056 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
34057 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
34058 return 1;
34060 return 0;
34063 if (GET_CODE (body) != PARALLEL)
34064 return 0;
34066 rtx set = XVECEXP (body, 0, 0);
34068 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
34069 return 1;
34071 return 0;
34074 /* Return 1 iff INSN is a store insn, including permuting stores that
34075 represent a stxvd2x instruction; else return 0. */
34076 static unsigned int
34077 insn_is_store_p (rtx insn)
34079 rtx body = PATTERN (insn);
34080 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
34081 return 1;
34082 if (GET_CODE (body) != PARALLEL)
34083 return 0;
34084 rtx set = XVECEXP (body, 0, 0);
34085 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
34086 return 1;
34087 return 0;
34090 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
34091 a permuting load, or a permuting store. */
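/* For V4SI, for example, the swap recognized below has this shape
   (illustrative):

     (set (reg:V4SI y)
          (vec_select:V4SI (reg:V4SI x)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))  */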
34092 static unsigned int
34093 insn_is_swap_p (rtx insn)
34095 rtx body = PATTERN (insn);
34096 if (GET_CODE (body) != SET)
34097 return 0;
34098 rtx rhs = SET_SRC (body);
34099 if (GET_CODE (rhs) != VEC_SELECT)
34100 return 0;
34101 rtx parallel = XEXP (rhs, 1);
34102 if (GET_CODE (parallel) != PARALLEL)
34103 return 0;
34104 unsigned int len = XVECLEN (parallel, 0);
34105 if (len != 2 && len != 4 && len != 8 && len != 16)
34106 return 0;
34107 for (unsigned int i = 0; i < len / 2; ++i)
34109 rtx op = XVECEXP (parallel, 0, i);
34110 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
34111 return 0;
34113 for (unsigned int i = len / 2; i < len; ++i)
34115 rtx op = XVECEXP (parallel, 0, i);
34116 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
34117 return 0;
34119 return 1;
34122 /* Return 1 iff OP is an operand that will not be affected by having
34123 vector doublewords swapped in memory. */
34124 static unsigned int
34125 rtx_is_swappable_p (rtx op, unsigned int *special)
34127 enum rtx_code code = GET_CODE (op);
34128 int i, j;
34129 rtx parallel;
34131 switch (code)
34133 case LABEL_REF:
34134 case SYMBOL_REF:
34135 case CLOBBER:
34136 case REG:
34137 return 1;
34139 case VEC_CONCAT:
34140 case ASM_INPUT:
34141 case ASM_OPERANDS:
34142 return 0;
34144 case CONST_VECTOR:
34146 *special = SH_CONST_VECTOR;
34147 return 1;
34150 case VEC_DUPLICATE:
34151 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
34152 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
34153 it represents a vector splat for which we can do special
34154 handling. */
34155 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
34156 return 1;
34157 else if (GET_CODE (XEXP (op, 0)) == REG
34158 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
34159 /* This catches V2DF and V2DI splat, at a minimum. */
34160 return 1;
34161 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
34162 /* If the duplicated item is from a select, defer to the select
34163 processing to see if we can change the lane for the splat. */
34164 return rtx_is_swappable_p (XEXP (op, 0), special);
34165 else
34166 return 0;
34168 case VEC_SELECT:
34169 /* A vec_extract operation is ok if we change the lane. */
34170 if (GET_CODE (XEXP (op, 0)) == REG
34171 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
34172 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
34173 && XVECLEN (parallel, 0) == 1
34174 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
34176 *special = SH_EXTRACT;
34177 return 1;
34179 else
34180 return 0;
34182 case UNSPEC:
34184 /* Various operations are unsafe for this optimization, at least
34185 without significant additional work. Permutes are obviously
34186 problematic, as both the permute control vector and the ordering
34187 of the target values are invalidated by doubleword swapping.
34188 Vector pack and unpack modify the number of vector lanes.
34189 Merge-high/low will not operate correctly on swapped operands.
34190 Vector shifts across element boundaries are clearly uncool,
34191 as are vector select and concatenate operations. Vector
34192 sum-across instructions define one operand with a specific
34193 order-dependent element, so additional fixup code would be
34194 needed to make those work. Vector set and non-immediate-form
34195 vector splat are element-order sensitive. A few of these
34196 cases might be workable with special handling if required. */
34197 int val = XINT (op, 1);
34198 switch (val)
34200 default:
34201 break;
34202 case UNSPEC_VMRGH_DIRECT:
34203 case UNSPEC_VMRGL_DIRECT:
34204 case UNSPEC_VPACK_SIGN_SIGN_SAT:
34205 case UNSPEC_VPACK_SIGN_UNS_SAT:
34206 case UNSPEC_VPACK_UNS_UNS_MOD:
34207 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
34208 case UNSPEC_VPACK_UNS_UNS_SAT:
34209 case UNSPEC_VPERM:
34210 case UNSPEC_VPERM_UNS:
34211 case UNSPEC_VPERMHI:
34212 case UNSPEC_VPERMSI:
34213 case UNSPEC_VPKPX:
34214 case UNSPEC_VSLDOI:
34215 case UNSPEC_VSLO:
34216 case UNSPEC_VSRO:
34217 case UNSPEC_VSUM2SWS:
34218 case UNSPEC_VSUM4S:
34219 case UNSPEC_VSUM4UBS:
34220 case UNSPEC_VSUMSWS:
34221 case UNSPEC_VSUMSWS_DIRECT:
34222 case UNSPEC_VSX_CONCAT:
34223 case UNSPEC_VSX_SET:
34224 case UNSPEC_VSX_SLDWI:
34225 case UNSPEC_VUNPACK_HI_SIGN:
34226 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
34227 case UNSPEC_VUNPACK_LO_SIGN:
34228 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
34229 case UNSPEC_VUPKHPX:
34230 case UNSPEC_VUPKHS_V4SF:
34231 case UNSPEC_VUPKHU_V4SF:
34232 case UNSPEC_VUPKLPX:
34233 case UNSPEC_VUPKLS_V4SF:
34234 case UNSPEC_VUPKLU_V4SF:
34235 /* The following could be handled as an idiom with XXSPLTW.
34236 These place a scalar in BE element zero, but the XXSPLTW
34237 will currently expect it in BE element 2 in a swapped
34238 region. When one of these feeds an XXSPLTW with no other
34239 defs/uses either way, we can avoid the lane change for
34240 XXSPLTW and things will be correct. TBD. */
34241 case UNSPEC_VSX_CVDPSPN:
34242 case UNSPEC_VSX_CVSPDP:
34243 case UNSPEC_VSX_CVSPDPN:
34244 return 0;
34245 case UNSPEC_VSPLT_DIRECT:
34246 *special = SH_SPLAT;
34247 return 1;
34251 default:
34252 break;
34255 const char *fmt = GET_RTX_FORMAT (code);
34256 int ok = 1;
34258 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34259 if (fmt[i] == 'e' || fmt[i] == 'u')
34261 unsigned int special_op = SH_NONE;
34262 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
34263 /* Ensure we never have two kinds of special handling
34264 for the same insn. */
34265 if (*special != SH_NONE && special_op != SH_NONE
34266 && *special != special_op)
34267 return 0;
34268 *special = special_op;
34270 else if (fmt[i] == 'E')
34271 for (j = 0; j < XVECLEN (op, i); ++j)
34273 unsigned int special_op = SH_NONE;
34274 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
34275 /* Ensure we never have two kinds of special handling
34276 for the same insn. */
34277 if (*special != SH_NONE && special_op != SH_NONE
34278 && *special != special_op)
34279 return 0;
34280 *special = special_op;
34283 return ok;
34286 /* Return 1 iff INSN will not be affected by having vector
34287 doublewords swapped in memory (in which case *SPECIAL is
34288 unchanged), or can be modified to be correct
34289 if vector doublewords are swapped in memory (in which case
34290 *SPECIAL is changed to a value indicating how). */
34291 static unsigned int
34292 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
34293 unsigned int *special)
34295 /* Calls are always bad. */
34296 if (GET_CODE (insn) == CALL_INSN)
34297 return 0;
34299 /* Loads and stores seen here are not permuting, but we can still
34300 fix them up by converting them to permuting ones. Exceptions:
34301 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
34302 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
34303 for the SET source. */
34304 rtx body = PATTERN (insn);
34305 int i = INSN_UID (insn);
34307 if (insn_entry[i].is_load)
34309 if (GET_CODE (body) == SET)
34311 *special = SH_NOSWAP_LD;
34312 return 1;
34314 else
34315 return 0;
34318 if (insn_entry[i].is_store)
34320 if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
34322 *special = SH_NOSWAP_ST;
34323 return 1;
34325 else
34326 return 0;
34329 /* Otherwise check the operands for vector lane violations. */
34330 return rtx_is_swappable_p (body, special);
34333 enum chain_purpose { FOR_LOADS, FOR_STORES };
34335 /* Return true if the UD or DU chain headed by LINK is non-empty,
34336 and every entry on the chain references an insn that is a
34337 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
34338 register swap must have only permuting loads as reaching defs.
34339 If PURPOSE is FOR_STORES, each such register swap must have only
34340 register swaps or permuting stores as reached uses. */
34341 static bool
34342 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
34343 enum chain_purpose purpose)
34345 if (!link)
34346 return false;
34348 for (; link; link = link->next)
34350 if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
34351 continue;
34353 if (DF_REF_IS_ARTIFICIAL (link->ref))
34354 return false;
34356 rtx reached_insn = DF_REF_INSN (link->ref);
34357 unsigned uid = INSN_UID (reached_insn);
34358 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
34360 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
34361 || insn_entry[uid].is_store)
34362 return false;
34364 if (purpose == FOR_LOADS)
34366 df_ref use;
34367 FOR_EACH_INSN_INFO_USE (use, insn_info)
34369 struct df_link *swap_link = DF_REF_CHAIN (use);
34371 while (swap_link)
34373 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34374 return false;
34376 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
34377 unsigned uid2 = INSN_UID (swap_def_insn);
34379 /* Only permuting loads are allowed. */
34380 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
34381 return false;
34383 swap_link = swap_link->next;
34387 else if (purpose == FOR_STORES)
34389 df_ref def;
34390 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34392 struct df_link *swap_link = DF_REF_CHAIN (def);
34394 while (swap_link)
34396 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34397 return false;
34399 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
34400 unsigned uid2 = INSN_UID (swap_use_insn);
34402 /* Permuting stores or register swaps are allowed. */
34403 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
34404 return false;
34406 swap_link = swap_link->next;
34412 return true;
34415 /* Mark the xxswapdi instructions associated with permuting loads and
34416 stores for removal. Note that we only flag them for deletion here,
34417 as there is a possibility of a swap being reached from multiple
34418 loads, etc. */
34419 static void
34420 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
34422 rtx insn = insn_entry[i].insn;
34423 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34425 if (insn_entry[i].is_load)
34427 df_ref def;
34428 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34430 struct df_link *link = DF_REF_CHAIN (def);
34432 /* We know by now that these are swaps, so we can delete
34433 them confidently. */
34434 while (link)
34436 rtx use_insn = DF_REF_INSN (link->ref);
34437 insn_entry[INSN_UID (use_insn)].will_delete = 1;
34438 link = link->next;
34442 else if (insn_entry[i].is_store)
34444 df_ref use;
34445 FOR_EACH_INSN_INFO_USE (use, insn_info)
34447 /* Ignore uses for addressability. */
34448 machine_mode mode = GET_MODE (DF_REF_REG (use));
34449 if (!VECTOR_MODE_P (mode))
34450 continue;
34452 struct df_link *link = DF_REF_CHAIN (use);
34454 /* We know by now that these are swaps, so we can delete
34455 them confidently. */
34456 while (link)
34458 rtx def_insn = DF_REF_INSN (link->ref);
34459 insn_entry[INSN_UID (def_insn)].will_delete = 1;
34460 link = link->next;
34466 /* OP is either a CONST_VECTOR or an expression containing one.
34467 Swap the first half of the vector with the second in the first
34468 case. Recurse to find it in the second. */
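/* For example (illustrative), the V4SI constant vector {0, 1, 2, 3}
   becomes {2, 3, 0, 1}: elements 0..1 trade places with elements
   2..3.  */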
34469 static void
34470 swap_const_vector_halves (rtx op)
34472 int i;
34473 enum rtx_code code = GET_CODE (op);
34474 if (GET_CODE (op) == CONST_VECTOR)
34476 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
34477 for (i = 0; i < half_units; ++i)
34479 rtx temp = CONST_VECTOR_ELT (op, i);
34480 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
34481 CONST_VECTOR_ELT (op, i + half_units) = temp;
34484 else
34486 int j;
34487 const char *fmt = GET_RTX_FORMAT (code);
34488 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34489 if (fmt[i] == 'e' || fmt[i] == 'u')
34490 swap_const_vector_halves (XEXP (op, i));
34491 else if (fmt[i] == 'E')
34492 for (j = 0; j < XVECLEN (op, i); ++j)
34493 swap_const_vector_halves (XVECEXP (op, i, j));
34497 /* Find all subregs of a vector expression that perform a narrowing,
34498 and adjust the subreg index to account for doubleword swapping. */
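/* For example (illustrative), (subreg:DF (reg:V2DF v) 0) becomes
   (subreg:DF (reg:V2DF v) 8) and vice versa, since the two doubleword
   halves of the register have traded places.  */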
34499 static void
34500 adjust_subreg_index (rtx op)
34502 enum rtx_code code = GET_CODE (op);
34503 if (code == SUBREG
34504 && (GET_MODE_SIZE (GET_MODE (op))
34505 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
34507 unsigned int index = SUBREG_BYTE (op);
34508 if (index < 8)
34509 index += 8;
34510 else
34511 index -= 8;
34512 SUBREG_BYTE (op) = index;
34515 const char *fmt = GET_RTX_FORMAT (code);
34516 int i, j;
34517 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34518 if (fmt[i] == 'e' || fmt[i] == 'u')
34519 adjust_subreg_index (XEXP (op, i));
34520 else if (fmt[i] == 'E')
34521 for (j = 0; j < XVECLEN (op, i); ++j)
34522 adjust_subreg_index (XVECEXP (op, i, j));
34525 /* Convert the non-permuting load INSN to a permuting one. */
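/* For a V4SI load, for example, the SET source is rewritten into the
   permuting form (illustrative):

     (vec_select:V4SI (mem:V4SI ...)
                      (parallel [(const_int 2) (const_int 3)
                                 (const_int 0) (const_int 1)]))  */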
34526 static void
34527 permute_load (rtx_insn *insn)
34529 rtx body = PATTERN (insn);
34530 rtx mem_op = SET_SRC (body);
34531 rtx tgt_reg = SET_DEST (body);
34532 machine_mode mode = GET_MODE (tgt_reg);
34533 int n_elts = GET_MODE_NUNITS (mode);
34534 int half_elts = n_elts / 2;
34535 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34536 int i, j;
34537 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34538 XVECEXP (par, 0, i) = GEN_INT (j);
34539 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34540 XVECEXP (par, 0, i) = GEN_INT (j);
34541 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
34542 SET_SRC (body) = sel;
34543 INSN_CODE (insn) = -1; /* Force re-recognition. */
34544 df_insn_rescan (insn);
34546 if (dump_file)
34547 fprintf (dump_file, "Replacing load %d with permuted load\n",
34548 INSN_UID (insn));
34551 /* Convert the non-permuting store INSN to a permuting one. */
34552 static void
34553 permute_store (rtx_insn *insn)
34555 rtx body = PATTERN (insn);
34556 rtx src_reg = SET_SRC (body);
34557 machine_mode mode = GET_MODE (src_reg);
34558 int n_elts = GET_MODE_NUNITS (mode);
34559 int half_elts = n_elts / 2;
34560 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34561 int i, j;
34562 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34563 XVECEXP (par, 0, i) = GEN_INT (j);
34564 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34565 XVECEXP (par, 0, i) = GEN_INT (j);
34566 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
34567 SET_SRC (body) = sel;
34568 INSN_CODE (insn) = -1; /* Force re-recognition. */
34569 df_insn_rescan (insn);
34571 if (dump_file)
34572 fprintf (dump_file, "Replacing store %d with permuted store\n",
34573 INSN_UID (insn));
34576 /* Given INSN containing a vector extract operation, adjust the index
34577 of the extracted lane to account for the doubleword swap. */
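/* For example (illustrative), extracting lane 0 of a V2DI register
   becomes extracting lane 1 once the doublewords are swapped, and
   vice versa.  */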
34578 static void
34579 adjust_extract (rtx_insn *insn)
34581 rtx src = SET_SRC (PATTERN (insn));
34582 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
34583 account for that. */
34584 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
34585 rtx par = XEXP (sel, 1);
34586 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
34587 int lane = INTVAL (XVECEXP (par, 0, 0));
34588 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34589 XVECEXP (par, 0, 0) = GEN_INT (lane);
34590 INSN_CODE (insn) = -1; /* Force re-recognition. */
34591 df_insn_rescan (insn);
34593 if (dump_file)
34594 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
34597 /* Given INSN containing a vector direct-splat operation, adjust the index
34598 of the source lane to account for the doubleword swap. */
34599 static void
34600 adjust_splat (rtx_insn *insn)
34602 rtx body = PATTERN (insn);
34603 rtx unspec = XEXP (body, 1);
34604 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
34605 int lane = INTVAL (XVECEXP (unspec, 0, 1));
34606 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34607 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
34608 INSN_CODE (insn) = -1; /* Force re-recognition. */
34609 df_insn_rescan (insn);
34611 if (dump_file)
34612 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
34615 /* The insn described by INSN_ENTRY[I] can be swapped, but only
34616 with special handling. Take care of that here. */
34617 static void
34618 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
34620 rtx_insn *insn = insn_entry[i].insn;
34621 rtx body = PATTERN (insn);
34623 switch (insn_entry[i].special_handling)
34625 default:
34626 gcc_unreachable ();
34627 case SH_CONST_VECTOR:
34629 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
34630 gcc_assert (GET_CODE (body) == SET);
34631 rtx rhs = SET_SRC (body);
34632 swap_const_vector_halves (rhs);
34633 if (dump_file)
34634 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
34635 break;
34637 case SH_SUBREG:
34638 /* A subreg of the same size is already safe. For subregs that
34639 select a smaller portion of a reg, adjust the index for
34640 swapped doublewords. */
34641 adjust_subreg_index (body);
34642 if (dump_file)
34643 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
34644 break;
34645 case SH_NOSWAP_LD:
34646 /* Convert a non-permuting load to a permuting one. */
34647 permute_load (insn);
34648 break;
34649 case SH_NOSWAP_ST:
34650 /* Convert a non-permuting store to a permuting one. */
34651 permute_store (insn);
34652 break;
34653 case SH_EXTRACT:
34654 /* Change the lane on an extract operation. */
34655 adjust_extract (insn);
34656 break;
34657 case SH_SPLAT:
34658 /* Change the lane on a direct-splat operation. */
34659 adjust_splat (insn);
34660 break;
34664 /* Find the insn from the Ith table entry, which is known to be a
34665 register swap Y = SWAP(X). Replace it with a copy Y = X. */
34666 static void
34667 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
34669 rtx_insn *insn = insn_entry[i].insn;
34670 rtx body = PATTERN (insn);
34671 rtx src_reg = XEXP (SET_SRC (body), 0);
34672 rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg);
34673 rtx_insn *new_insn = emit_insn_before (copy, insn);
34674 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
34675 df_insn_rescan (new_insn);
34677 if (dump_file)
34679 unsigned int new_uid = INSN_UID (new_insn);
34680 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
34683 df_insn_delete (insn);
34684 remove_insn (insn);
34685 insn->set_deleted ();
34688 /* Dump the swap table to DUMP_FILE. */
34689 static void
34690 dump_swap_insn_table (swap_web_entry *insn_entry)
34692 int e = get_max_uid ();
34693 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
34695 for (int i = 0; i < e; ++i)
34696 if (insn_entry[i].is_relevant)
34698 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
34699 fprintf (dump_file, "%6d %6d ", i,
34700 pred_entry && pred_entry->insn
34701 ? INSN_UID (pred_entry->insn) : 0);
34702 if (insn_entry[i].is_load)
34703 fputs ("load ", dump_file);
34704 if (insn_entry[i].is_store)
34705 fputs ("store ", dump_file);
34706 if (insn_entry[i].is_swap)
34707 fputs ("swap ", dump_file);
34708 if (insn_entry[i].is_live_in)
34709 fputs ("live-in ", dump_file);
34710 if (insn_entry[i].is_live_out)
34711 fputs ("live-out ", dump_file);
34712 if (insn_entry[i].contains_subreg)
34713 fputs ("subreg ", dump_file);
34714 if (insn_entry[i].is_128_int)
34715 fputs ("int128 ", dump_file);
34716 if (insn_entry[i].is_call)
34717 fputs ("call ", dump_file);
34718 if (insn_entry[i].is_swappable)
34720 fputs ("swappable ", dump_file);
34721 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
34722 fputs ("special:constvec ", dump_file);
34723 else if (insn_entry[i].special_handling == SH_SUBREG)
34724 fputs ("special:subreg ", dump_file);
34725 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
34726 fputs ("special:load ", dump_file);
34727 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
34728 fputs ("special:store ", dump_file);
34729 else if (insn_entry[i].special_handling == SH_EXTRACT)
34730 fputs ("special:extract ", dump_file);
34731 else if (insn_entry[i].special_handling == SH_SPLAT)
34732 fputs ("special:splat ", dump_file);
34734 if (insn_entry[i].web_not_optimizable)
34735 fputs ("unoptimizable ", dump_file);
34736 if (insn_entry[i].will_delete)
34737 fputs ("delete ", dump_file);
34738 fputs ("\n", dump_file);
34740 fputs ("\n", dump_file);
34743 /* Main entry point for this pass. */
34744 unsigned int
34745 rs6000_analyze_swaps (function *fun)
34747 swap_web_entry *insn_entry;
34748 basic_block bb;
34749 rtx_insn *insn;
34751 /* Dataflow analysis for use-def chains. */
34752 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
34753 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
34754 df_analyze ();
34755 df_set_flags (DF_DEFER_INSN_RESCAN);
34757 /* Allocate structure to represent webs of insns. */
34758 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
34760 /* Walk the insns to gather basic data. */
34761 FOR_ALL_BB_FN (bb, fun)
34762 FOR_BB_INSNS (bb, insn)
34764 unsigned int uid = INSN_UID (insn);
34765 if (NONDEBUG_INSN_P (insn))
34767 insn_entry[uid].insn = insn;
34769 if (GET_CODE (insn) == CALL_INSN)
34770 insn_entry[uid].is_call = 1;
34772 /* Walk the uses and defs to see if we mention vector regs.
34773 Record any constraints on optimization of such mentions. */
34774 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34775 df_ref mention;
34776 FOR_EACH_INSN_INFO_USE (mention, insn_info)
34778 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34779 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34781 /* If a use gets its value from a call insn, it will be
34782 a hard register and will look like (reg:V4SI 3 3).
34783 The df analysis creates two mentions for GPR3 and GPR4,
34784 both DImode. We must recognize this and treat it as a
34785 vector mention to ensure the call is unioned with this
34786 use. */
34787 if (mode == DImode && DF_REF_INSN_INFO (mention))
34789 rtx feeder = DF_REF_INSN (mention);
34790 /* FIXME: It is pretty hard to get from the df mention
34791 to the mode of the use in the insn. We arbitrarily
34792 pick a vector mode here, even though the use might
34793 be a real DImode. We can be too conservative
34794 (create a web larger than necessary) because of
34795 this, so consider eventually fixing this. */
34796 if (GET_CODE (feeder) == CALL_INSN)
34797 mode = V4SImode;
34800 if (VECTOR_MODE_P (mode) || mode == TImode)
34802 insn_entry[uid].is_relevant = 1;
34803 if (mode == TImode || mode == V1TImode)
34804 insn_entry[uid].is_128_int = 1;
34805 if (DF_REF_INSN_INFO (mention))
34806 insn_entry[uid].contains_subreg
34807 = !rtx_equal_p (DF_REF_REG (mention),
34808 DF_REF_REAL_REG (mention));
34809 union_defs (insn_entry, insn, mention);
34812 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
34814 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34815 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34817 /* If we're loading up a hard vector register for a call,
34818 it looks like (set (reg:V4SI 9 9) (...)). The df
34819 analysis creates two mentions for GPR9 and GPR10, both
34820 DImode. So relying on the mode from the mentions
34821 isn't sufficient to ensure we union the call into the
34822 web with the parameter setup code. */
34823 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
34824 && VECTOR_MODE_P (GET_MODE (SET_DEST (PATTERN (insn)))))
34825 mode = GET_MODE (SET_DEST (PATTERN (insn)));
34827 if (VECTOR_MODE_P (mode) || mode == TImode)
34829 insn_entry[uid].is_relevant = 1;
34830 if (mode == TImode || mode == V1TImode)
34831 insn_entry[uid].is_128_int = 1;
34832 if (DF_REF_INSN_INFO (mention))
34833 insn_entry[uid].contains_subreg
34834 = !rtx_equal_p (DF_REF_REG (mention),
34835 DF_REF_REAL_REG (mention));
34836 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
34837 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
34838 insn_entry[uid].is_live_out = 1;
34839 union_uses (insn_entry, insn, mention);
34843 if (insn_entry[uid].is_relevant)
34845 /* Determine if this is a load or store. */
34846 insn_entry[uid].is_load = insn_is_load_p (insn);
34847 insn_entry[uid].is_store = insn_is_store_p (insn);
34849 /* Determine if this is a doubleword swap. If not,
34850 determine whether it can legally be swapped. */
34851 if (insn_is_swap_p (insn))
34852 insn_entry[uid].is_swap = 1;
34853 else
34855 unsigned int special = SH_NONE;
34856 insn_entry[uid].is_swappable
34857 = insn_is_swappable_p (insn_entry, insn, &special);
34858 if (special != SH_NONE && insn_entry[uid].contains_subreg)
34859 insn_entry[uid].is_swappable = 0;
34860 else if (special != SH_NONE)
34861 insn_entry[uid].special_handling = special;
34862 else if (insn_entry[uid].contains_subreg)
34863 insn_entry[uid].special_handling = SH_SUBREG;
34869 if (dump_file)
34871 fprintf (dump_file, "\nSwap insn entry table when first built\n");
34872 dump_swap_insn_table (insn_entry);
34875 /* Record unoptimizable webs. */
34876 unsigned e = get_max_uid (), i;
34877 for (i = 0; i < e; ++i)
34879 if (!insn_entry[i].is_relevant)
34880 continue;
34882 swap_web_entry *root
34883 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
34885 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
34886 || (insn_entry[i].contains_subreg
34887 && insn_entry[i].special_handling != SH_SUBREG)
34888 || insn_entry[i].is_128_int || insn_entry[i].is_call
34889 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
34890 root->web_not_optimizable = 1;
34892 /* If we have loads or stores that aren't permuting then the
34893 optimization isn't appropriate. */
34894 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
34895 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
34896 root->web_not_optimizable = 1;
34898 /* If we have permuting loads or stores that are not accompanied
34899 by a register swap, the optimization isn't appropriate. */
34900 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
34902 rtx insn = insn_entry[i].insn;
34903 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34904 df_ref def;
34906 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34908 struct df_link *link = DF_REF_CHAIN (def);
34910 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
34912 root->web_not_optimizable = 1;
34913 break;
34917 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
34919 rtx insn = insn_entry[i].insn;
34920 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34921 df_ref use;
34923 FOR_EACH_INSN_INFO_USE (use, insn_info)
34925 struct df_link *link = DF_REF_CHAIN (use);
34927 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
34929 root->web_not_optimizable = 1;
34930 break;
34936 if (dump_file)
34938 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
34939 dump_swap_insn_table (insn_entry);
34942 /* For each load and store in an optimizable web (which implies
34943 the loads and stores are permuting), find the associated
34944 register swaps and mark them for removal. Due to various
34945 optimizations we may mark the same swap more than once. Also
34946 perform special handling for swappable insns that require it. */
34947 for (i = 0; i < e; ++i)
34948 if ((insn_entry[i].is_load || insn_entry[i].is_store)
34949 && insn_entry[i].is_swap)
34951 swap_web_entry* root_entry
34952 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34953 if (!root_entry->web_not_optimizable)
34954 mark_swaps_for_removal (insn_entry, i);
34956 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
34958 swap_web_entry* root_entry
34959 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34960 if (!root_entry->web_not_optimizable)
34961 handle_special_swappables (insn_entry, i);
  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  /* Clean up.  */
  free (insn_entry);
  return 0;
}
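
/* On little-endian VSX, lxvd2x and stxvd2x access the two vector
   doublewords in big-endian order, so expansion wraps each such load
   and store in an xxpermdi doubleword swap.  The pass defined below
   removes those swaps for webs of lane-insensitive computation.
   Roughly (illustrative assembly, not actual compiler output):

       lxvd2x   0,0,8       # load; doublewords arrive reversed
       xxpermdi 0,0,0,2     # swap them back            <-- removed
       xvadddp  0,0,0       # lane-insensitive arithmetic
       xxpermdi 0,0,0,2     # swap before storing       <-- removed
       stxvd2x  0,0,9       # store; doublewords reversed again  */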

const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
	      && rs6000_optimize_swaps);
    }

  virtual unsigned int execute (function *fun)
    {
      return rs6000_analyze_swaps (fun);
    }

}; // class pass_analyze_swaps

rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}
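
/* This factory is called while target options are being overridden
   (see the register_pass call earlier in this file), which is what
   inserts the pass into the RTL pipeline.  */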

#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
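      /* Without hardware FPRs there is no FPSCR to read with mffs or
	 write with mtfsf, so fall back to the glibc soft-float
	 helpers when the target makes them available.  */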
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var (double_type_node);
      mark_addressable (fenv_var);
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }

  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var):

       fenv_var = __builtin_mffs ();
       double fenv_hold;
       *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
       __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);
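
  /* mffs returns the FPSCR image in the low-order 32 bits of the
     double; the high-order 32 bits belong to the value's encoding and
     must pass through untouched.  Within the low word, bits 0-1 are
     the rounding mode (RN) and bit 2 is the non-IEEE mode bit (NI),
     which is why the hold mask keeps exactly 0x7 there.  */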

  tree fenv_var = create_tmp_var (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

       double fenv_clear = __builtin_mffs ();
       *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
       __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear everything in the lower 32 bits of the FPSCR image,
     including the exception flags and enables and the rounding modes.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var):

       double old_fenv = __builtin_mffs ();
       double fenv_update;
       *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
				  | (*(uint64_t*)&fenv_var & 0x1ff80fff);
       __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);
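
  /* The IOR built below merges two environments, as feupdateenv
     requires: update_exception_mask keeps the exception status bits
     that accumulated in the live FPSCR after the hold, while
     new_exception_mask recovers the control bits (rounding mode, NI,
     exception enables) and saved status from fenv_var.  */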

  tree old_fenv = create_tmp_var (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}

struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"