/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "regs.h"
27 #include "hard-reg-set.h"
28 #include "insn-config.h"
29 #include "conditions.h"
30 #include "insn-attr.h"
31 #include "flags.h"
32 #include "recog.h"
33 #include "obstack.h"
34 #include "hash-set.h"
35 #include "machmode.h"
36 #include "vec.h"
37 #include "double-int.h"
38 #include "input.h"
39 #include "alias.h"
40 #include "symtab.h"
41 #include "wide-int.h"
42 #include "inchash.h"
43 #include "tree.h"
44 #include "fold-const.h"
45 #include "stringpool.h"
46 #include "stor-layout.h"
47 #include "calls.h"
48 #include "print-tree.h"
49 #include "varasm.h"
50 #include "hashtab.h"
51 #include "function.h"
52 #include "statistics.h"
53 #include "real.h"
54 #include "fixed-value.h"
55 #include "expmed.h"
56 #include "dojump.h"
57 #include "explow.h"
58 #include "emit-rtl.h"
59 #include "stmt.h"
60 #include "expr.h"
61 #include "insn-codes.h"
62 #include "optabs.h"
63 #include "except.h"
64 #include "output.h"
65 #include "dbxout.h"
66 #include "predict.h"
67 #include "dominance.h"
68 #include "cfg.h"
69 #include "cfgrtl.h"
70 #include "cfganal.h"
71 #include "lcm.h"
72 #include "cfgbuild.h"
73 #include "cfgcleanup.h"
74 #include "basic-block.h"
75 #include "diagnostic-core.h"
76 #include "toplev.h"
77 #include "ggc.h"
78 #include "tm_p.h"
79 #include "target.h"
80 #include "target-def.h"
81 #include "common/common-target.h"
82 #include "langhooks.h"
83 #include "reload.h"
84 #include "cfgloop.h"
85 #include "sched-int.h"
86 #include "hash-table.h"
87 #include "tree-ssa-alias.h"
88 #include "internal-fn.h"
89 #include "gimple-fold.h"
90 #include "tree-eh.h"
91 #include "gimple-expr.h"
92 #include "is-a.h"
93 #include "gimple.h"
94 #include "gimplify.h"
95 #include "gimple-iterator.h"
96 #include "gimple-walk.h"
97 #include "intl.h"
98 #include "params.h"
99 #include "tm-constrs.h"
100 #include "ira.h"
101 #include "opts.h"
102 #include "tree-vectorizer.h"
103 #include "dumpfile.h"
104 #include "hash-map.h"
105 #include "plugin-api.h"
106 #include "ipa-ref.h"
107 #include "cgraph.h"
108 #include "target-globals.h"
109 #include "builtins.h"
110 #include "context.h"
111 #include "tree-pass.h"
112 #if TARGET_XCOFF
113 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
114 #endif
115 #if TARGET_MACHO
116 #include "gstab.h" /* for N_SLINE */
117 #endif
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))
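
/* Note that, like any macros of this shape, these evaluate the selected
   argument twice, so they must not be used with operands that have side
   effects; e.g. min (x++, y) increments x a second time when x < y.  */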

/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;         /* stack info won't change from here on */
  int first_gp_reg_save;        /* first callee saved GP register used */
  int first_fp_reg_save;        /* first callee saved FP register used */
  int first_altivec_reg_save;   /* first callee saved AltiVec register used */
  int lr_save_p;                /* true if the link reg needs to be saved */
  int cr_save_p;                /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;     /* mask of vec registers to save */
  int push_p;                   /* true if we need to allocate stack space */
  int calls_p;                  /* true if the function makes any calls */
  int world_save_p;             /* true if we're saving *everything*:
                                   r13-r31, cr, f14-f31, vrsave, v20-v31 */
  enum rs6000_abi abi;          /* which ABI to use */
  int gp_save_offset;           /* offset to save GP regs from initial SP */
  int fp_save_offset;           /* offset to save FP regs from initial SP */
  int altivec_save_offset;      /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;           /* offset to save LR from initial SP */
  int cr_save_offset;           /* offset to save CR from initial SP */
  int vrsave_save_offset;       /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;       /* offset to save spe 64-bit gprs */
  int varargs_save_offset;      /* offset to save the varargs registers */
  int ehrd_offset;              /* offset to EH return data */
  int ehcr_offset;              /* offset to EH CR field data */
  int reg_size;                 /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;      /* variable save area size */
  int parm_size;                /* outgoing parameter size */
  int save_size;                /* save area size */
  int fixed_size;               /* fixed size of stack frame */
  int gp_size;                  /* size of saved GP registers */
  int fp_size;                  /* size of saved FP registers */
  int altivec_size;             /* size of saved AltiVec registers */
  int cr_size;                  /* size to hold CR if not in fixed area */
  int vrsave_size;              /* size to hold VRSAVE */
  int altivec_padding_size;     /* size of altivec alignment padding */
  int spe_gp_size;              /* size of the 64-bit GPR save area for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;     /* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;

/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable; calls to it are
   used to get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;

struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV         = 0x001,   /* Use divide estimate.  */
  RECIP_DF_DIV         = 0x002,
  RECIP_V4SF_DIV       = 0x004,
  RECIP_V2DF_DIV       = 0x008,

  RECIP_SF_RSQRT       = 0x010,   /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT       = 0x020,
  RECIP_V4SF_RSQRT     = 0x040,
  RECIP_V2DF_RSQRT     = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE           = 0,
  RECIP_ALL            = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                          | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                          | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION  = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};

/* -mrecip options.  */
static struct
{
  const char *string;           /* option name */
  unsigned int mask;            /* mask bits to set */
} recip_options[] = {
  { "all",       RECIP_ALL },
  { "none",      RECIP_NONE },
  { "div",       (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                  | RECIP_V2DF_DIV) },
  { "divf",      (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",      (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",     (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
                  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",    (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",    (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)

/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,               /* General purpose registers.  */
  RELOAD_REG_FPR,               /* Traditional floating point regs.  */
  RELOAD_REG_VMX,               /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,               /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;             /* Register class name.  */
  int reg;                      /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },           /* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },           /* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },       /* RELOAD_REG_VMX.  */
  { "Any", -1 },                        /* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID        0x01    /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE     0x02    /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED      0x04    /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET       0x08    /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC   0x10    /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY   0x20    /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16      0x40    /* AND -16 addressing.  */
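
/* For example, a mode that is valid in a GPR and supports both reg+reg and
   reg+offset addressing there would have
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET)
   set in the RELOAD_REG_GPR entry of its addr_mask.  */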

/* Masks of valid addressing modes, based on register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;           /* INSN to reload for loading.  */
  enum insn_code reload_store;          /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;        /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;        /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;        /* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;                 /* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}
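
/* Since RELOAD_REG_ANY is the OR of the GPR, FPR and Altivec masks, these
   predicates answer whether *any* register class supports the given
   auto-increment form for MODE.  */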

/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;        /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;        /* cost of DImode multiplication.  */
  const int divsi;        /* cost of SImode division.  */
  const int divdi;        /* cost of DImode division.  */
  const int fp;           /* cost of simple SFmode and DFmode insns.  */
  const int dmul;         /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;         /* cost of SFmode division (fdivs).  */
  const int ddiv;         /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;      /* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;      /* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
                                        operations.  */
  const int sfdf_convert;       /* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;

/* Processor costs (relative to an add).  */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                      /* cache line size */
  32,                       /* l1 cache */
  512,                      /* l2 cache */
  6,                        /* streams */
  0,                        /* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
  COSTS_N_INSNS (3),    /* SF->DF convert */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
  0,                    /* SF->DF convert */
};

/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },
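
/* Each RS6000_BUILTIN_* macro above expands to one initializer row of the
   form { NAME, ICODE, MASK, ATTR }, so that including rs6000-builtin.def
   below emits one table entry per builtin.  */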

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (tree, tree, tree);


static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
                                      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
                                   machine_mode, machine_mode,
                                   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
                                       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
                                             int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
                                                   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
                                            machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
                                                  enum reg_class,
                                                  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
                                             machine_mode,
                                             enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
                                                   machine_mode,
                                                   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
                                             int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
                                            machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
                                             machine_mode,
                                             enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_hasher<toc_hash_struct *>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];         /* return value + 3 arguments.  */
  unsigned char uns_p[4];       /* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_hasher<builtin_hash_struct *>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;

/* Default register names.  */
char rs6000_reg_names[][8] =
{
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  "0", "1", "2", "3", "4", "5", "6", "7",
  "ca",
  /* AltiVec registers.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "0", "1", "2", "3", "4", "5", "6", "7",
  "8", "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0", "%r1", "%r2", "%r3", "%r4", "%r5", "%r6", "%r7",
  "%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0", "%f1", "%f2", "%f3", "%f4", "%f5", "%f6", "%f7",
  "%f8", "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
  "%v0", "%v1", "%v2", "%v3", "%v4", "%v5", "%v6", "%v7",
  "%v8", "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0", "%rh1", "%rh2", "%rh3", "%rh4", "%rh5", "%rh6", "%rh7",
  "%rh8", "%rh9", "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
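
/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (%v0),
   and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001 (%v31).  */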

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

/* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
   The PowerPC architecture requires only weak consistency among
   processors--that is, memory accesses between processors need not be
   sequentially consistent and memory accesses among processors can occur
   in any order.  The ability to order memory accesses weakly provides
   opportunities for more efficient use of the system bus.  Unless a
   dependency exists, the 604e allows read operations to precede store
   operations.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

        addis   tmp,anchor,high
        add     dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P rs6000_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1697 #undef TARGET_OPTION_PRINT
1698 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1700 #undef TARGET_CAN_INLINE_P
1701 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1703 #undef TARGET_SET_CURRENT_FUNCTION
1704 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1706 #undef TARGET_LEGITIMATE_CONSTANT_P
1707 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1709 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1710 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1712 #undef TARGET_CAN_USE_DOLOOP_P
1713 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1715 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1716 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1718 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1719 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1720 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1721 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1722 #undef TARGET_UNWIND_WORD_MODE
1723 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
1726 /* Processor table. */
1727 struct rs6000_ptt
1729 const char *const name; /* Canonical processor name. */
1730 const enum processor_type processor; /* Processor type enum value. */
1731 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1734 static struct rs6000_ptt const processor_target_table[] =
1736 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1737 #include "rs6000-cpus.def"
1738 #undef RS6000_CPU
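/* Each RS6000_CPU line in rs6000-cpus.def expands to one entry of the table
   above, so the table stays in sync with the master list of supported
   processors automatically.  */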
1741 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1742 name is invalid. */
1744 static int
1745 rs6000_cpu_name_lookup (const char *name)
1747 size_t i;
1749 if (name != NULL)
1751 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1752 if (! strcmp (name, processor_target_table[i].name))
1753 return (int)i;
1756 return -1;
1760 /* Return number of consecutive hard regs needed starting at reg REGNO
1761 to hold something of mode MODE.
1762 This is ordinarily the length in words of a value of mode MODE
1763 but can be less for certain modes in special long registers.
1765 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1766 scalar instructions. The upper 32 bits are only available to the
1767 SIMD instructions.
1769 POWER and PowerPC GPRs hold 32 bits worth;
1770 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1772 static int
1773 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1775 unsigned HOST_WIDE_INT reg_size;
1777 /* TF/TD modes are special in that they always take 2 registers. */
1778 if (FP_REGNO_P (regno))
1779 reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
1780 ? UNITS_PER_VSX_WORD
1781 : UNITS_PER_FP_WORD);
1783 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1784 reg_size = UNITS_PER_SPE_WORD;
1786 else if (ALTIVEC_REGNO_P (regno))
1787 reg_size = UNITS_PER_ALTIVEC_WORD;
1789 /* The value returned for SCmode in the E500 double case is 2 for
1790 ABI compatibility; storing an SCmode value in a single register
1791 would require function_arg and rs6000_spe_function_arg to handle
1792 SCmode so as to pass the value correctly in a pair of
1793 registers. */
1794 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1795 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1796 reg_size = UNITS_PER_FP_WORD;
1798 else
1799 reg_size = UNITS_PER_WORD;
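/* Round the mode size up to a whole number of registers.  */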
1801 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
1804 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1805 MODE. */
1806 static int
1807 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1809 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1811 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1812 register combinations; we use PTImode where we need to deal with quad
1813 word memory operations. Don't allow quad words in the argument or frame
1814 pointer registers, just registers 0..31. */
1815 if (mode == PTImode)
1816 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1817 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1818 && ((regno & 1) == 0));
1820 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1821 implementations. Don't allow an item to be split between a FP register
1822 and an Altivec register. Allow TImode in all VSX registers if the user
1823 asked for it. */
1824 if (TARGET_VSX && VSX_REGNO_P (regno)
1825 && (VECTOR_MEM_VSX_P (mode)
1826 || reg_addr[mode].scalar_in_vmx_p
1827 || (TARGET_VSX_TIMODE && mode == TImode)
1828 || (TARGET_VADDUQM && mode == V1TImode)))
1830 if (FP_REGNO_P (regno))
1831 return FP_REGNO_P (last_regno);
1833 if (ALTIVEC_REGNO_P (regno))
1835 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1836 return 0;
1838 return ALTIVEC_REGNO_P (last_regno);
1842 /* The GPRs can hold any mode, but values bigger than one register
1843 cannot go past R31. */
1844 if (INT_REGNO_P (regno))
1845 return INT_REGNO_P (last_regno);
1847 /* The float registers (except for VSX vector modes) can only hold floating
1848 modes and DImode. */
1849 if (FP_REGNO_P (regno))
1851 if (SCALAR_FLOAT_MODE_P (mode)
1852 && (mode != TDmode || (regno % 2) == 0)
1853 && FP_REGNO_P (last_regno))
1854 return 1;
1856 if (GET_MODE_CLASS (mode) == MODE_INT
1857 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1858 return 1;
1860 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1861 && PAIRED_VECTOR_MODE (mode))
1862 return 1;
1864 return 0;
1867 /* The CR registers can only hold CC modes. */
1868 if (CR_REGNO_P (regno))
1869 return GET_MODE_CLASS (mode) == MODE_CC;
1871 if (CA_REGNO_P (regno))
1872 return mode == Pmode || mode == SImode;
1874 /* AltiVec modes can go only in AltiVec registers. */
1875 if (ALTIVEC_REGNO_P (regno))
1876 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1877 || mode == V1TImode);
1879 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1880 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1881 return 1;
1883 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1884 registers, and the value must fit within the register set. */
1886 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1889 /* Print interesting facts about registers. */
1890 static void
1891 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1893 int r, m;
1895 for (r = first_regno; r <= last_regno; ++r)
1897 const char *comma = "";
1898 int len;
1900 if (first_regno == last_regno)
1901 fprintf (stderr, "%s:\t", reg_name);
1902 else
1903 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1905 len = 8;
1906 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1907 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1909 if (len > 70)
1911 fprintf (stderr, ",\n\t");
1912 len = 8;
1913 comma = "";
1916 if (rs6000_hard_regno_nregs[m][r] > 1)
1917 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1918 rs6000_hard_regno_nregs[m][r]);
1919 else
1920 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1922 comma = ", ";
1925 if (call_used_regs[r])
1927 if (len > 70)
1929 fprintf (stderr, ",\n\t");
1930 len = 8;
1931 comma = "";
1934 len += fprintf (stderr, "%s%s", comma, "call-used");
1935 comma = ", ";
1938 if (fixed_regs[r])
1940 if (len > 70)
1942 fprintf (stderr, ",\n\t");
1943 len = 8;
1944 comma = "";
1947 len += fprintf (stderr, "%s%s", comma, "fixed");
1948 comma = ", ";
1951 if (len > 70)
1953 fprintf (stderr, ",\n\t");
1954 comma = "";
1957 len += fprintf (stderr, "%sreg-class = %s", comma,
1958 reg_class_names[(int)rs6000_regno_regclass[r]]);
1959 comma = ", ";
1961 if (len > 70)
1963 fprintf (stderr, ",\n\t");
1964 comma = "";
1967 fprintf (stderr, "%sregno = %d\n", comma, r);
1971 static const char *
1972 rs6000_debug_vector_unit (enum rs6000_vector v)
1974 const char *ret;
1976 switch (v)
1978 case VECTOR_NONE: ret = "none"; break;
1979 case VECTOR_ALTIVEC: ret = "altivec"; break;
1980 case VECTOR_VSX: ret = "vsx"; break;
1981 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1982 case VECTOR_PAIRED: ret = "paired"; break;
1983 case VECTOR_SPE: ret = "spe"; break;
1984 case VECTOR_OTHER: ret = "other"; break;
1985 default: ret = "unknown"; break;
1988 return ret;
1991 /* Inner function printing just the address mask for a particular reload
1992 register class. */
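/* One character is printed per capability bit: 'v' (valid), 'm' (needs
   multiple registers), 'i' (indexed REG+REG addressing), 'o' (REG+OFFSET
   addressing), '+' twice (PRE_INC/PRE_DEC, then PRE_MODIFY), and '&'
   (AND the address with -16).  */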
1993 DEBUG_FUNCTION char *
1994 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
1996 static char ret[8];
1997 char *p = ret;
1999 if ((mask & RELOAD_REG_VALID) != 0)
2000 *p++ = 'v';
2001 else if (keep_spaces)
2002 *p++ = ' ';
2004 if ((mask & RELOAD_REG_MULTIPLE) != 0)
2005 *p++ = 'm';
2006 else if (keep_spaces)
2007 *p++ = ' ';
2009 if ((mask & RELOAD_REG_INDEXED) != 0)
2010 *p++ = 'i';
2011 else if (keep_spaces)
2012 *p++ = ' ';
2014 if ((mask & RELOAD_REG_OFFSET) != 0)
2015 *p++ = 'o';
2016 else if (keep_spaces)
2017 *p++ = ' ';
2019 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2020 *p++ = '+';
2021 else if (keep_spaces)
2022 *p++ = ' ';
2024 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2025 *p++ = '+';
2026 else if (keep_spaces)
2027 *p++ = ' ';
2029 if ((mask & RELOAD_REG_AND_M16) != 0)
2030 *p++ = '&';
2031 else if (keep_spaces)
2032 *p++ = ' ';
2034 *p = '\0';
2036 return ret;
2039 /* Print the address masks in a human readable fashion. */
2040 DEBUG_FUNCTION void
2041 rs6000_debug_print_mode (ssize_t m)
2043 ssize_t rc;
2045 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2046 for (rc = 0; rc < N_RELOAD_REG; rc++)
2047 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2048 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2050 if (rs6000_vector_unit[m] != VECTOR_NONE
2051 || rs6000_vector_mem[m] != VECTOR_NONE
2052 || (reg_addr[m].reload_store != CODE_FOR_nothing)
2053 || (reg_addr[m].reload_load != CODE_FOR_nothing)
2054 || reg_addr[m].scalar_in_vmx_p)
2056 fprintf (stderr,
2057 " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c Upper=%c",
2058 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2059 rs6000_debug_vector_unit (rs6000_vector_mem[m]),
2060 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2061 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*',
2062 (reg_addr[m].scalar_in_vmx_p) ? 'y' : 'n');
2065 fputs ("\n", stderr);
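/* Format strings for the -mdebug=reg dump below; the %-32s field keeps the
   option names left-justified so the '=' columns line up.  */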
2068 #define DEBUG_FMT_ID "%-32s= "
2069 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2070 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2071 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
2073 /* Print various interesting information with -mdebug=reg. */
2074 static void
2075 rs6000_debug_reg_global (void)
2077 static const char *const tf[2] = { "false", "true" };
2078 const char *nl = (const char *)0;
2079 int m;
2080 size_t m1, m2, v;
2081 char costly_num[20];
2082 char nop_num[20];
2083 char flags_buffer[40];
2084 const char *costly_str;
2085 const char *nop_str;
2086 const char *trace_str;
2087 const char *abi_str;
2088 const char *cmodel_str;
2089 struct cl_target_option cl_opts;
2091 /* Modes we want tieable information on. */
2092 static const machine_mode print_tieable_modes[] = {
2093 QImode,
2094 HImode,
2095 SImode,
2096 DImode,
2097 TImode,
2098 PTImode,
2099 SFmode,
2100 DFmode,
2101 TFmode,
2102 SDmode,
2103 DDmode,
2104 TDmode,
2105 V8QImode,
2106 V4HImode,
2107 V2SImode,
2108 V16QImode,
2109 V8HImode,
2110 V4SImode,
2111 V2DImode,
2112 V1TImode,
2113 V32QImode,
2114 V16HImode,
2115 V8SImode,
2116 V4DImode,
2117 V2TImode,
2118 V2SFmode,
2119 V4SFmode,
2120 V2DFmode,
2121 V8SFmode,
2122 V4DFmode,
2123 CCmode,
2124 CCUNSmode,
2125 CCEQmode,
2128 /* Virtual regs we are interested in. */
2129 const static struct {
2130 int regno; /* register number. */
2131 const char *name; /* register name. */
2132 } virtual_regs[] = {
2133 { STACK_POINTER_REGNUM, "stack pointer:" },
2134 { TOC_REGNUM, "toc: " },
2135 { STATIC_CHAIN_REGNUM, "static chain: " },
2136 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2137 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2138 { ARG_POINTER_REGNUM, "arg pointer: " },
2139 { FRAME_POINTER_REGNUM, "frame pointer:" },
2140 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2141 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2142 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2143 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2144 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2145 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2146 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2147 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2148 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2151 fputs ("\nHard register information:\n", stderr);
2152 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2153 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2154 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2155 LAST_ALTIVEC_REGNO,
2156 "vs");
2157 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2158 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2159 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2160 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2161 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2162 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2163 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2164 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2166 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2167 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2168 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2170 fprintf (stderr,
2171 "\n"
2172 "d reg_class = %s\n"
2173 "f reg_class = %s\n"
2174 "v reg_class = %s\n"
2175 "wa reg_class = %s\n"
2176 "wd reg_class = %s\n"
2177 "wf reg_class = %s\n"
2178 "wg reg_class = %s\n"
2179 "wh reg_class = %s\n"
2180 "wi reg_class = %s\n"
2181 "wj reg_class = %s\n"
2182 "wk reg_class = %s\n"
2183 "wl reg_class = %s\n"
2184 "wm reg_class = %s\n"
2185 "wr reg_class = %s\n"
2186 "ws reg_class = %s\n"
2187 "wt reg_class = %s\n"
2188 "wu reg_class = %s\n"
2189 "wv reg_class = %s\n"
2190 "ww reg_class = %s\n"
2191 "wx reg_class = %s\n"
2192 "wy reg_class = %s\n"
2193 "wz reg_class = %s\n"
2194 "\n",
2195 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2196 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2197 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2198 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2199 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2200 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2201 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2202 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2203 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2204 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2205 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2206 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2207 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2208 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2209 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2210 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2211 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2212 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2213 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2214 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2215 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2216 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2218 nl = "\n";
2219 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2220 rs6000_debug_print_mode (m);
2222 fputs ("\n", stderr);
2224 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2226 machine_mode mode1 = print_tieable_modes[m1];
2227 bool first_time = true;
2229 nl = (const char *)0;
2230 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2232 machine_mode mode2 = print_tieable_modes[m2];
2233 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2235 if (first_time)
2237 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2238 nl = "\n";
2239 first_time = false;
2242 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2246 if (!first_time)
2247 fputs ("\n", stderr);
2250 if (nl)
2251 fputs (nl, stderr);
2253 if (rs6000_recip_control)
2255 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2257 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2258 if (rs6000_recip_bits[m])
2260 fprintf (stderr,
2261 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2262 GET_MODE_NAME (m),
2263 (RS6000_RECIP_AUTO_RE_P (m)
2264 ? "auto"
2265 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2266 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2267 ? "auto"
2268 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2271 fputs ("\n", stderr);
2274 if (rs6000_cpu_index >= 0)
2276 const char *name = processor_target_table[rs6000_cpu_index].name;
2277 HOST_WIDE_INT flags
2278 = processor_target_table[rs6000_cpu_index].target_enable;
2280 sprintf (flags_buffer, "-mcpu=%s flags", name);
2281 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2283 else
2284 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2286 if (rs6000_tune_index >= 0)
2288 const char *name = processor_target_table[rs6000_tune_index].name;
2289 HOST_WIDE_INT flags
2290 = processor_target_table[rs6000_tune_index].target_enable;
2292 sprintf (flags_buffer, "-mtune=%s flags", name);
2293 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2295 else
2296 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2298 cl_target_option_save (&cl_opts, &global_options);
2299 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2300 rs6000_isa_flags);
2302 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2303 rs6000_isa_flags_explicit);
2305 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2306 rs6000_builtin_mask);
2308 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2310 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2311 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2313 switch (rs6000_sched_costly_dep)
2315 case max_dep_latency:
2316 costly_str = "max_dep_latency";
2317 break;
2319 case no_dep_costly:
2320 costly_str = "no_dep_costly";
2321 break;
2323 case all_deps_costly:
2324 costly_str = "all_deps_costly";
2325 break;
2327 case true_store_to_load_dep_costly:
2328 costly_str = "true_store_to_load_dep_costly";
2329 break;
2331 case store_to_load_dep_costly:
2332 costly_str = "store_to_load_dep_costly";
2333 break;
2335 default:
2336 costly_str = costly_num;
2337 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2338 break;
2341 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2343 switch (rs6000_sched_insert_nops)
2345 case sched_finish_regroup_exact:
2346 nop_str = "sched_finish_regroup_exact";
2347 break;
2349 case sched_finish_pad_groups:
2350 nop_str = "sched_finish_pad_groups";
2351 break;
2353 case sched_finish_none:
2354 nop_str = "sched_finish_none";
2355 break;
2357 default:
2358 nop_str = nop_num;
2359 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2360 break;
2363 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2365 switch (rs6000_sdata)
2367 default:
2368 case SDATA_NONE:
2369 break;
2371 case SDATA_DATA:
2372 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2373 break;
2375 case SDATA_SYSV:
2376 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2377 break;
2379 case SDATA_EABI:
2380 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2381 break;
2385 switch (rs6000_traceback)
2387 case traceback_default: trace_str = "default"; break;
2388 case traceback_none: trace_str = "none"; break;
2389 case traceback_part: trace_str = "part"; break;
2390 case traceback_full: trace_str = "full"; break;
2391 default: trace_str = "unknown"; break;
2394 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2396 switch (rs6000_current_cmodel)
2398 case CMODEL_SMALL: cmodel_str = "small"; break;
2399 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2400 case CMODEL_LARGE: cmodel_str = "large"; break;
2401 default: cmodel_str = "unknown"; break;
2404 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2406 switch (rs6000_current_abi)
2408 case ABI_NONE: abi_str = "none"; break;
2409 case ABI_AIX: abi_str = "aix"; break;
2410 case ABI_ELFv2: abi_str = "ELFv2"; break;
2411 case ABI_V4: abi_str = "V4"; break;
2412 case ABI_DARWIN: abi_str = "darwin"; break;
2413 default: abi_str = "unknown"; break;
2416 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2418 if (rs6000_altivec_abi)
2419 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2421 if (rs6000_spe_abi)
2422 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2424 if (rs6000_darwin64_abi)
2425 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2427 if (rs6000_float_gprs)
2428 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2430 fprintf (stderr, DEBUG_FMT_S, "fprs",
2431 (TARGET_FPRS ? "true" : "false"));
2433 fprintf (stderr, DEBUG_FMT_S, "single_float",
2434 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2436 fprintf (stderr, DEBUG_FMT_S, "double_float",
2437 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2439 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2440 (TARGET_SOFT_FLOAT ? "true" : "false"));
2442 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2443 (TARGET_E500_SINGLE ? "true" : "false"));
2445 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2446 (TARGET_E500_DOUBLE ? "true" : "false"));
2448 if (TARGET_LINK_STACK)
2449 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2451 if (targetm.lra_p ())
2452 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2454 if (TARGET_P8_FUSION)
2455 fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
2456 (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
2458 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2459 TARGET_SECURE_PLT ? "secure" : "bss");
2460 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2461 aix_struct_return ? "aix" : "sysv");
2462 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2463 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2464 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2465 tf[!!rs6000_align_branch_targets]);
2466 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2467 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2468 rs6000_long_double_type_size);
2469 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2470 (int)rs6000_sched_restricted_insns_priority);
2471 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2472 (int)END_BUILTINS);
2473 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2474 (int)RS6000_BUILTIN_COUNT);
2476 if (TARGET_VSX)
2477 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2478 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2482 /* Update the addr mask bits in reg_addr to help secondary reload and the
2483 legitimate address support figure out the appropriate addressing to
2484 use. */
2486 static void
2487 rs6000_setup_reg_addr_masks (void)
2489 ssize_t rc, reg, m, nregs;
2490 addr_mask_type any_addr_mask, addr_mask;
2492 for (m = 0; m < NUM_MACHINE_MODES; ++m)
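/* M is the plain integer loop index; M2 is the same value with the
   machine_mode type that the mode-query macros below expect.  */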
2494 machine_mode m2 = (machine_mode)m;
2496 /* SDmode is special in that we want to access it only via REG+REG
2497 addressing on power7 and above, since we want to use the LFIWZX and
2498 STFIWZX instructions to load it. */
2499 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2501 any_addr_mask = 0;
2502 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2504 addr_mask = 0;
2505 reg = reload_reg_map[rc].reg;
2507 /* Can mode values go in the GPR/FPR/Altivec registers? */
2508 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2510 nregs = rs6000_hard_regno_nregs[m][reg];
2511 addr_mask |= RELOAD_REG_VALID;
2513 /* Indicate if the mode takes more than 1 physical register. If
2514 it takes a single register, indicate it can do REG+REG
2515 addressing. */
2516 if (nregs > 1 || m == BLKmode)
2517 addr_mask |= RELOAD_REG_MULTIPLE;
2518 else
2519 addr_mask |= RELOAD_REG_INDEXED;
2521 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2522 addressing. Restrict addressing on SPE for 64-bit types
2523 because of the SUBREG hackery used to address 64-bit floats in
2524 '32-bit' GPRs. */
2526 if (TARGET_UPDATE
2527 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2528 && GET_MODE_SIZE (m2) <= 8
2529 && !VECTOR_MODE_P (m2)
2530 && !COMPLEX_MODE_P (m2)
2531 && !indexed_only_p
2532 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8))
2534 addr_mask |= RELOAD_REG_PRE_INCDEC;
2536 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2537 we don't allow PRE_MODIFY for some multi-register
2538 operations. */
2539 switch (m)
2541 default:
2542 addr_mask |= RELOAD_REG_PRE_MODIFY;
2543 break;
2545 case DImode:
2546 if (TARGET_POWERPC64)
2547 addr_mask |= RELOAD_REG_PRE_MODIFY;
2548 break;
2550 case DFmode:
2551 case DDmode:
2552 if (TARGET_DF_INSN)
2553 addr_mask |= RELOAD_REG_PRE_MODIFY;
2554 break;
2559 /* GPR and FPR registers can do REG+OFFSET addressing, except
2560 possibly for SDmode. */
2561 if ((addr_mask != 0) && !indexed_only_p
2562 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
2563 addr_mask |= RELOAD_REG_OFFSET;
2565 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2566 addressing on 128-bit types. */
2567 if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16
2568 && (addr_mask & RELOAD_REG_VALID) != 0)
2569 addr_mask |= RELOAD_REG_AND_M16;
2571 reg_addr[m].addr_mask[rc] = addr_mask;
2572 any_addr_mask |= addr_mask;
2575 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2580 /* Initialize the various global tables that are based on register size. */
2581 static void
2582 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2584 ssize_t r, m, c;
2585 int align64;
2586 int align32;
2588 /* Precalculate REGNO_REG_CLASS. */
2589 rs6000_regno_regclass[0] = GENERAL_REGS;
2590 for (r = 1; r < 32; ++r)
2591 rs6000_regno_regclass[r] = BASE_REGS;
2593 for (r = 32; r < 64; ++r)
2594 rs6000_regno_regclass[r] = FLOAT_REGS;
2596 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2597 rs6000_regno_regclass[r] = NO_REGS;
2599 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2600 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2602 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2603 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2604 rs6000_regno_regclass[r] = CR_REGS;
2606 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2607 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2608 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2609 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2610 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2611 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2612 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2613 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2614 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2615 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2616 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2617 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2619 /* Precalculate the mapping from each register class to its simpler reload
2620 register type. We don't need all the register classes that are combinations
2621 of different classes, just the simple ones that have constraint letters. */
2622 for (c = 0; c < N_REG_CLASSES; c++)
2623 reg_class_to_reg_type[c] = NO_REG_TYPE;
2625 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2626 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2627 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2628 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2629 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2630 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2631 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2632 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2633 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2634 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2635 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2636 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2638 if (TARGET_VSX)
2640 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2641 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2643 else
2645 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2646 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2649 /* Precalculate the valid memory formats as well as the vector information;
2650 this must be set up before the rs6000_hard_regno_nregs_internal calls
2651 below. */
2652 gcc_assert ((int)VECTOR_NONE == 0);
2653 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2654 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
2656 gcc_assert ((int)CODE_FOR_nothing == 0);
2657 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2659 gcc_assert ((int)NO_REGS == 0);
2660 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2662 /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128
2663 controls whether the compiler assumes it or keeps 128-bit alignment. */
2664 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2666 align64 = 64;
2667 align32 = 32;
2669 else
2671 align64 = 128;
2672 align32 = 128;
2675 /* V2DF mode, VSX only. */
2676 if (TARGET_VSX)
2678 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2679 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2680 rs6000_vector_align[V2DFmode] = align64;
2683 /* V4SF mode, either VSX or Altivec. */
2684 if (TARGET_VSX)
2686 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2687 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2688 rs6000_vector_align[V4SFmode] = align32;
2690 else if (TARGET_ALTIVEC)
2692 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2693 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2694 rs6000_vector_align[V4SFmode] = align32;
2697 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2698 and stores. */
2699 if (TARGET_ALTIVEC)
2701 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2702 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2703 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2704 rs6000_vector_align[V4SImode] = align32;
2705 rs6000_vector_align[V8HImode] = align32;
2706 rs6000_vector_align[V16QImode] = align32;
2708 if (TARGET_VSX)
2710 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2711 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2712 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2714 else
2716 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2717 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2718 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2722 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2723 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2724 if (TARGET_VSX)
2726 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2727 rs6000_vector_unit[V2DImode]
2728 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2729 rs6000_vector_align[V2DImode] = align64;
2731 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2732 rs6000_vector_unit[V1TImode]
2733 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2734 rs6000_vector_align[V1TImode] = 128;
2737 /* DFmode, see if we want to use the VSX unit. Memory is handled
2738 differently, so don't set rs6000_vector_mem. */
2739 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2741 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2742 rs6000_vector_align[DFmode] = 64;
2745 /* SFmode, see if we want to use the VSX unit. */
2746 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
2748 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2749 rs6000_vector_align[SFmode] = 32;
2752 /* Allow TImode in VSX register and set the VSX memory macros. */
2753 if (TARGET_VSX && TARGET_VSX_TIMODE)
2755 rs6000_vector_mem[TImode] = VECTOR_VSX;
2756 rs6000_vector_align[TImode] = align64;
2759 /* TODO add SPE and paired floating point vector support. */
2761 /* Register class constraints for the constraints that depend on compile
2762 switches. When the VSX code was added, different constraints were added
2763 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2764 of the VSX registers are used. The register classes for scalar floating
2765 point types are set based on whether we allow that type into the upper
2766 (Altivec) registers. GCC has register classes to target the Altivec
2767 registers for load/store operations, to select using a VSX memory
2768 operation instead of the traditional floating point operation. The
2769 constraints are:
2771 d - Register class to use with traditional DFmode instructions.
2772 f - Register class to use with traditional SFmode instructions.
2773 v - Altivec register.
2774 wa - Any VSX register.
2775 wc - Reserved to represent individual CR bits (used in LLVM).
2776 wd - Preferred register class for V2DFmode.
2777 wf - Preferred register class for V4SFmode.
2778 wg - Float register for power6x move insns.
2779 wh - FP register for direct move instructions.
2780 wi - FP or VSX register to hold 64-bit integers for VSX insns.
2781 wj - FP or VSX register to hold 64-bit integers for direct moves.
2782 wk - FP or VSX register to hold 64-bit doubles for direct moves.
2783 wl - Float register if we can do 32-bit signed int loads.
2784 wm - VSX register for ISA 2.07 direct move operations.
2785 wn - always NO_REGS.
2786 wr - GPR if 64-bit mode is permitted.
2787 ws - Register class to do ISA 2.06 DF operations.
2788 wt - VSX register for TImode in VSX registers.
2789 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2790 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2791 ww - Register class to do SF conversions in with VSX operations.
2792 wx - Float register if we can do 32-bit int stores.
2793 wy - Register class to do ISA 2.07 SF operations.
2794 wz - Float register if we can do 32-bit unsigned int loads. */
2796 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2797 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2799 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2800 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2802 if (TARGET_VSX)
2804 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2805 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
2806 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
2807 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
2809 if (TARGET_VSX_TIMODE)
2810 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
2812 if (TARGET_UPPER_REGS_DF) /* DFmode */
2814 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2815 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2817 else
2818 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2821 /* Add conditional constraints based on various options, to allow us to
2822 collapse multiple insn patterns. */
2823 if (TARGET_ALTIVEC)
2824 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2826 if (TARGET_MFPGPR) /* DFmode */
2827 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2829 if (TARGET_LFIWAX)
2830 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
2832 if (TARGET_DIRECT_MOVE)
2834 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
2835 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
2836 = rs6000_constraints[RS6000_CONSTRAINT_wi];
2837 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
2838 = rs6000_constraints[RS6000_CONSTRAINT_ws];
2839 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2842 if (TARGET_POWERPC64)
2843 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2845 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
2847 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2848 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2849 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2851 else if (TARGET_P8_VECTOR)
2853 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
2854 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2856 else if (TARGET_VSX)
2857 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2859 if (TARGET_STFIWX)
2860 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2862 if (TARGET_LFIWZX)
2863 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
2865 /* Set up the reload helper and direct move functions. */
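/* The reload patterns come in _di and _si variants; pick the set that
   matches the pointer size (TARGET_64BIT below, else 32-bit).  */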
2866 if (TARGET_VSX || TARGET_ALTIVEC)
2868 if (TARGET_64BIT)
2870 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2871 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2872 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2873 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2874 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2875 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2876 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2877 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2878 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2879 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2880 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2881 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2882 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2883 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2884 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2885 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2886 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2887 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2888 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2889 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2891 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2892 available. */
2893 if (TARGET_NO_SDMODE_STACK)
2895 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2896 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2899 if (TARGET_VSX_TIMODE)
2901 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2902 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2905 if (TARGET_DIRECT_MOVE)
2907 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2908 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2909 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2910 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2911 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2912 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2913 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2914 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2915 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2917 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2918 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2919 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2920 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2921 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2922 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2923 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2924 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2925 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2928 else
2930 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
2931 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
2932 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
2933 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
2934 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
2935 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
2936 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
2937 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
2938 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
2939 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
2940 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
2941 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
2942 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
2943 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
2944 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
2945 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
2946 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
2947 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
2948 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
2949 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
2951 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2952 available. */
2953 if (TARGET_NO_SDMODE_STACK)
2955 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
2956 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
2959 if (TARGET_VSX_TIMODE)
2961 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
2962 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
2965 if (TARGET_DIRECT_MOVE)
2967 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
2968 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
2969 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
2973 if (TARGET_UPPER_REGS_DF)
2974 reg_addr[DFmode].scalar_in_vmx_p = true;
2976 if (TARGET_UPPER_REGS_SF)
2977 reg_addr[SFmode].scalar_in_vmx_p = true;
2980 /* Precalculate HARD_REGNO_NREGS. */
2981 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2982 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2983 rs6000_hard_regno_nregs[m][r]
2984 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
2986 /* Precalculate HARD_REGNO_MODE_OK. */
2987 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2988 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2989 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
2990 rs6000_hard_regno_mode_ok_p[m][r] = true;
2992 /* Precalculate CLASS_MAX_NREGS sizes. */
2993 for (c = 0; c < LIM_REG_CLASSES; ++c)
2995 int reg_size;
2997 if (TARGET_VSX && VSX_REG_CLASS_P (c))
2998 reg_size = UNITS_PER_VSX_WORD;
3000 else if (c == ALTIVEC_REGS)
3001 reg_size = UNITS_PER_ALTIVEC_WORD;
3003 else if (c == FLOAT_REGS)
3004 reg_size = UNITS_PER_FP_WORD;
3006 else
3007 reg_size = UNITS_PER_WORD;
3009 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3011 machine_mode m2 = (machine_mode)m;
3012 int reg_size2 = reg_size;
3014 /* TFmode/TDmode always takes 2 registers, even in VSX. */
3015 if (TARGET_VSX && VSX_REG_CLASS_P (c)
3016 && (m == TDmode || m == TFmode))
3017 reg_size2 = UNITS_PER_FP_WORD;
3019 rs6000_class_max_nregs[m][c]
3020 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3024 if (TARGET_E500_DOUBLE)
3025 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
3027 /* Calculate for which modes to automatically generate code using the
3028 reciprocal divide and square root instructions. In the future, possibly
3029 automatically generate the instructions even if the user did not specify
3030 -mrecip. The older machines' double precision reciprocal sqrt estimate is
3031 not accurate enough. */
3032 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3033 if (TARGET_FRES)
3034 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3035 if (TARGET_FRE)
3036 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3037 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3038 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3039 if (VECTOR_UNIT_VSX_P (V2DFmode))
3040 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3042 if (TARGET_FRSQRTES)
3043 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3044 if (TARGET_FRSQRTE)
3045 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3046 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3047 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3048 if (VECTOR_UNIT_VSX_P (V2DFmode))
3049 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3051 if (rs6000_recip_control)
3053 if (!flag_finite_math_only)
3054 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3055 if (flag_trapping_math)
3056 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3057 if (!flag_reciprocal_math)
3058 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
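/* Only auto-select the estimate sequences when all three of the
   prerequisites warned about above actually hold.  */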
3059 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3061 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3062 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3063 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3065 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3066 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3067 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3069 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3070 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3071 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3073 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3074 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3075 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3077 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3078 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3079 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3081 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3082 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3083 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3085 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3086 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3087 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3089 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3090 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3091 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3095 /* Update the addr mask bits in reg_addr to help secondary reload and the
3096 legitimate address support figure out the appropriate addressing to
3097 use. */
3098 rs6000_setup_reg_addr_masks ();
3100 if (global_init_p || TARGET_DEBUG_TARGET)
3102 if (TARGET_DEBUG_REG)
3103 rs6000_debug_reg_global ();
3105 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3106 fprintf (stderr,
3107 "SImode variable mult cost = %d\n"
3108 "SImode constant mult cost = %d\n"
3109 "SImode short constant mult cost = %d\n"
3110 "DImode multipliciation cost = %d\n"
3111 "SImode division cost = %d\n"
3112 "DImode division cost = %d\n"
3113 "Simple fp operation cost = %d\n"
3114 "DFmode multiplication cost = %d\n"
3115 "SFmode division cost = %d\n"
3116 "DFmode division cost = %d\n"
3117 "cache line size = %d\n"
3118 "l1 cache size = %d\n"
3119 "l2 cache size = %d\n"
3120 "simultaneous prefetches = %d\n"
3121 "\n",
3122 rs6000_cost->mulsi,
3123 rs6000_cost->mulsi_const,
3124 rs6000_cost->mulsi_const9,
3125 rs6000_cost->muldi,
3126 rs6000_cost->divsi,
3127 rs6000_cost->divdi,
3128 rs6000_cost->fp,
3129 rs6000_cost->dmul,
3130 rs6000_cost->sdiv,
3131 rs6000_cost->ddiv,
3132 rs6000_cost->cache_line_size,
3133 rs6000_cost->l1_cache_size,
3134 rs6000_cost->l2_cache_size,
3135 rs6000_cost->simultaneous_prefetches);
3139 #if TARGET_MACHO
3140 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3142 static void
3143 darwin_rs6000_override_options (void)
3145 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3146 off. */
3147 rs6000_altivec_abi = 1;
3148 TARGET_ALTIVEC_VRSAVE = 1;
3149 rs6000_current_abi = ABI_DARWIN;
3151 if (DEFAULT_ABI == ABI_DARWIN
3152 && TARGET_64BIT)
3153 darwin_one_byte_bool = 1;
3155 if (TARGET_64BIT && ! TARGET_POWERPC64)
3157 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3158 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3160 if (flag_mkernel)
3162 rs6000_default_long_calls = 1;
3163 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3166 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3167 Altivec. */
3168 if (!flag_mkernel && !flag_apple_kext
3169 && TARGET_64BIT
3170 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3171 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3173 /* Unless the user (not the configurer) has explicitly overridden
3174 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3175 G4 unless targeting the kernel. */
3176 if (!flag_mkernel
3177 && !flag_apple_kext
3178 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3179 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3180 && ! global_options_set.x_rs6000_cpu_index)
3182 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3185 #endif
3187 /* If not otherwise specified by a target, make 'long double' equivalent to
3188 'double'. */
3190 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3191 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3192 #endif
3194 /* Return the builtin mask of the various options used that could affect which
3195 builtins were used. In the past we used target_flags, but we've run out of
3196 bits, and some options like SPE and PAIRED are no longer in
3197 target_flags. */
3199 HOST_WIDE_INT
3200 rs6000_builtin_mask_calculate (void)
3202 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3203 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3204 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3205 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3206 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3207 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3208 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3209 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3210 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3211 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3212 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3213 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3214 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3215 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3216 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3217 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
3220 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3221 to clobber the XER[CA] bit because clobbering that bit without telling
3222 the compiler worked just fine with versions of GCC before GCC 5, and
3223 breaking a lot of older code in ways that are hard to track down is
3224 not such a great idea. */
3226 static rtx_insn *
3227 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3228 vec<const char *> &/*constraints*/,
3229 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3231 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3232 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3233 return NULL;
3236 /* Override command line options. Mostly we process the processor type and
3237 sometimes adjust other TARGET_ options. */
3239 static bool
3240 rs6000_option_override_internal (bool global_init_p)
3242 bool ret = true;
3243 bool have_cpu = false;
3245 /* The default cpu requested at configure time, if any. */
3246 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3248 HOST_WIDE_INT set_masks;
3249 int cpu_index;
3250 int tune_index;
3251 struct cl_target_option *main_target_opt
3252 = ((global_init_p || target_option_default_node == NULL)
3253 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3255 /* Print defaults. */
3256 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3257 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3259 /* Remember the explicit arguments. */
3260 if (global_init_p)
3261 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3263 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3264 library functions, so warn about it. The flag may be useful for
3265 performance studies from time to time though, so don't disable it
3266 entirely. */
3267 if (global_options_set.x_rs6000_alignment_flags
3268 && rs6000_alignment_flags == MASK_ALIGN_POWER
3269 && DEFAULT_ABI == ABI_DARWIN
3270 && TARGET_64BIT)
3271 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3272 " it is incompatible with the installed C and C++ libraries");
3274 /* Numerous experiments show that IRA-based loop pressure
3275 calculation works better for RTL loop invariant motion on targets
3276 with enough (>= 32) registers. It is an expensive optimization,
3277 so it is enabled only when optimizing for peak performance. */
3278 if (optimize >= 3 && global_init_p
3279 && !global_options_set.x_flag_ira_loop_pressure)
3280 flag_ira_loop_pressure = 1;
3282 /* Set the pointer size. */
3283 if (TARGET_64BIT)
3285 rs6000_pmode = (int)DImode;
3286 rs6000_pointer_size = 64;
3288 else
3290 rs6000_pmode = (int)SImode;
3291 rs6000_pointer_size = 32;
3294 /* Some OSs don't support saving the high part of 64-bit registers on context
3295 switch. Other OSs don't support saving Altivec registers. On those OSs,
3296 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3297 if the user wants either, the user must explicitly specify them and we
3298 won't interfere with the user's specification. */
3300 set_masks = POWERPC_MASKS;
3301 #ifdef OS_MISSING_POWERPC64
3302 if (OS_MISSING_POWERPC64)
3303 set_masks &= ~OPTION_MASK_POWERPC64;
3304 #endif
3305 #ifdef OS_MISSING_ALTIVEC
3306 if (OS_MISSING_ALTIVEC)
3307 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3308 #endif
3310 /* Don't let the processor default override options that were given explicitly. */
3311 set_masks &= ~rs6000_isa_flags_explicit;
3313 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3314 the cpu in a target attribute or pragma, but did not specify a tuning
3315 option, use the cpu for the tuning option rather than the option specified
3316 with -mtune on the command line. Process a '--with-cpu' configuration
3317 request as an implicit --cpu. */
3318 if (rs6000_cpu_index >= 0)
3320 cpu_index = rs6000_cpu_index;
3321 have_cpu = true;
3323 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3325 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3326 have_cpu = true;
3328 else if (implicit_cpu)
3330 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3331 have_cpu = true;
3333 else
3335 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3336 const char *default_cpu = ((!TARGET_POWERPC64)
3337 ? "powerpc"
3338 : ((BYTES_BIG_ENDIAN)
3339 ? "powerpc64"
3340 : "powerpc64le"));
3342 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3343 have_cpu = false;
3346 gcc_assert (cpu_index >= 0);
3348 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3349 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3350 with those from the cpu, except for options that were explicitly set. If
3351 we don't have a cpu, do not override the target bits set in
3352 TARGET_DEFAULT. */
3353 if (have_cpu)
3355 rs6000_isa_flags &= ~set_masks;
3356 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3357 & set_masks);
3359 else
3361 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3362 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3363 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3364 to using rs6000_isa_flags, we need to do the initialization here.
3366 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3367 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3368 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3369 : processor_target_table[cpu_index].target_enable);
3370 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
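/* Worked example (illustrative): if TARGET_DEFAULT enables
   OPTION_MASK_MULTIPLE but the user passed -mno-multiple, that bit is
   recorded in rs6000_isa_flags_explicit, so the masking above
   ("flags & ~rs6000_isa_flags_explicit") leaves the user's choice intact. */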
3373 if (rs6000_tune_index >= 0)
3374 tune_index = rs6000_tune_index;
3375 else if (have_cpu)
3376 rs6000_tune_index = tune_index = cpu_index;
3377 else
3379 size_t i;
3380 enum processor_type tune_proc
3381 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3383 tune_index = -1;
3384 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3385 if (processor_target_table[i].processor == tune_proc)
3387 rs6000_tune_index = tune_index = i;
3388 break;
3392 gcc_assert (tune_index >= 0);
3393 rs6000_cpu = processor_target_table[tune_index].processor;
3395 /* Pick defaults for SPE-related control flags. Do this early to make sure
3396 that the TARGET_ macros are representative ASAP. */
3398 int spe_capable_cpu =
3399 (rs6000_cpu == PROCESSOR_PPC8540
3400 || rs6000_cpu == PROCESSOR_PPC8548);
3402 if (!global_options_set.x_rs6000_spe_abi)
3403 rs6000_spe_abi = spe_capable_cpu;
3405 if (!global_options_set.x_rs6000_spe)
3406 rs6000_spe = spe_capable_cpu;
3408 if (!global_options_set.x_rs6000_float_gprs)
3409 rs6000_float_gprs =
3410 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3411 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3412 : 0);
3415 if (global_options_set.x_rs6000_spe_abi
3416 && rs6000_spe_abi
3417 && !TARGET_SPE_ABI)
3418 error ("not configured for SPE ABI");
3420 if (global_options_set.x_rs6000_spe
3421 && rs6000_spe
3422 && !TARGET_SPE)
3423 error ("not configured for SPE instruction set");
3425 if (main_target_opt != NULL
3426 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3427 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3428 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3429 error ("target attribute or pragma changes SPE ABI");
3431 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3432 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3433 || rs6000_cpu == PROCESSOR_PPCE5500)
3435 if (TARGET_ALTIVEC)
3436 error ("AltiVec not supported in this target");
3437 if (TARGET_SPE)
3438 error ("SPE not supported in this target");
3440 if (rs6000_cpu == PROCESSOR_PPCE6500)
3442 if (TARGET_SPE)
3443 error ("SPE not supported in this target");
3446 /* Disable Cell microcode if we are optimizing for the Cell
3447 and not optimizing for size. */
3448 if (rs6000_gen_cell_microcode == -1)
3449 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3450 && !optimize_size);
3452 /* If we are optimizing big endian systems for space and it's OK to
3453 use instructions that would be microcoded on the Cell, use the
3454 load/store multiple and string instructions. */
3455 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3456 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3457 | OPTION_MASK_STRING);
3459 /* Don't allow -mmultiple or -mstring on little endian systems
3460 unless the cpu is a 750, because the hardware doesn't support the
3461 instructions used in little endian mode, and they cause an alignment
3462 trap. The 750 does not cause an alignment trap (except when the
3463 target is unaligned). */
3465 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3467 if (TARGET_MULTIPLE)
3469 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3470 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3471 warning (0, "-mmultiple is not supported on little endian systems");
3474 if (TARGET_STRING)
3476 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3477 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3478 warning (0, "-mstring is not supported on little endian systems");
3482 /* If little-endian, default to -mstrict-align on older processors.
3483 Testing for htm matches power8 and later. */
3484 if (!BYTES_BIG_ENDIAN
3485 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3486 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3488 /* -maltivec={le,be} implies -maltivec. */
3489 if (rs6000_altivec_element_order != 0)
3490 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3492 /* Disallow -maltivec=le in big endian mode for now. This is not
3493 known to be useful for anyone. */
3494 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3496 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3497 rs6000_altivec_element_order = 0;
3500 /* Add some warnings for VSX. */
3501 if (TARGET_VSX)
3503 const char *msg = NULL;
3504 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3505 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3507 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3508 msg = N_("-mvsx requires hardware floating point");
3509 else
3511 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3512 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3515 else if (TARGET_PAIRED_FLOAT)
3516 msg = N_("-mvsx and -mpaired are incompatible");
3517 else if (TARGET_AVOID_XFORM > 0)
3518 msg = N_("-mvsx needs indexed addressing");
3519 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3520 & OPTION_MASK_ALTIVEC))
3522 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3523 msg = N_("-mvsx and -mno-altivec are incompatible");
3524 else
3525 msg = N_("-mno-altivec disables vsx");
3528 if (msg)
3530 warning (0, msg);
3531 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3532 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3536 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3537 the -mcpu setting to enable options that conflict. */
3538 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3539 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3540 | OPTION_MASK_ALTIVEC
3541 | OPTION_MASK_VSX)) != 0)
3542 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3543 | OPTION_MASK_DIRECT_MOVE)
3544 & ~rs6000_isa_flags_explicit);
3546 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3547 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3549 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3550 unless the user explicitly used the -mno-<option> to disable the code. */
3551 if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3552 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3553 else if (TARGET_VSX)
3554 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3555 else if (TARGET_POPCNTD)
3556 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3557 else if (TARGET_DFP)
3558 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3559 else if (TARGET_CMPB)
3560 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3561 else if (TARGET_FPRND)
3562 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3563 else if (TARGET_POPCNTB)
3564 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3565 else if (TARGET_ALTIVEC)
3566 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
3568 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3570 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3571 error ("-mcrypto requires -maltivec");
3572 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3575 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3577 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3578 error ("-mdirect-move requires -mvsx");
3579 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3582 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3584 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3585 error ("-mpower8-vector requires -maltivec");
3586 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3589 if (TARGET_P8_VECTOR && !TARGET_VSX)
3591 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3592 error ("-mpower8-vector requires -mvsx");
3593 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3596 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3598 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3599 error ("-mvsx-timode requires -mvsx");
3600 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3603 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3605 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3606 error ("-mhard-dfp requires -mhard-float");
3607 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3610 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
3611 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
3612 the individual option. */
3613 if (TARGET_UPPER_REGS > 0)
3615 if (TARGET_VSX
3616 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3618 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
3619 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3621 if (TARGET_P8_VECTOR
3622 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3624 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
3625 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3628 else if (TARGET_UPPER_REGS == 0)
3630 if (TARGET_VSX
3631 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3633 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3634 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3636 if (TARGET_P8_VECTOR
3637 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3639 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3640 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3644 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
3646 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
3647 error ("-mupper-regs-df requires -mvsx");
3648 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3651 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
3653 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
3654 error ("-mupper-regs-sf requires -mpower8-vector");
3655 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3658 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3659 silently turn off quad memory mode. */
3660 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3662 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3663 warning (0, N_("-mquad-memory requires 64-bit mode"));
3665 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3666 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3668 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3669 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3672 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3673 the words are reversed, but atomic operations can still be done by
3674 swapping the words. */
3675 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3677 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3678 warning (0, N_("-mquad-memory is not available in little endian mode"));
3680 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3683 /* Assume that if the user asked for normal quad memory instructions, they
3684 want the atomic versions as well, unless they explicitly told us not to
3685 use quad word atomic instructions. */
3686 if (TARGET_QUAD_MEMORY
3687 && !TARGET_QUAD_MEMORY_ATOMIC
3688 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3689 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3691 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3692 generating power8 instructions. */
3693 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3694 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3695 & OPTION_MASK_P8_FUSION);
3697 /* Power8 does not fuse sign-extended loads with the addis. If we are
3698 optimizing at high levels for speed, convert a sign-extended load into a
3699 zero-extending load and an explicit sign extension. */
3700 if (TARGET_P8_FUSION
3701 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3702 && optimize_function_for_speed_p (cfun)
3703 && optimize >= 3)
3704 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
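/* Illustrative sketch of the transformation: a sign-extending load such as

     lwa 9,0(3)        # load word algebraic (sign-extending, not fusable)

   is emitted instead as

     lwz 9,0(3)        # zero-extending load, fusable with a prior addis
     extsw 9,9         # explicit sign extension

   so the load can still participate in power8 fusion. */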
3706 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3707 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
3709 /* E500mc does "better" if we inline more aggressively. Respect the
3710 user's opinion, though. */
3711 if (rs6000_block_move_inline_limit == 0
3712 && (rs6000_cpu == PROCESSOR_PPCE500MC
3713 || rs6000_cpu == PROCESSOR_PPCE500MC64
3714 || rs6000_cpu == PROCESSOR_PPCE5500
3715 || rs6000_cpu == PROCESSOR_PPCE6500))
3716 rs6000_block_move_inline_limit = 128;
3718 /* store_one_arg depends on expand_block_move to handle at least the
3719 size of reg_parm_stack_space. */
3720 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
3721 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
3723 if (global_init_p)
3725 /* If the appropriate debug option is enabled, replace the target hooks
3726 with debug versions that call the real version and then print
3727 debugging information. */
3728 if (TARGET_DEBUG_COST)
3730 targetm.rtx_costs = rs6000_debug_rtx_costs;
3731 targetm.address_cost = rs6000_debug_address_cost;
3732 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
3735 if (TARGET_DEBUG_ADDR)
3737 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
3738 targetm.legitimize_address = rs6000_debug_legitimize_address;
3739 rs6000_secondary_reload_class_ptr
3740 = rs6000_debug_secondary_reload_class;
3741 rs6000_secondary_memory_needed_ptr
3742 = rs6000_debug_secondary_memory_needed;
3743 rs6000_cannot_change_mode_class_ptr
3744 = rs6000_debug_cannot_change_mode_class;
3745 rs6000_preferred_reload_class_ptr
3746 = rs6000_debug_preferred_reload_class;
3747 rs6000_legitimize_reload_address_ptr
3748 = rs6000_debug_legitimize_reload_address;
3749 rs6000_mode_dependent_address_ptr
3750 = rs6000_debug_mode_dependent_address;
3753 if (rs6000_veclibabi_name)
3755 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
3756 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
3757 else
3759 error ("unknown vectorization library ABI type (%s) for "
3760 "-mveclibabi= switch", rs6000_veclibabi_name);
3761 ret = false;
3766 if (!global_options_set.x_rs6000_long_double_type_size)
3768 if (main_target_opt != NULL
3769 && (main_target_opt->x_rs6000_long_double_type_size
3770 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
3771 error ("target attribute or pragma changes long double size");
3772 else
3773 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
3776 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
3777 if (!global_options_set.x_rs6000_ieeequad)
3778 rs6000_ieeequad = 1;
3779 #endif
3781 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3782 target attribute or pragma which automatically enables both options,
3783 unless the altivec ABI was set. This is set by default for 64-bit, but
3784 not for 32-bit. */
3785 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3786 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3787 & ~rs6000_isa_flags_explicit);
3789 /* Enable Altivec ABI for AIX -maltivec. */
3790 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
3792 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3793 error ("target attribute or pragma changes AltiVec ABI");
3794 else
3795 rs6000_altivec_abi = 1;
3798 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
3799 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
3800 be explicitly overridden in either case. */
3801 if (TARGET_ELF)
3803 if (!global_options_set.x_rs6000_altivec_abi
3804 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
3806 if (main_target_opt != NULL
3807 && !main_target_opt->x_rs6000_altivec_abi)
3808 error ("target attribute or pragma changes AltiVec ABI");
3809 else
3810 rs6000_altivec_abi = 1;
3814 /* Set the Darwin64 ABI as default for 64-bit Darwin.
3815 So far, the only darwin64 targets are also MACH-O. */
3816 if (TARGET_MACHO
3817 && DEFAULT_ABI == ABI_DARWIN
3818 && TARGET_64BIT)
3820 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
3821 error ("target attribute or pragma changes darwin64 ABI");
3822 else
3824 rs6000_darwin64_abi = 1;
3825 /* Default to natural alignment, for better performance. */
3826 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3830 /* Place FP constants in the constant pool instead of the TOC
3831 if section anchors are enabled. */
3832 if (flag_section_anchors
3833 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
3834 TARGET_NO_FP_IN_TOC = 1;
3836 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3837 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
3839 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3840 SUBTARGET_OVERRIDE_OPTIONS;
3841 #endif
3842 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3843 SUBSUBTARGET_OVERRIDE_OPTIONS;
3844 #endif
3845 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
3846 SUB3TARGET_OVERRIDE_OPTIONS;
3847 #endif
3849 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3850 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
3852 /* For the E500 family of cores, reset the single/double FP flags to let us
3853 check that they remain constant across attributes or pragmas. Also,
3854 clear a possible request for string instructions, which are not supported
3855 on these cores and which we might have silently enabled above for -Os.
3857 For other families, clear ISEL in case it was set implicitly. */
3860 switch (rs6000_cpu)
3862 case PROCESSOR_PPC8540:
3863 case PROCESSOR_PPC8548:
3864 case PROCESSOR_PPCE500MC:
3865 case PROCESSOR_PPCE500MC64:
3866 case PROCESSOR_PPCE5500:
3867 case PROCESSOR_PPCE6500:
3869 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
3870 rs6000_double_float = TARGET_E500_DOUBLE;
3872 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3874 break;
3876 default:
3878 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
3879 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
3881 break;
3884 if (main_target_opt)
3886 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
3887 error ("target attribute or pragma changes single precision floating "
3888 "point");
3889 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
3890 error ("target attribute or pragma changes double precision floating "
3891 "point");
3894 /* Detect invalid option combinations with E500. */
3895 CHECK_E500_OPTIONS;
3897 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
3898 && rs6000_cpu != PROCESSOR_POWER5
3899 && rs6000_cpu != PROCESSOR_POWER6
3900 && rs6000_cpu != PROCESSOR_POWER7
3901 && rs6000_cpu != PROCESSOR_POWER8
3902 && rs6000_cpu != PROCESSOR_PPCA2
3903 && rs6000_cpu != PROCESSOR_CELL
3904 && rs6000_cpu != PROCESSOR_PPC476);
3905 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
3906 || rs6000_cpu == PROCESSOR_POWER5
3907 || rs6000_cpu == PROCESSOR_POWER7
3908 || rs6000_cpu == PROCESSOR_POWER8);
3909 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
3910 || rs6000_cpu == PROCESSOR_POWER5
3911 || rs6000_cpu == PROCESSOR_POWER6
3912 || rs6000_cpu == PROCESSOR_POWER7
3913 || rs6000_cpu == PROCESSOR_POWER8
3914 || rs6000_cpu == PROCESSOR_PPCE500MC
3915 || rs6000_cpu == PROCESSOR_PPCE500MC64
3916 || rs6000_cpu == PROCESSOR_PPCE5500
3917 || rs6000_cpu == PROCESSOR_PPCE6500);
3919 /* Allow debug switches to override the above settings. These are set to -1
3920 in rs6000.opt to indicate the user hasn't directly set the switch. */
3921 if (TARGET_ALWAYS_HINT >= 0)
3922 rs6000_always_hint = TARGET_ALWAYS_HINT;
3924 if (TARGET_SCHED_GROUPS >= 0)
3925 rs6000_sched_groups = TARGET_SCHED_GROUPS;
3927 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
3928 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
3930 rs6000_sched_restricted_insns_priority
3931 = (rs6000_sched_groups ? 1 : 0);
3933 /* Handle -msched-costly-dep option. */
3934 rs6000_sched_costly_dep
3935 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
3937 if (rs6000_sched_costly_dep_str)
3939 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
3940 rs6000_sched_costly_dep = no_dep_costly;
3941 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
3942 rs6000_sched_costly_dep = all_deps_costly;
3943 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
3944 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
3945 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
3946 rs6000_sched_costly_dep = store_to_load_dep_costly;
3947 else
3948 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
3949 atoi (rs6000_sched_costly_dep_str));
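/* Usage (illustrative): -msched-costly-dep=true_store_to_load selects
   true_store_to_load_dep_costly above, while a bare number such as
   -msched-costly-dep=20 falls through to the atoi conversion. */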
3952 /* Handle -minsert-sched-nops option. */
3953 rs6000_sched_insert_nops
3954 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
3956 if (rs6000_sched_insert_nops_str)
3958 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
3959 rs6000_sched_insert_nops = sched_finish_none;
3960 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
3961 rs6000_sched_insert_nops = sched_finish_pad_groups;
3962 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
3963 rs6000_sched_insert_nops = sched_finish_regroup_exact;
3964 else
3965 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
3966 atoi (rs6000_sched_insert_nops_str));
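/* Usage (illustrative): -minsert-sched-nops=regroup_exact selects
   sched_finish_regroup_exact above; as with -msched-costly-dep, a numeric
   argument falls through to the atoi conversion. */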
3969 if (global_init_p)
3971 #ifdef TARGET_REGNAMES
3972 /* If the user desires alternate register names, copy in the
3973 alternate names now. */
3974 if (TARGET_REGNAMES)
3975 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
3976 #endif
3978 /* Set aix_struct_return last, after the ABI is determined.
3979 If -maix-struct-return or -msvr4-struct-return was explicitly
3980 used, don't override with the ABI default. */
3981 if (!global_options_set.x_aix_struct_return)
3982 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
3984 #if 0
3985 /* IBM XL compiler defaults to unsigned bitfields. */
3986 if (TARGET_XL_COMPAT)
3987 flag_signed_bitfields = 0;
3988 #endif
3990 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3991 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
3993 if (TARGET_TOC)
3994 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
3996 /* We can only guarantee the availability of DI pseudo-ops when
3997 assembling for 64-bit targets. */
3998 if (!TARGET_64BIT)
4000 targetm.asm_out.aligned_op.di = NULL;
4001 targetm.asm_out.unaligned_op.di = NULL;
4005 /* Set branch target alignment, if not optimizing for size. */
4006 if (!optimize_size)
4008 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
4009 8-byte aligned to avoid misprediction by the branch predictor. */
4010 if (rs6000_cpu == PROCESSOR_TITAN
4011 || rs6000_cpu == PROCESSOR_CELL)
4013 if (align_functions <= 0)
4014 align_functions = 8;
4015 if (align_jumps <= 0)
4016 align_jumps = 8;
4017 if (align_loops <= 0)
4018 align_loops = 8;
4020 if (rs6000_align_branch_targets)
4022 if (align_functions <= 0)
4023 align_functions = 16;
4024 if (align_jumps <= 0)
4025 align_jumps = 16;
4026 if (align_loops <= 0)
4028 can_override_loop_align = 1;
4029 align_loops = 16;
4032 if (align_jumps_max_skip <= 0)
4033 align_jumps_max_skip = 15;
4034 if (align_loops_max_skip <= 0)
4035 align_loops_max_skip = 15;
4038 /* Arrange to save and restore machine status around nested functions. */
4039 init_machine_status = rs6000_init_machine_status;
4041 /* We should always be splitting complex arguments, but we can't break
4042 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4043 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4044 targetm.calls.split_complex_arg = NULL;
4047 /* Initialize rs6000_cost with the appropriate target costs. */
4048 if (optimize_size)
4049 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4050 else
4051 switch (rs6000_cpu)
4053 case PROCESSOR_RS64A:
4054 rs6000_cost = &rs64a_cost;
4055 break;
4057 case PROCESSOR_MPCCORE:
4058 rs6000_cost = &mpccore_cost;
4059 break;
4061 case PROCESSOR_PPC403:
4062 rs6000_cost = &ppc403_cost;
4063 break;
4065 case PROCESSOR_PPC405:
4066 rs6000_cost = &ppc405_cost;
4067 break;
4069 case PROCESSOR_PPC440:
4070 rs6000_cost = &ppc440_cost;
4071 break;
4073 case PROCESSOR_PPC476:
4074 rs6000_cost = &ppc476_cost;
4075 break;
4077 case PROCESSOR_PPC601:
4078 rs6000_cost = &ppc601_cost;
4079 break;
4081 case PROCESSOR_PPC603:
4082 rs6000_cost = &ppc603_cost;
4083 break;
4085 case PROCESSOR_PPC604:
4086 rs6000_cost = &ppc604_cost;
4087 break;
4089 case PROCESSOR_PPC604e:
4090 rs6000_cost = &ppc604e_cost;
4091 break;
4093 case PROCESSOR_PPC620:
4094 rs6000_cost = &ppc620_cost;
4095 break;
4097 case PROCESSOR_PPC630:
4098 rs6000_cost = &ppc630_cost;
4099 break;
4101 case PROCESSOR_CELL:
4102 rs6000_cost = &ppccell_cost;
4103 break;
4105 case PROCESSOR_PPC750:
4106 case PROCESSOR_PPC7400:
4107 rs6000_cost = &ppc750_cost;
4108 break;
4110 case PROCESSOR_PPC7450:
4111 rs6000_cost = &ppc7450_cost;
4112 break;
4114 case PROCESSOR_PPC8540:
4115 case PROCESSOR_PPC8548:
4116 rs6000_cost = &ppc8540_cost;
4117 break;
4119 case PROCESSOR_PPCE300C2:
4120 case PROCESSOR_PPCE300C3:
4121 rs6000_cost = &ppce300c2c3_cost;
4122 break;
4124 case PROCESSOR_PPCE500MC:
4125 rs6000_cost = &ppce500mc_cost;
4126 break;
4128 case PROCESSOR_PPCE500MC64:
4129 rs6000_cost = &ppce500mc64_cost;
4130 break;
4132 case PROCESSOR_PPCE5500:
4133 rs6000_cost = &ppce5500_cost;
4134 break;
4136 case PROCESSOR_PPCE6500:
4137 rs6000_cost = &ppce6500_cost;
4138 break;
4140 case PROCESSOR_TITAN:
4141 rs6000_cost = &titan_cost;
4142 break;
4144 case PROCESSOR_POWER4:
4145 case PROCESSOR_POWER5:
4146 rs6000_cost = &power4_cost;
4147 break;
4149 case PROCESSOR_POWER6:
4150 rs6000_cost = &power6_cost;
4151 break;
4153 case PROCESSOR_POWER7:
4154 rs6000_cost = &power7_cost;
4155 break;
4157 case PROCESSOR_POWER8:
4158 rs6000_cost = &power8_cost;
4159 break;
4161 case PROCESSOR_PPCA2:
4162 rs6000_cost = &ppca2_cost;
4163 break;
4165 default:
4166 gcc_unreachable ();
4169 if (global_init_p)
4171 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4172 rs6000_cost->simultaneous_prefetches,
4173 global_options.x_param_values,
4174 global_options_set.x_param_values);
4175 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4176 global_options.x_param_values,
4177 global_options_set.x_param_values);
4178 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4179 rs6000_cost->cache_line_size,
4180 global_options.x_param_values,
4181 global_options_set.x_param_values);
4182 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4183 global_options.x_param_values,
4184 global_options_set.x_param_values);
4186 /* Increase loop peeling limits based on performance analysis. */
4187 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4188 global_options.x_param_values,
4189 global_options_set.x_param_values);
4190 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4191 global_options.x_param_values,
4192 global_options_set.x_param_values);
4194 /* If using typedef char *va_list, signal that
4195 __builtin_va_start (&ap, 0) can be optimized to
4196 ap = __builtin_next_arg (0). */
4197 if (DEFAULT_ABI != ABI_V4)
4198 targetm.expand_builtin_va_start = NULL;
4201 /* Set up single/double float flags.
4202 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4203 then set both flags. */
4204 if (TARGET_HARD_FLOAT && TARGET_FPRS
4205 && rs6000_single_float == 0 && rs6000_double_float == 0)
4206 rs6000_single_float = rs6000_double_float = 1;
4208 /* If not explicitly specified via option, decide whether to generate indexed
4209 load/store instructions. */
4210 if (TARGET_AVOID_XFORM == -1)
4211 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4212 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4213 need indexed accesses and the type used is the scalar type of the element
4214 being loaded or stored. */
4215 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4216 && !TARGET_ALTIVEC);
4218 /* Set the -mrecip options. */
4219 if (rs6000_recip_name)
4221 char *p = ASTRDUP (rs6000_recip_name);
4222 char *q;
4223 unsigned int mask, i;
4224 bool invert;
4226 while ((q = strtok (p, ",")) != NULL)
4228 p = NULL;
4229 if (*q == '!')
4231 invert = true;
4232 q++;
4234 else
4235 invert = false;
4237 if (!strcmp (q, "default"))
4238 mask = ((TARGET_RECIP_PRECISION)
4239 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4240 else
4242 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4243 if (!strcmp (q, recip_options[i].string))
4245 mask = recip_options[i].mask;
4246 break;
4249 if (i == ARRAY_SIZE (recip_options))
4251 error ("unknown option for -mrecip=%s", q);
4252 invert = false;
4253 mask = 0;
4254 ret = false;
4258 if (invert)
4259 rs6000_recip_control &= ~mask;
4260 else
4261 rs6000_recip_control |= mask;
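/* Usage (illustrative, assuming the recip_options table provides the usual
   "all" and "rsqrtd" entries): -mrecip=all,!rsqrtd first sets every
   estimate bit and then clears the double-precision rsqrt bit through the
   '!' (invert) path above; -mrecip=default picks RECIP_HIGH_PRECISION or
   RECIP_LOW_PRECISION based on TARGET_RECIP_PRECISION. */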
4265 /* Determine when unaligned vector accesses are permitted, and when
4266 they are preferred over masked Altivec loads. Note that if
4267 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
4268 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
4269 not true. */
4270 if (TARGET_EFFICIENT_UNALIGNED_VSX == -1)
4271 if (TARGET_VSX && rs6000_cpu == PROCESSOR_POWER8
4272 && TARGET_ALLOW_MOVMISALIGN != 0)
4273 TARGET_EFFICIENT_UNALIGNED_VSX = 1;
4274 else
4275 TARGET_EFFICIENT_UNALIGNED_VSX = 0;
4278 if (TARGET_ALLOW_MOVMISALIGN == -1 && rs6000_cpu == PROCESSOR_POWER8)
4279 TARGET_ALLOW_MOVMISALIGN = 1;
4281 /* Set the builtin mask of the various options used that could affect which
4282 builtins were used. In the past we used target_flags, but we've run out
4283 of bits, and some options like SPE and PAIRED are no longer in
4284 target_flags. */
4285 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4286 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4287 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4288 rs6000_builtin_mask);
4290 /* Initialize all of the registers. */
4291 rs6000_init_hard_regno_mode_ok (global_init_p);
4293 /* Save the initial options in case the user uses function-specific options. */
4294 if (global_init_p)
4295 target_option_default_node = target_option_current_node
4296 = build_target_option_node (&global_options);
4298 /* If not explicitly specified via option, decide whether to generate the
4299 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4300 if (TARGET_LINK_STACK == -1)
4301 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4303 return ret;
4306 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4307 define the target cpu type. */
4309 static void
4310 rs6000_option_override (void)
4312 (void) rs6000_option_override_internal (true);
4314 /* Register machine-specific passes. This needs to be done at start-up.
4315 It's convenient to do it here (like i386 does). */
4316 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4318 struct register_pass_info analyze_swaps_info
4319 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4321 register_pass (&analyze_swaps_info);
4325 /* Implement targetm.vectorize.builtin_mask_for_load. */
4326 static tree
4327 rs6000_builtin_mask_for_load (void)
4329 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4330 if ((TARGET_ALTIVEC && !TARGET_VSX)
4331 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4332 return altivec_builtin_mask_for_load;
4333 else
4334 return 0;
4337 /* Implement LOOP_ALIGN. */
4338 int
4339 rs6000_loop_align (rtx label)
4341 basic_block bb;
4342 int ninsns;
4344 /* Don't override loop alignment if -falign-loops was specified. */
4345 if (!can_override_loop_align)
4346 return align_loops_log;
4348 bb = BLOCK_FOR_INSN (label);
4349 ninsns = num_loop_insns (bb->loop_father);
4351 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4352 if (ninsns > 4 && ninsns <= 8
4353 && (rs6000_cpu == PROCESSOR_POWER4
4354 || rs6000_cpu == PROCESSOR_POWER5
4355 || rs6000_cpu == PROCESSOR_POWER6
4356 || rs6000_cpu == PROCESSOR_POWER7
4357 || rs6000_cpu == PROCESSOR_POWER8))
4358 return 5;
4359 else
4360 return align_loops_log;
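/* Note (illustrative): the return value is a log2, so the 5 above requests
   a 2**5 == 32-byte boundary, matching the icache sector size mentioned in
   the comment. */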
4363 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4364 static int
4365 rs6000_loop_align_max_skip (rtx_insn *label)
4367 return (1 << rs6000_loop_align (label)) - 1;
4370 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4371 after applying N iterations. This routine does not determine
4372 how many iterations are required to reach the desired alignment. */
4374 static bool
4375 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4377 if (is_packed)
4378 return false;
4380 if (TARGET_32BIT)
4382 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4383 return true;
4385 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4386 return true;
4388 return false;
4390 else
4392 if (TARGET_MACHO)
4393 return false;
4395 /* Assume that all other types are naturally aligned. CHECKME! */
4396 return true;
4400 /* Return true if the vector misalignment factor is supported by the
4401 target. */
4402 static bool
4403 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4404 const_tree type,
4405 int misalignment,
4406 bool is_packed)
4408 if (TARGET_VSX)
4410 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4411 return true;
4413 /* Return false if the movmisalign pattern is not supported for this mode. */
4414 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4415 return false;
4417 if (misalignment == -1)
4419 /* Misalignment factor is unknown at compile time but we know
4420 it's word aligned. */
4421 if (rs6000_vector_alignment_reachable (type, is_packed))
4423 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4425 if (element_size == 64 || element_size == 32)
4426 return true;
4429 return false;
4432 /* VSX supports word-aligned vectors. */
4433 if (misalignment % 4 == 0)
4434 return true;
4436 return false;
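/* Worked example (illustrative, assuming the movmisalign pattern exists for
   the mode): with plain VSX a known misalignment of 4 or 8 bytes satisfies
   "misalignment % 4 == 0" and is supported, while a 2-byte misalignment is
   rejected. */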
4439 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4440 static int
4441 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4442 tree vectype, int misalign)
4444 unsigned elements;
4445 tree elem_type;
4447 switch (type_of_cost)
4449 case scalar_stmt:
4450 case scalar_load:
4451 case scalar_store:
4452 case vector_stmt:
4453 case vector_load:
4454 case vector_store:
4455 case vec_to_scalar:
4456 case scalar_to_vec:
4457 case cond_branch_not_taken:
4458 return 1;
4460 case vec_perm:
4461 if (TARGET_VSX)
4462 return 3;
4463 else
4464 return 1;
4466 case vec_promote_demote:
4467 if (TARGET_VSX)
4468 return 4;
4469 else
4470 return 1;
4472 case cond_branch_taken:
4473 return 3;
4475 case unaligned_load:
4476 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4477 return 1;
4479 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4481 elements = TYPE_VECTOR_SUBPARTS (vectype);
4482 if (elements == 2)
4483 /* Double word aligned. */
4484 return 2;
4486 if (elements == 4)
4488 switch (misalign)
4490 case 8:
4491 /* Double word aligned. */
4492 return 2;
4494 case -1:
4495 /* Unknown misalignment. */
4496 case 4:
4497 case 12:
4498 /* Word aligned. */
4499 return 22;
4501 default:
4502 gcc_unreachable ();
4507 if (TARGET_ALTIVEC)
4508 /* Misaligned loads are not supported. */
4509 gcc_unreachable ();
4511 return 2;
4513 case unaligned_store:
4514 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4515 return 1;
4517 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4519 elements = TYPE_VECTOR_SUBPARTS (vectype);
4520 if (elements == 2)
4521 /* Double word aligned. */
4522 return 2;
4524 if (elements == 4)
4526 switch (misalign)
4528 case 8:
4529 /* Double word aligned. */
4530 return 2;
4532 case -1:
4533 /* Unknown misalignment. */
4534 case 4:
4535 case 12:
4536 /* Word aligned. */
4537 return 23;
4539 default:
4540 gcc_unreachable ();
4545 if (TARGET_ALTIVEC)
4546 /* Misaligned stores are not supported. */
4547 gcc_unreachable ();
4549 return 2;
4551 case vec_construct:
4552 elements = TYPE_VECTOR_SUBPARTS (vectype);
4553 elem_type = TREE_TYPE (vectype);
4554 /* 32-bit floating-point values loaded into registers are stored as
4555 double precision, so we need n/2 converts in addition to the usual
4556 n/2 merges to construct a vector of short floats from them. */
4557 if (SCALAR_FLOAT_TYPE_P (elem_type)
4558 && TYPE_PRECISION (elem_type) == 32)
4559 return elements + 1;
4560 else
4561 return elements / 2 + 1;
4563 default:
4564 gcc_unreachable ();
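/* Worked example (illustrative) for the vec_construct case above: building
   a V4SF from four floats costs elements + 1 == 5 because of the extra n/2
   converts, while a V2DF costs elements / 2 + 1 == 2. */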
4568 /* Implement targetm.vectorize.preferred_simd_mode. */
4570 static machine_mode
4571 rs6000_preferred_simd_mode (machine_mode mode)
4573 if (TARGET_VSX)
4574 switch (mode)
4576 case DFmode:
4577 return V2DFmode;
4578 default:;
4580 if (TARGET_ALTIVEC || TARGET_VSX)
4581 switch (mode)
4583 case SFmode:
4584 return V4SFmode;
4585 case TImode:
4586 return V1TImode;
4587 case DImode:
4588 return V2DImode;
4589 case SImode:
4590 return V4SImode;
4591 case HImode:
4592 return V8HImode;
4593 case QImode:
4594 return V16QImode;
4595 default:;
4597 if (TARGET_SPE)
4598 switch (mode)
4600 case SFmode:
4601 return V2SFmode;
4602 case SImode:
4603 return V2SImode;
4604 default:;
4606 if (TARGET_PAIRED_FLOAT
4607 && mode == SFmode)
4608 return V2SFmode;
4609 return word_mode;
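/* Usage (illustrative): vectorizing SImode arithmetic with -maltivec picks
   V4SImode above; with only TARGET_PAIRED_FLOAT, SFmode maps to V2SFmode
   and everything else falls back to word_mode, i.e. no vectorization. */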
4612 typedef struct _rs6000_cost_data
4614 struct loop *loop_info;
4615 unsigned cost[3];
4616 } rs6000_cost_data;
4618 /* Test for likely overcommitment of vector hardware resources. If a
4619 loop iteration is relatively large, and too large a percentage of
4620 instructions in the loop are vectorized, the cost model may not
4621 adequately reflect delays from unavailable vector resources.
4622 Penalize the loop body cost for this case. */
4624 static void
4625 rs6000_density_test (rs6000_cost_data *data)
4627 const int DENSITY_PCT_THRESHOLD = 85;
4628 const int DENSITY_SIZE_THRESHOLD = 70;
4629 const int DENSITY_PENALTY = 10;
4630 struct loop *loop = data->loop_info;
4631 basic_block *bbs = get_loop_body (loop);
4632 int nbbs = loop->num_nodes;
4633 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4634 int i, density_pct;
4636 for (i = 0; i < nbbs; i++)
4638 basic_block bb = bbs[i];
4639 gimple_stmt_iterator gsi;
4641 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4643 gimple stmt = gsi_stmt (gsi);
4644 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4646 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4647 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4648 not_vec_cost++;
4652 free (bbs);
4653 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4655 if (density_pct > DENSITY_PCT_THRESHOLD
4656 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4658 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4659 if (dump_enabled_p ())
4660 dump_printf_loc (MSG_NOTE, vect_location,
4661 "density %d%%, cost %d exceeds threshold, penalizing "
4662 "loop body cost by %d%%", density_pct,
4663 vec_cost + not_vec_cost, DENSITY_PENALTY);
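/* Worked example (illustrative): with vec_cost == 90 and not_vec_cost == 10,
   density_pct == (90 * 100) / 100 == 90 > 85 and the total size 100 > 70,
   so the body cost is scaled to 90 * (100 + 10) / 100 == 99. */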
4667 /* Implement targetm.vectorize.init_cost. */
4669 static void *
4670 rs6000_init_cost (struct loop *loop_info)
4672 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4673 data->loop_info = loop_info;
4674 data->cost[vect_prologue] = 0;
4675 data->cost[vect_body] = 0;
4676 data->cost[vect_epilogue] = 0;
4677 return data;
4680 /* Implement targetm.vectorize.add_stmt_cost. */
4682 static unsigned
4683 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4684 struct _stmt_vec_info *stmt_info, int misalign,
4685 enum vect_cost_model_location where)
4687 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4688 unsigned retval = 0;
4690 if (flag_vect_cost_model)
4692 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4693 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
4694 misalign);
4695 /* Statements in an inner loop relative to the loop being
4696 vectorized are weighted more heavily. The value here is
4697 arbitrary and could potentially be improved with analysis. */
4698 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4699 count *= 50; /* FIXME. */
4701 retval = (unsigned) (count * stmt_cost);
4702 cost_data->cost[where] += retval;
4705 return retval;
4708 /* Implement targetm.vectorize.finish_cost. */
4710 static void
4711 rs6000_finish_cost (void *data, unsigned *prologue_cost,
4712 unsigned *body_cost, unsigned *epilogue_cost)
4714 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4716 if (cost_data->loop_info)
4717 rs6000_density_test (cost_data);
4719 *prologue_cost = cost_data->cost[vect_prologue];
4720 *body_cost = cost_data->cost[vect_body];
4721 *epilogue_cost = cost_data->cost[vect_epilogue];
4724 /* Implement targetm.vectorize.destroy_cost_data. */
4726 static void
4727 rs6000_destroy_cost_data (void *data)
4729 free (data);
4732 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
4733 library with vectorized intrinsics. */
4735 static tree
4736 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
4738 char name[32];
4739 const char *suffix = NULL;
4740 tree fntype, new_fndecl, bdecl = NULL_TREE;
4741 int n_args = 1;
4742 const char *bname;
4743 machine_mode el_mode, in_mode;
4744 int n, in_n;
4746 /* Libmass is suitable for unsafe math only as it does not correctly support
4747 parts of IEEE with the required precision such as denormals. Only support
4748 it if we have VSX to use the simd d2 or f4 functions.
4749 XXX: Add variable length support. */
4750 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
4751 return NULL_TREE;
4753 el_mode = TYPE_MODE (TREE_TYPE (type_out));
4754 n = TYPE_VECTOR_SUBPARTS (type_out);
4755 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4756 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4757 if (el_mode != in_mode
4758 || n != in_n)
4759 return NULL_TREE;
4761 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4763 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4764 switch (fn)
4766 case BUILT_IN_ATAN2:
4767 case BUILT_IN_HYPOT:
4768 case BUILT_IN_POW:
4769 n_args = 2;
4770 /* fall through */
4772 case BUILT_IN_ACOS:
4773 case BUILT_IN_ACOSH:
4774 case BUILT_IN_ASIN:
4775 case BUILT_IN_ASINH:
4776 case BUILT_IN_ATAN:
4777 case BUILT_IN_ATANH:
4778 case BUILT_IN_CBRT:
4779 case BUILT_IN_COS:
4780 case BUILT_IN_COSH:
4781 case BUILT_IN_ERF:
4782 case BUILT_IN_ERFC:
4783 case BUILT_IN_EXP2:
4784 case BUILT_IN_EXP:
4785 case BUILT_IN_EXPM1:
4786 case BUILT_IN_LGAMMA:
4787 case BUILT_IN_LOG10:
4788 case BUILT_IN_LOG1P:
4789 case BUILT_IN_LOG2:
4790 case BUILT_IN_LOG:
4791 case BUILT_IN_SIN:
4792 case BUILT_IN_SINH:
4793 case BUILT_IN_SQRT:
4794 case BUILT_IN_TAN:
4795 case BUILT_IN_TANH:
4796 bdecl = builtin_decl_implicit (fn);
4797 suffix = "d2"; /* pow -> powd2 */
4798 if (el_mode != DFmode
4799 || n != 2
4800 || !bdecl)
4801 return NULL_TREE;
4802 break;
4804 case BUILT_IN_ATAN2F:
4805 case BUILT_IN_HYPOTF:
4806 case BUILT_IN_POWF:
4807 n_args = 2;
4808 /* fall through */
4810 case BUILT_IN_ACOSF:
4811 case BUILT_IN_ACOSHF:
4812 case BUILT_IN_ASINF:
4813 case BUILT_IN_ASINHF:
4814 case BUILT_IN_ATANF:
4815 case BUILT_IN_ATANHF:
4816 case BUILT_IN_CBRTF:
4817 case BUILT_IN_COSF:
4818 case BUILT_IN_COSHF:
4819 case BUILT_IN_ERFF:
4820 case BUILT_IN_ERFCF:
4821 case BUILT_IN_EXP2F:
4822 case BUILT_IN_EXPF:
4823 case BUILT_IN_EXPM1F:
4824 case BUILT_IN_LGAMMAF:
4825 case BUILT_IN_LOG10F:
4826 case BUILT_IN_LOG1PF:
4827 case BUILT_IN_LOG2F:
4828 case BUILT_IN_LOGF:
4829 case BUILT_IN_SINF:
4830 case BUILT_IN_SINHF:
4831 case BUILT_IN_SQRTF:
4832 case BUILT_IN_TANF:
4833 case BUILT_IN_TANHF:
4834 bdecl = builtin_decl_implicit (fn);
4835 suffix = "4"; /* powf -> powf4 */
4836 if (el_mode != SFmode
4837 || n != 4
4838 || !bdecl)
4839 return NULL_TREE;
4840 break;
4842 default:
4843 return NULL_TREE;
4846 else
4847 return NULL_TREE;
4849 gcc_assert (suffix != NULL);
4850 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
4851 if (!bname)
4852 return NULL_TREE;
4854 strcpy (name, bname + sizeof ("__builtin_") - 1);
4855 strcat (name, suffix);
4857 if (n_args == 1)
4858 fntype = build_function_type_list (type_out, type_in, NULL);
4859 else if (n_args == 2)
4860 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
4861 else
4862 gcc_unreachable ();
4864 /* Build a function declaration for the vectorized function. */
4865 new_fndecl = build_decl (BUILTINS_LOCATION,
4866 FUNCTION_DECL, get_identifier (name), fntype);
4867 TREE_PUBLIC (new_fndecl) = 1;
4868 DECL_EXTERNAL (new_fndecl) = 1;
4869 DECL_IS_NOVOPS (new_fndecl) = 1;
4870 TREE_READONLY (new_fndecl) = 1;
4872 return new_fndecl;
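/* Usage (illustrative): with -mveclibabi=mass, -funsafe-math-optimizations
   and VSX, a vectorized __builtin_pow over V2DF is redirected to an external
   "powd2" declaration built as above. */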
4875 /* Returns a function decl for a vectorized version of the builtin function
4876 FNDECL with result vector type TYPE_OUT and argument type TYPE_IN, or
4877 NULL_TREE if it is not available. */
4879 static tree
4880 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
4881 tree type_in)
4883 machine_mode in_mode, out_mode;
4884 int in_n, out_n;
4886 if (TARGET_DEBUG_BUILTIN)
4887 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
4888 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
4889 GET_MODE_NAME (TYPE_MODE (type_out)),
4890 GET_MODE_NAME (TYPE_MODE (type_in)));
4892 if (TREE_CODE (type_out) != VECTOR_TYPE
4893 || TREE_CODE (type_in) != VECTOR_TYPE
4894 || !TARGET_VECTORIZE_BUILTINS)
4895 return NULL_TREE;
4897 out_mode = TYPE_MODE (TREE_TYPE (type_out));
4898 out_n = TYPE_VECTOR_SUBPARTS (type_out);
4899 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4900 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4902 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4904 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4905 switch (fn)
4907 case BUILT_IN_CLZIMAX:
4908 case BUILT_IN_CLZLL:
4909 case BUILT_IN_CLZL:
4910 case BUILT_IN_CLZ:
4911 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4913 if (out_mode == QImode && out_n == 16)
4914 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
4915 else if (out_mode == HImode && out_n == 8)
4916 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
4917 else if (out_mode == SImode && out_n == 4)
4918 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
4919 else if (out_mode == DImode && out_n == 2)
4920 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
4922 break;
4923 case BUILT_IN_COPYSIGN:
4924 if (VECTOR_UNIT_VSX_P (V2DFmode)
4925 && out_mode == DFmode && out_n == 2
4926 && in_mode == DFmode && in_n == 2)
4927 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
4928 break;
4929 case BUILT_IN_COPYSIGNF:
4930 if (out_mode != SFmode || out_n != 4
4931 || in_mode != SFmode || in_n != 4)
4932 break;
4933 if (VECTOR_UNIT_VSX_P (V4SFmode))
4934 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
4935 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4936 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
4937 break;
4938 case BUILT_IN_POPCOUNTIMAX:
4939 case BUILT_IN_POPCOUNTLL:
4940 case BUILT_IN_POPCOUNTL:
4941 case BUILT_IN_POPCOUNT:
4942 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4944 if (out_mode == QImode && out_n == 16)
4945 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
4946 else if (out_mode == HImode && out_n == 8)
4947 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
4948 else if (out_mode == SImode && out_n == 4)
4949 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
4950 else if (out_mode == DImode && out_n == 2)
4951 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
4953 break;
4954 case BUILT_IN_SQRT:
4955 if (VECTOR_UNIT_VSX_P (V2DFmode)
4956 && out_mode == DFmode && out_n == 2
4957 && in_mode == DFmode && in_n == 2)
4958 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
4959 break;
4960 case BUILT_IN_SQRTF:
4961 if (VECTOR_UNIT_VSX_P (V4SFmode)
4962 && out_mode == SFmode && out_n == 4
4963 && in_mode == SFmode && in_n == 4)
4964 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
4965 break;
4966 case BUILT_IN_CEIL:
4967 if (VECTOR_UNIT_VSX_P (V2DFmode)
4968 && out_mode == DFmode && out_n == 2
4969 && in_mode == DFmode && in_n == 2)
4970 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
4971 break;
4972 case BUILT_IN_CEILF:
4973 if (out_mode != SFmode || out_n != 4
4974 || in_mode != SFmode || in_n != 4)
4975 break;
4976 if (VECTOR_UNIT_VSX_P (V4SFmode))
4977 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
4978 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4979 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
4980 break;
4981 case BUILT_IN_FLOOR:
4982 if (VECTOR_UNIT_VSX_P (V2DFmode)
4983 && out_mode == DFmode && out_n == 2
4984 && in_mode == DFmode && in_n == 2)
4985 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
4986 break;
4987 case BUILT_IN_FLOORF:
4988 if (out_mode != SFmode || out_n != 4
4989 || in_mode != SFmode || in_n != 4)
4990 break;
4991 if (VECTOR_UNIT_VSX_P (V4SFmode))
4992 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
4993 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4994 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
4995 break;
4996 case BUILT_IN_FMA:
4997 if (VECTOR_UNIT_VSX_P (V2DFmode)
4998 && out_mode == DFmode && out_n == 2
4999 && in_mode == DFmode && in_n == 2)
5000 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5001 break;
5002 case BUILT_IN_FMAF:
5003 if (VECTOR_UNIT_VSX_P (V4SFmode)
5004 && out_mode == SFmode && out_n == 4
5005 && in_mode == SFmode && in_n == 4)
5006 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5007 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5008 && out_mode == SFmode && out_n == 4
5009 && in_mode == SFmode && in_n == 4)
5010 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5011 break;
5012 case BUILT_IN_TRUNC:
5013 if (VECTOR_UNIT_VSX_P (V2DFmode)
5014 && out_mode == DFmode && out_n == 2
5015 && in_mode == DFmode && in_n == 2)
5016 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5017 break;
5018 case BUILT_IN_TRUNCF:
5019 if (out_mode != SFmode || out_n != 4
5020 || in_mode != SFmode || in_n != 4)
5021 break;
5022 if (VECTOR_UNIT_VSX_P (V4SFmode))
5023 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5024 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
5025 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5026 break;
5027 case BUILT_IN_NEARBYINT:
5028 if (VECTOR_UNIT_VSX_P (V2DFmode)
5029 && flag_unsafe_math_optimizations
5030 && out_mode == DFmode && out_n == 2
5031 && in_mode == DFmode && in_n == 2)
5032 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5033 break;
5034 case BUILT_IN_NEARBYINTF:
5035 if (VECTOR_UNIT_VSX_P (V4SFmode)
5036 && flag_unsafe_math_optimizations
5037 && out_mode == SFmode && out_n == 4
5038 && in_mode == SFmode && in_n == 4)
5039 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5040 break;
5041 case BUILT_IN_RINT:
5042 if (VECTOR_UNIT_VSX_P (V2DFmode)
5043 && !flag_trapping_math
5044 && out_mode == DFmode && out_n == 2
5045 && in_mode == DFmode && in_n == 2)
5046 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5047 break;
5048 case BUILT_IN_RINTF:
5049 if (VECTOR_UNIT_VSX_P (V4SFmode)
5050 && !flag_trapping_math
5051 && out_mode == SFmode && out_n == 4
5052 && in_mode == SFmode && in_n == 4)
5053 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5054 break;
5055 default:
5056 break;
5060 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
5062 enum rs6000_builtins fn
5063 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
5064 switch (fn)
5066 case RS6000_BUILTIN_RSQRTF:
5067 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5068 && out_mode == SFmode && out_n == 4
5069 && in_mode == SFmode && in_n == 4)
5070 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5071 break;
5072 case RS6000_BUILTIN_RSQRT:
5073 if (VECTOR_UNIT_VSX_P (V2DFmode)
5074 && out_mode == DFmode && out_n == 2
5075 && in_mode == DFmode && in_n == 2)
5076 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5077 break;
5078 case RS6000_BUILTIN_RECIPF:
5079 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5080 && out_mode == SFmode && out_n == 4
5081 && in_mode == SFmode && in_n == 4)
5082 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5083 break;
5084 case RS6000_BUILTIN_RECIP:
5085 if (VECTOR_UNIT_VSX_P (V2DFmode)
5086 && out_mode == DFmode && out_n == 2
5087 && in_mode == DFmode && in_n == 2)
5088 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5089 break;
5090 default:
5091 break;
5095 /* Generate calls to libmass if appropriate. */
5096 if (rs6000_veclib_handler)
5097 return rs6000_veclib_handler (fndecl, type_out, type_in);
5099 return NULL_TREE;
5102 /* Default CPU string for rs6000*_file_start functions. */
5103 static const char *rs6000_default_cpu;
5105 /* Do anything needed at the start of the asm file. */
5107 static void
5108 rs6000_file_start (void)
5110 char buffer[80];
5111 const char *start = buffer;
5112 FILE *file = asm_out_file;
5114 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5116 default_file_start ();
5118 if (flag_verbose_asm)
5120 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5122 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5124 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5125 start = "";
5128 if (global_options_set.x_rs6000_cpu_index)
5130 fprintf (file, "%s -mcpu=%s", start,
5131 processor_target_table[rs6000_cpu_index].name);
5132 start = "";
5135 if (global_options_set.x_rs6000_tune_index)
5137 fprintf (file, "%s -mtune=%s", start,
5138 processor_target_table[rs6000_tune_index].name);
5139 start = "";
5142 if (PPC405_ERRATUM77)
5144 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5145 start = "";
5148 #ifdef USING_ELFOS_H
5149 switch (rs6000_sdata)
5151 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5152 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5153 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5154 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5157 if (rs6000_sdata && g_switch_value)
5159 fprintf (file, "%s -G %d", start,
5160 g_switch_value);
5161 start = "";
5163 #endif
5165 if (*start == '\0')
5166 putc ('\n', file);
5169 #ifdef USING_ELFOS_H
5170 if (rs6000_default_cpu == 0 || rs6000_default_cpu[0] == '\0'
5171 || !global_options_set.x_rs6000_cpu_index)
5173 fputs ("\t.machine ", asm_out_file);
5174 if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5175 fputs ("power8\n", asm_out_file);
5176 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5177 fputs ("power7\n", asm_out_file);
5178 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5179 fputs ("power6\n", asm_out_file);
5180 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5181 fputs ("power5\n", asm_out_file);
5182 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5183 fputs ("power4\n", asm_out_file);
5184 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5185 fputs ("ppc64\n", asm_out_file);
5186 else
5187 fputs ("ppc\n", asm_out_file);
5189 #endif
5191 if (DEFAULT_ABI == ABI_ELFv2)
5192 fprintf (file, "\t.abiversion 2\n");
5194 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
5195 || (TARGET_ELF && flag_pic == 2))
5197 switch_to_section (toc_section);
5198 switch_to_section (text_section);
5203 /* Return nonzero if this function is known to have a null epilogue. */
5205 int
5206 direct_return (void)
5208 if (reload_completed)
5210 rs6000_stack_t *info = rs6000_stack_info ();
5212 if (info->first_gp_reg_save == 32
5213 && info->first_fp_reg_save == 64
5214 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5215 && ! info->lr_save_p
5216 && ! info->cr_save_p
5217 && info->vrsave_size == 0
5218 && ! info->push_p)
5219 return 1;
5222 return 0;
5225 /* Return the number of instructions it takes to form a constant in an
5226 integer register. */
5228 static int
5229 num_insns_constant_wide (HOST_WIDE_INT value)
5231 /* signed constant loadable with addi */
5232 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5233 return 1;
5235 /* constant loadable with addis */
5236 else if ((value & 0xffff) == 0
5237 && (value >> 31 == -1 || value >> 31 == 0))
5238 return 1;
5240 else if (TARGET_POWERPC64)
5242 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5243 HOST_WIDE_INT high = value >> 31;
5245 if (high == 0 || high == -1)
5246 return 2;
5248 high >>= 1;
5250 if (low == 0)
5251 return num_insns_constant_wide (high) + 1;
5252 else if (high == 0)
5253 return num_insns_constant_wide (low) + 1;
5254 else
5255 return (num_insns_constant_wide (high)
5256 + num_insns_constant_wide (low) + 1);
5259 else
5260 return 2;
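/* Illustrative sketch, not part of the original file: the tests above are
   simple range checks.  A hypothetical standalone rewrite for a 64-bit
   host, with int64_t standing in for HOST_WIDE_INT; the helper name is
   invented for this example.  */

#include <stdint.h>

static int
insns_for_constant_sketch (int64_t value)
{
  /* One addi/li: the constant is a 16-bit signed immediate.  */
  if ((uint64_t) value + 0x8000 < 0x10000)
    return 1;

  /* One addis/lis: low 16 bits clear, fits in 32 signed bits.  */
  if ((value & 0xffff) == 0 && (value >> 31 == 0 || value >> 31 == -1))
    return 1;

  /* Any other 32-bit signed value: lis followed by ori.  */
  if (value >> 31 == 0 || value >> 31 == -1)
    return 2;

  /* True 64-bit constant: build the sign-extended 32-bit halves
     separately, then join them with one more insn (e.g. rldimi).  The
     xor/subtract idiom below sign-extends the low half, exactly as in
     the function above.  */
  int64_t low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
  int64_t high = value >> 32;
  if (low == 0)
    return insns_for_constant_sketch (high) + 1;
  if (high == 0)
    return insns_for_constant_sketch (low) + 1;
  return insns_for_constant_sketch (high)
	 + insns_for_constant_sketch (low) + 1;
}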
5263 int
5264 num_insns_constant (rtx op, machine_mode mode)
5266 HOST_WIDE_INT low, high;
5268 switch (GET_CODE (op))
5270 case CONST_INT:
5271 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5272 && mask64_operand (op, mode))
5273 return 2;
5274 else
5275 return num_insns_constant_wide (INTVAL (op));
5277 case CONST_WIDE_INT:
5279 int i;
5280 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5281 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5282 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5283 return ins;
5286 case CONST_DOUBLE:
5287 if (mode == SFmode || mode == SDmode)
5289 long l;
5290 REAL_VALUE_TYPE rv;
5292 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5293 if (DECIMAL_FLOAT_MODE_P (mode))
5294 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
5295 else
5296 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
5297 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5300 long l[2];
5301 REAL_VALUE_TYPE rv;
5303 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5304 if (DECIMAL_FLOAT_MODE_P (mode))
5305 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
5306 else
5307 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
5308 high = l[WORDS_BIG_ENDIAN == 0];
5309 low = l[WORDS_BIG_ENDIAN != 0];
5311 if (TARGET_32BIT)
5312 return (num_insns_constant_wide (low)
5313 + num_insns_constant_wide (high));
5314 else
5316 if ((high == 0 && low >= 0)
5317 || (high == -1 && low < 0))
5318 return num_insns_constant_wide (low);
5320 else if (mask64_operand (op, mode))
5321 return 2;
5323 else if (low == 0)
5324 return num_insns_constant_wide (high) + 1;
5326 else
5327 return (num_insns_constant_wide (high)
5328 + num_insns_constant_wide (low) + 1);
5331 default:
5332 gcc_unreachable ();
5336 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5337 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5338 corresponding element of the vector, but for V4SFmode and V2SFmode,
5339 the corresponding "float" is interpreted as an SImode integer. */
5341 HOST_WIDE_INT
5342 const_vector_elt_as_int (rtx op, unsigned int elt)
5344 rtx tmp;
5346 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5347 gcc_assert (GET_MODE (op) != V2DImode
5348 && GET_MODE (op) != V2DFmode);
5350 tmp = CONST_VECTOR_ELT (op, elt);
5351 if (GET_MODE (op) == V4SFmode
5352 || GET_MODE (op) == V2SFmode)
5353 tmp = gen_lowpart (SImode, tmp);
5354 return INTVAL (tmp);
5357 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5358 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5359 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5360 all items are set to the same value and contain COPIES replicas of the
5361 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5362 operand and the others are set to the value of the operand's msb. */
5364 static bool
5365 vspltis_constant (rtx op, unsigned step, unsigned copies)
5367 machine_mode mode = GET_MODE (op);
5368 machine_mode inner = GET_MODE_INNER (mode);
5370 unsigned i;
5371 unsigned nunits;
5372 unsigned bitsize;
5373 unsigned mask;
5375 HOST_WIDE_INT val;
5376 HOST_WIDE_INT splat_val;
5377 HOST_WIDE_INT msb_val;
5379 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5380 return false;
5382 nunits = GET_MODE_NUNITS (mode);
5383 bitsize = GET_MODE_BITSIZE (inner);
5384 mask = GET_MODE_MASK (inner);
5386 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5387 splat_val = val;
5388 msb_val = val >= 0 ? 0 : -1;
5390 /* Construct the value to be splatted, if possible. If not, return 0. */
5391 for (i = 2; i <= copies; i *= 2)
5393 HOST_WIDE_INT small_val;
5394 bitsize /= 2;
5395 small_val = splat_val >> bitsize;
5396 mask >>= bitsize;
5397 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5398 return false;
5399 splat_val = small_val;
5402 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5403 if (EASY_VECTOR_15 (splat_val))
5406 /* Also check if we can splat, and then add the result to itself. Do so if
5407 the value is positive, or if the splat instruction is using OP's mode;
5408 for splat_val < 0, the splat and the add should use the same mode. */
5409 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5410 && (splat_val >= 0 || (step == 1 && copies == 1)))
5413 /* Also check if we are loading up the most significant bit, which can be
5414 done by loading up -1 and shifting the value left by -1. */
5415 else if (EASY_VECTOR_MSB (splat_val, inner))
5418 else
5419 return false;
5421 /* Check if VAL is present in every STEP-th element, and the
5422 other elements are filled with its most significant bit. */
5423 for (i = 1; i < nunits; ++i)
5425 HOST_WIDE_INT desired_val;
5426 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5427 if ((i & (step - 1)) == 0)
5428 desired_val = val;
5429 else
5430 desired_val = msb_val;
5432 if (desired_val != const_vector_elt_as_int (op, elt))
5433 return false;
5436 return true;
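/* Illustrative sketch, not part of the original file: for the common
   step == 1, copies == 1 case the test above reduces to "every element
   equals one value that fits the 5-bit signed vspltis[bhw] immediate".
   A hypothetical host-side check for a V8HI-shaped constant (the
   -16..15 range mirrors EASY_VECTOR_15):  */

#include <stdbool.h>
#include <stdint.h>

static bool
is_vspltish_immediate_sketch (const int16_t elts[8])
{
  int16_t val = elts[0];
  int i;

  if (val < -16 || val > 15)
    return false;
  for (i = 1; i < 8; i++)
    if (elts[i] != val)
      return false;
  return true;
}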
5440 /* Return true if OP is of the given MODE and can be synthesized
5441 with a vspltisb, vspltish or vspltisw. */
5443 bool
5444 easy_altivec_constant (rtx op, machine_mode mode)
5446 unsigned step, copies;
5448 if (mode == VOIDmode)
5449 mode = GET_MODE (op);
5450 else if (mode != GET_MODE (op))
5451 return false;
5453 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5454 constants. */
5455 if (mode == V2DFmode)
5456 return zero_constant (op, mode);
5458 else if (mode == V2DImode)
5460 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5461 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5462 return false;
5464 if (zero_constant (op, mode))
5465 return true;
5467 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5468 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5469 return true;
5471 return false;
5474 /* V1TImode is a special container for TImode. Ignore for now. */
5475 else if (mode == V1TImode)
5476 return false;
5478 /* Start with a vspltisw. */
5479 step = GET_MODE_NUNITS (mode) / 4;
5480 copies = 1;
5482 if (vspltis_constant (op, step, copies))
5483 return true;
5485 /* Then try with a vspltish. */
5486 if (step == 1)
5487 copies <<= 1;
5488 else
5489 step >>= 1;
5491 if (vspltis_constant (op, step, copies))
5492 return true;
5494 /* And finally a vspltisb. */
5495 if (step == 1)
5496 copies <<= 1;
5497 else
5498 step >>= 1;
5500 if (vspltis_constant (op, step, copies))
5501 return true;
5503 return false;
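/* Illustrative sketch, not part of the original file: the three probes
   above walk from word splats down to byte splats, halving STEP until it
   reaches 1 and doubling COPIES afterwards.  A hypothetical loop that
   prints the (step, copies) pair tried for each instruction:  */

#include <stdio.h>

static void
print_splat_probes_sketch (unsigned nunits)
{
  static const char *const insn[]
    = { "vspltisw", "vspltish", "vspltisb" };
  unsigned step = nunits / 4, copies = 1;
  int i;

  for (i = 0; i < 3; i++)
    {
      printf ("%s: step=%u copies=%u\n", insn[i], step, copies);
      if (step == 1)
	copies <<= 1;
      else
	step >>= 1;
    }
}

/* For V16QI (nunits == 16) this prints steps 4, 2, 1 with copies 1: a
   byte vector may be a word, halfword or byte splat.  For V4SI (nunits
   == 4) it prints step 1 with copies 1, 2, 4: each word may itself be
   built from a replicated narrower splat.  */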
5506 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5507 result is OP. Abort if it is not possible. */
5509 rtx
5510 gen_easy_altivec_constant (rtx op)
5512 machine_mode mode = GET_MODE (op);
5513 int nunits = GET_MODE_NUNITS (mode);
5514 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5515 unsigned step = nunits / 4;
5516 unsigned copies = 1;
5518 /* Start with a vspltisw. */
5519 if (vspltis_constant (op, step, copies))
5520 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5522 /* Then try with a vspltish. */
5523 if (step == 1)
5524 copies <<= 1;
5525 else
5526 step >>= 1;
5528 if (vspltis_constant (op, step, copies))
5529 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5531 /* And finally a vspltisb. */
5532 if (step == 1)
5533 copies <<= 1;
5534 else
5535 step >>= 1;
5537 if (vspltis_constant (op, step, copies))
5538 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
5540 gcc_unreachable ();
5543 const char *
5544 output_vec_const_move (rtx *operands)
5546 int cst, cst2;
5547 machine_mode mode;
5548 rtx dest, vec;
5550 dest = operands[0];
5551 vec = operands[1];
5552 mode = GET_MODE (dest);
5554 if (TARGET_VSX)
5556 if (zero_constant (vec, mode))
5557 return "xxlxor %x0,%x0,%x0";
5559 if ((mode == V2DImode || mode == V1TImode)
5560 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
5561 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
5562 return "vspltisw %0,-1";
5565 if (TARGET_ALTIVEC)
5567 rtx splat_vec;
5568 if (zero_constant (vec, mode))
5569 return "vxor %0,%0,%0";
5571 splat_vec = gen_easy_altivec_constant (vec);
5572 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
5573 operands[1] = XEXP (splat_vec, 0);
5574 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
5575 return "#";
5577 switch (GET_MODE (splat_vec))
5579 case V4SImode:
5580 return "vspltisw %0,%1";
5582 case V8HImode:
5583 return "vspltish %0,%1";
5585 case V16QImode:
5586 return "vspltisb %0,%1";
5588 default:
5589 gcc_unreachable ();
5593 gcc_assert (TARGET_SPE);
5595 /* Vector constant 0 is handled as a splitter of V2SI, and in the
5596 pattern of V1DI, V4HI, and V2SF.
5598 FIXME: We should probably return # and add post reload
5599 splitters for these, but this way is so easy ;-). */
5600 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
5601 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
5602 operands[1] = CONST_VECTOR_ELT (vec, 0);
5603 operands[2] = CONST_VECTOR_ELT (vec, 1);
5604 if (cst == cst2)
5605 return "li %0,%1\n\tevmergelo %0,%0,%0";
5606 else if (WORDS_BIG_ENDIAN)
5607 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
5608 else
5609 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
5612 /* Initialize TARGET of vector PAIRED to VALS. */
5614 void
5615 paired_expand_vector_init (rtx target, rtx vals)
5617 machine_mode mode = GET_MODE (target);
5618 int n_elts = GET_MODE_NUNITS (mode);
5619 int n_var = 0;
5620 rtx x, new_rtx, tmp, constant_op, op1, op2;
5621 int i;
5623 for (i = 0; i < n_elts; ++i)
5625 x = XVECEXP (vals, 0, i);
5626 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5627 ++n_var;
5629 if (n_var == 0)
5631 /* Load from constant pool. */
5632 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5633 return;
5636 if (n_var == 2)
5638 /* The vector is initialized only with non-constants. */
5639 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
5640 XVECEXP (vals, 0, 1));
5642 emit_move_insn (target, new_rtx);
5643 return;
5646 /* One field is non-constant and the other one is a constant. Load the
5647 constant from the constant pool and use ps_merge instruction to
5648 construct the whole vector. */
5649 op1 = XVECEXP (vals, 0, 0);
5650 op2 = XVECEXP (vals, 0, 1);
5652 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
5654 tmp = gen_reg_rtx (GET_MODE (constant_op));
5655 emit_move_insn (tmp, constant_op);
5657 if (CONSTANT_P (op1))
5658 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
5659 else
5660 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
5662 emit_move_insn (target, new_rtx);
5665 void
5666 paired_expand_vector_move (rtx operands[])
5668 rtx op0 = operands[0], op1 = operands[1];
5670 emit_move_insn (op0, op1);
5673 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
5674 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
5675 operands for the relation operation COND. This is a recursive
5676 function. */
5678 static void
5679 paired_emit_vector_compare (enum rtx_code rcode,
5680 rtx dest, rtx op0, rtx op1,
5681 rtx cc_op0, rtx cc_op1)
5683 rtx tmp = gen_reg_rtx (V2SFmode);
5684 rtx tmp1, max, min;
5686 gcc_assert (TARGET_PAIRED_FLOAT);
5687 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5689 switch (rcode)
5691 case LT:
5692 case LTU:
5693 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5694 return;
5695 case GE:
5696 case GEU:
5697 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5698 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
5699 return;
5700 case LE:
5701 case LEU:
5702 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
5703 return;
5704 case GT:
5705 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5706 return;
5707 case EQ:
5708 tmp1 = gen_reg_rtx (V2SFmode);
5709 max = gen_reg_rtx (V2SFmode);
5710 min = gen_reg_rtx (V2SFmode);
5713 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5714 emit_insn (gen_selv2sf4
5715 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5716 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
5717 emit_insn (gen_selv2sf4
5718 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5719 emit_insn (gen_subv2sf3 (tmp1, min, max));
5720 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
5721 return;
5722 case NE:
5723 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
5724 return;
5725 case UNLE:
5726 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5727 return;
5728 case UNLT:
5729 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
5730 return;
5731 case UNGE:
5732 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5733 return;
5734 case UNGT:
5735 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
5736 return;
5737 default:
5738 gcc_unreachable ();
5741 return;
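/* Illustrative sketch, not part of the original file: the EQ case above
   builds equality out of the sign-based select primitive alone.  In
   scalar form (sel_sketch models ps_sel choosing X when C >= 0):  */

static float
sel_sketch (float c, float x, float y)
{
  return c >= 0.0f ? x : y;
}

static float
eq_select_sketch (float a, float b, float t, float f)
{
  float max = sel_sketch (a - b, a, b);	/* a - b >= 0 picks a */
  float min = sel_sketch (b - a, a, b);	/* b - a >= 0 picks a */
  /* min - max is zero iff a == b, negative otherwise.  */
  return sel_sketch (min - max, t, f);
}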
5744 /* Emit vector conditional expression.
5745 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5746 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5748 int
5749 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5750 rtx cond, rtx cc_op0, rtx cc_op1)
5752 enum rtx_code rcode = GET_CODE (cond);
5754 if (!TARGET_PAIRED_FLOAT)
5755 return 0;
5757 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
5759 return 1;
5762 /* Initialize vector TARGET to VALS. */
5764 void
5765 rs6000_expand_vector_init (rtx target, rtx vals)
5767 machine_mode mode = GET_MODE (target);
5768 machine_mode inner_mode = GET_MODE_INNER (mode);
5769 int n_elts = GET_MODE_NUNITS (mode);
5770 int n_var = 0, one_var = -1;
5771 bool all_same = true, all_const_zero = true;
5772 rtx x, mem;
5773 int i;
5775 for (i = 0; i < n_elts; ++i)
5777 x = XVECEXP (vals, 0, i);
5778 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5779 ++n_var, one_var = i;
5780 else if (x != CONST0_RTX (inner_mode))
5781 all_const_zero = false;
5783 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
5784 all_same = false;
5787 if (n_var == 0)
5789 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
5790 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
5791 if ((int_vector_p || TARGET_VSX) && all_const_zero)
5793 /* Zero register. */
5794 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (mode, target, target)));
5795 return;
5797 else if (int_vector_p && easy_vector_constant (const_vec, mode))
5799 /* Splat immediate. */
5800 emit_insn (gen_rtx_SET (target, const_vec));
5801 return;
5803 else
5805 /* Load from constant pool. */
5806 emit_move_insn (target, const_vec);
5807 return;
5811 /* Double word values on VSX can use xxpermdi or lxvdsx. */
5812 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5814 rtx op0 = XVECEXP (vals, 0, 0);
5815 rtx op1 = XVECEXP (vals, 0, 1);
5816 if (all_same)
5818 if (!MEM_P (op0) && !REG_P (op0))
5819 op0 = force_reg (inner_mode, op0);
5820 if (mode == V2DFmode)
5821 emit_insn (gen_vsx_splat_v2df (target, op0));
5822 else
5823 emit_insn (gen_vsx_splat_v2di (target, op0));
5825 else
5827 op0 = force_reg (inner_mode, op0);
5828 op1 = force_reg (inner_mode, op1);
5829 if (mode == V2DFmode)
5830 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
5831 else
5832 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
5834 return;
5837 /* With single precision floating point on VSX, we know that internally single
5838 precision is actually represented as a double, and either make 2 V2DF
5839 vectors, and convert these vectors to single precision, or do one
5840 conversion, and splat the result to the other elements. */
5841 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
5843 if (all_same)
5845 rtx freg = gen_reg_rtx (V4SFmode);
5846 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
5847 rtx cvt = ((TARGET_XSCVDPSPN)
5848 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
5849 : gen_vsx_xscvdpsp_scalar (freg, sreg));
5851 emit_insn (cvt);
5852 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
5854 else
5856 rtx dbl_even = gen_reg_rtx (V2DFmode);
5857 rtx dbl_odd = gen_reg_rtx (V2DFmode);
5858 rtx flt_even = gen_reg_rtx (V4SFmode);
5859 rtx flt_odd = gen_reg_rtx (V4SFmode);
5860 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
5861 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
5862 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
5863 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
5865 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
5866 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
5867 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
5868 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
5869 rs6000_expand_extract_even (target, flt_even, flt_odd);
5871 return;
5874 /* Store value to stack temp. Load vector element. Splat. However, splat
5875 of 64-bit items is not supported on Altivec. */
5876 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
5878 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5879 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
5880 XVECEXP (vals, 0, 0));
5881 x = gen_rtx_UNSPEC (VOIDmode,
5882 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5883 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5884 gen_rtvec (2,
5885 gen_rtx_SET (target, mem),
5886 x)));
5887 x = gen_rtx_VEC_SELECT (inner_mode, target,
5888 gen_rtx_PARALLEL (VOIDmode,
5889 gen_rtvec (1, const0_rtx)));
5890 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
5891 return;
5894 /* One field is non-constant. Load constant then overwrite
5895 varying field. */
5896 if (n_var == 1)
5898 rtx copy = copy_rtx (vals);
5900 /* Load constant part of vector, substitute neighboring value for
5901 varying element. */
5902 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
5903 rs6000_expand_vector_init (target, copy);
5905 /* Insert variable. */
5906 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
5907 return;
5910 /* Construct the vector in memory one field at a time
5911 and load the whole vector. */
5912 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5913 for (i = 0; i < n_elts; i++)
5914 emit_move_insn (adjust_address_nv (mem, inner_mode,
5915 i * GET_MODE_SIZE (inner_mode)),
5916 XVECEXP (vals, 0, i));
5917 emit_move_insn (target, mem);
5920 /* Set field ELT of TARGET to VAL. */
5922 void
5923 rs6000_expand_vector_set (rtx target, rtx val, int elt)
5925 machine_mode mode = GET_MODE (target);
5926 machine_mode inner_mode = GET_MODE_INNER (mode);
5927 rtx reg = gen_reg_rtx (mode);
5928 rtx mask, mem, x;
5929 int width = GET_MODE_SIZE (inner_mode);
5930 int i;
5932 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5934 rtx (*set_func) (rtx, rtx, rtx, rtx)
5935 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
5936 emit_insn (set_func (target, target, val, GEN_INT (elt)));
5937 return;
5940 /* Simplify setting single element vectors like V1TImode. */
5941 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
5943 emit_move_insn (target, gen_lowpart (mode, val));
5944 return;
5947 /* Load single variable value. */
5948 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5949 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
5950 x = gen_rtx_UNSPEC (VOIDmode,
5951 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5952 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5953 gen_rtvec (2,
5954 gen_rtx_SET (reg, mem),
5955 x)));
5957 /* Linear sequence. */
5958 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
5959 for (i = 0; i < 16; ++i)
5960 XVECEXP (mask, 0, i) = GEN_INT (i);
5962 /* Set permute mask to insert element into target. */
5963 for (i = 0; i < width; ++i)
5964 XVECEXP (mask, 0, elt*width + i)
5965 = GEN_INT (i + 0x10);
5966 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
5968 if (BYTES_BIG_ENDIAN)
5969 x = gen_rtx_UNSPEC (mode,
5970 gen_rtvec (3, target, reg,
5971 force_reg (V16QImode, x)),
5972 UNSPEC_VPERM);
5973 else
5975 /* Invert selector. We prefer to generate VNAND on P8 so
5976 that future fusion opportunities can kick in, but must
5977 generate VNOR elsewhere. */
5978 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
5979 rtx iorx = (TARGET_P8_VECTOR
5980 ? gen_rtx_IOR (V16QImode, notx, notx)
5981 : gen_rtx_AND (V16QImode, notx, notx));
5982 rtx tmp = gen_reg_rtx (V16QImode);
5983 emit_insn (gen_rtx_SET (tmp, iorx));
5985 /* Permute with operands reversed and adjusted selector. */
5986 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
5987 UNSPEC_VPERM);
5990 emit_insn (gen_rtx_SET (target, x));
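/* Illustrative sketch, not part of the original file: the selector built
   above is the identity byte permutation 00..0f with the WIDTH bytes of
   the target element redirected to 10..1f, i.e. to the register holding
   the new value.  A hypothetical host-side dump of that mask:  */

#include <stdio.h>

static void
print_insert_selector_sketch (int elt, int width)
{
  unsigned char sel[16];
  int i;

  for (i = 0; i < 16; i++)
    sel[i] = i;				/* bytes of vector one */
  for (i = 0; i < width; i++)
    sel[elt * width + i] = 0x10 + i;	/* bytes of vector two */
  for (i = 0; i < 16; i++)
    printf ("%02x ", sel[i]);
  printf ("\n");
}

/* For elt == 1, width == 4 this prints
   00 01 02 03 10 11 12 13 08 09 0a 0b 0c 0d 0e 0f
   so vperm copies element 1 from the value register and everything else
   from the original vector.  */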
5993 /* Extract field ELT from VEC into TARGET. */
5995 void
5996 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
5998 machine_mode mode = GET_MODE (vec);
5999 machine_mode inner_mode = GET_MODE_INNER (mode);
6000 rtx mem;
6002 if (VECTOR_MEM_VSX_P (mode))
6004 switch (mode)
6006 default:
6007 break;
6008 case V1TImode:
6009 gcc_assert (elt == 0 && inner_mode == TImode);
6010 emit_move_insn (target, gen_lowpart (TImode, vec));
6011 break;
6012 case V2DFmode:
6013 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
6014 return;
6015 case V2DImode:
6016 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
6017 return;
6018 case V4SFmode:
6019 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
6020 return;
6024 /* Allocate mode-sized buffer. */
6025 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6027 emit_move_insn (mem, vec);
6029 /* Add offset to field within buffer matching vector element. */
6030 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
6032 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6035 /* Generates shifts and masks for a pair of rldicl or rldicr insns to
6036 implement ANDing by the mask IN. */
6037 void
6038 build_mask64_2_operands (rtx in, rtx *out)
6040 unsigned HOST_WIDE_INT c, lsb, m1, m2;
6041 int shift;
6043 gcc_assert (GET_CODE (in) == CONST_INT);
6045 c = INTVAL (in);
6046 if (c & 1)
6048 /* Assume c initially something like 0x00fff000000fffff. The idea
6049 is to rotate the word so that the middle ^^^^^^ group of zeros
6050 is at the MS end and can be cleared with an rldicl mask. We then
6051 rotate back and clear off the MS ^^ group of zeros with a
6052 second rldicl. */
6053 c = ~c; /* c == 0xff000ffffff00000 */
6054 lsb = c & -c; /* lsb == 0x0000000000100000 */
6055 m1 = -lsb; /* m1 == 0xfffffffffff00000 */
6056 c = ~c; /* c == 0x00fff000000fffff */
6057 c &= -lsb; /* c == 0x00fff00000000000 */
6058 lsb = c & -c; /* lsb == 0x0000100000000000 */
6059 c = ~c; /* c == 0xff000fffffffffff */
6060 c &= -lsb; /* c == 0xff00000000000000 */
6061 shift = 0;
6062 while ((lsb >>= 1) != 0)
6063 shift++; /* shift == 44 on exit from loop */
6064 m1 <<= 64 - shift; /* m1 == 0xffffff0000000000 */
6065 m1 = ~m1; /* m1 == 0x000000ffffffffff */
6066 m2 = ~c; /* m2 == 0x00ffffffffffffff */
6068 else
6070 /* Assume c initially something like 0xff000f0000000000. The idea
6071 is to rotate the word so that the ^^^ middle group of zeros
6072 is at the LS end and can be cleared with an rldicr mask. We then
6073 rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
6074 a second rldicr. */
6075 lsb = c & -c; /* lsb == 0x0000010000000000 */
6076 m2 = -lsb; /* m2 == 0xffffff0000000000 */
6077 c = ~c; /* c == 0x00fff0ffffffffff */
6078 c &= -lsb; /* c == 0x00fff00000000000 */
6079 lsb = c & -c; /* lsb == 0x0000100000000000 */
6080 c = ~c; /* c == 0xff000fffffffffff */
6081 c &= -lsb; /* c == 0xff00000000000000 */
6082 shift = 0;
6083 while ((lsb >>= 1) != 0)
6084 shift++; /* shift == 44 on exit from loop */
6085 m1 = ~c; /* m1 == 0x00ffffffffffffff */
6086 m1 >>= shift; /* m1 == 0x0000000000000fff */
6087 m1 = ~m1; /* m1 == 0xfffffffffffff000 */
6090 /* Note that when we only have two 0->1 and 1->0 transitions, one of the
6091 masks will be all 1's. We are guaranteed more than one transition. */
6092 out[0] = GEN_INT (64 - shift);
6093 out[1] = GEN_INT (m1);
6094 out[2] = GEN_INT (shift);
6095 out[3] = GEN_INT (m2);
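/* Illustrative sketch, not part of the original file: the four outputs
   above assert that x & IN equals two rotate-and-mask steps,
   rotl (rotl (x, 64 - shift) & m1, shift) & m2.  A hypothetical
   host-side check of that identity for the mask walked through in the
   first comment:  */

#include <assert.h>
#include <stdint.h>

static uint64_t
rotl64_sketch (uint64_t x, unsigned n)
{
  return n ? (x << n) | (x >> (64 - n)) : x;
}

static void
check_mask64_2_sketch (void)
{
  const uint64_t c = 0x00fff000000fffffULL;	/* mask from the comment */
  const unsigned shift = 44;
  const uint64_t m1 = 0x000000ffffffffffULL;
  const uint64_t m2 = 0x00ffffffffffffffULL;
  uint64_t x = 0xdeadbeefcafef00dULL;		/* arbitrary test value */
  uint64_t two_step
    = rotl64_sketch (rotl64_sketch (x, 64 - shift) & m1, shift) & m2;

  assert (two_step == (x & c));
}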
6098 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
6100 bool
6101 invalid_e500_subreg (rtx op, machine_mode mode)
6103 if (TARGET_E500_DOUBLE)
6105 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
6106 subreg:TI and reg:TF. Decimal float modes are like integer
6107 modes (only low part of each register used) for this
6108 purpose. */
6109 if (GET_CODE (op) == SUBREG
6110 && (mode == SImode || mode == DImode || mode == TImode
6111 || mode == DDmode || mode == TDmode || mode == PTImode)
6112 && REG_P (SUBREG_REG (op))
6113 && (GET_MODE (SUBREG_REG (op)) == DFmode
6114 || GET_MODE (SUBREG_REG (op)) == TFmode))
6115 return true;
6117 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
6118 reg:TI. */
6119 if (GET_CODE (op) == SUBREG
6120 && (mode == DFmode || mode == TFmode)
6121 && REG_P (SUBREG_REG (op))
6122 && (GET_MODE (SUBREG_REG (op)) == DImode
6123 || GET_MODE (SUBREG_REG (op)) == TImode
6124 || GET_MODE (SUBREG_REG (op)) == PTImode
6125 || GET_MODE (SUBREG_REG (op)) == DDmode
6126 || GET_MODE (SUBREG_REG (op)) == TDmode))
6127 return true;
6130 if (TARGET_SPE
6131 && GET_CODE (op) == SUBREG
6132 && mode == SImode
6133 && REG_P (SUBREG_REG (op))
6134 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
6135 return true;
6137 return false;
6140 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
6141 selects whether the alignment is abi mandated, optional, or
6142 both abi and optional alignment. */
6144 unsigned int
6145 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
6147 if (how != align_opt)
6149 if (TREE_CODE (type) == VECTOR_TYPE)
6151 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
6152 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
6154 if (align < 64)
6155 align = 64;
6157 else if (align < 128)
6158 align = 128;
6160 else if (TARGET_E500_DOUBLE
6161 && TREE_CODE (type) == REAL_TYPE
6162 && TYPE_MODE (type) == DFmode)
6164 if (align < 64)
6165 align = 64;
6169 if (how != align_abi)
6171 if (TREE_CODE (type) == ARRAY_TYPE
6172 && TYPE_MODE (TREE_TYPE (type)) == QImode)
6174 if (align < BITS_PER_WORD)
6175 align = BITS_PER_WORD;
6179 return align;
6182 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
6184 bool
6185 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
6187 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6189 if (computed != 128)
6191 static bool warned;
6192 if (!warned && warn_psabi)
6194 warned = true;
6195 inform (input_location,
6196 "the layout of aggregates containing vectors with"
6197 " %d-byte alignment has changed in GCC 5",
6198 computed / BITS_PER_UNIT);
6201 /* In current GCC there is no special case. */
6202 return false;
6205 return false;
6208 /* AIX increases natural record alignment to doubleword if the first
6209 field is an FP double while the FP fields remain word aligned. */
6211 unsigned int
6212 rs6000_special_round_type_align (tree type, unsigned int computed,
6213 unsigned int specified)
6215 unsigned int align = MAX (computed, specified);
6216 tree field = TYPE_FIELDS (type);
6218 /* Skip all non-field decls. */
6219 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6220 field = DECL_CHAIN (field);
6222 if (field != NULL && field != type)
6224 type = TREE_TYPE (field);
6225 while (TREE_CODE (type) == ARRAY_TYPE)
6226 type = TREE_TYPE (type);
6228 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
6229 align = MAX (align, 64);
6232 return align;
6235 /* Darwin increases record alignment to the natural alignment of
6236 the first field. */
6238 unsigned int
6239 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6240 unsigned int specified)
6242 unsigned int align = MAX (computed, specified);
6244 if (TYPE_PACKED (type))
6245 return align;
6247 /* Find the first field, looking down into aggregates. */
6248 do {
6249 tree field = TYPE_FIELDS (type);
6250 /* Skip all non-field decls. */
6251 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6252 field = DECL_CHAIN (field);
6253 if (! field)
6254 break;
6255 /* A packed field does not contribute any extra alignment. */
6256 if (DECL_PACKED (field))
6257 return align;
6258 type = TREE_TYPE (field);
6259 while (TREE_CODE (type) == ARRAY_TYPE)
6260 type = TREE_TYPE (type);
6261 } while (AGGREGATE_TYPE_P (type));
6263 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6264 align = MAX (align, TYPE_ALIGN (type));
6266 return align;
6269 /* Return 1 for an operand in small memory on V.4/eabi. */
6271 int
6272 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6273 machine_mode mode ATTRIBUTE_UNUSED)
6275 #if TARGET_ELF
6276 rtx sym_ref;
6278 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6279 return 0;
6281 if (DEFAULT_ABI != ABI_V4)
6282 return 0;
6284 /* Vector and float memory instructions have a limited offset on the
6285 SPE, so using a vector or float variable directly as an operand is
6286 not useful. */
6287 if (TARGET_SPE
6288 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6289 return 0;
6291 if (GET_CODE (op) == SYMBOL_REF)
6292 sym_ref = op;
6294 else if (GET_CODE (op) != CONST
6295 || GET_CODE (XEXP (op, 0)) != PLUS
6296 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6297 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6298 return 0;
6300 else
6302 rtx sum = XEXP (op, 0);
6303 HOST_WIDE_INT summand;
6305 /* We have to be careful here, because it is the referenced address
6306 that must be 32k from _SDA_BASE_, not just the symbol. */
6307 summand = INTVAL (XEXP (sum, 1));
6308 if (summand < 0 || summand > g_switch_value)
6309 return 0;
6311 sym_ref = XEXP (sum, 0);
6314 return SYMBOL_REF_SMALL_P (sym_ref);
6315 #else
6316 return 0;
6317 #endif
6320 /* Return true if either operand is a general purpose register. */
6322 bool
6323 gpr_or_gpr_p (rtx op0, rtx op1)
6325 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6326 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6329 /* Return true if this is a move direct operation between GPR registers and
6330 floating point/VSX registers. */
6332 bool
6333 direct_move_p (rtx op0, rtx op1)
6335 int regno0, regno1;
6337 if (!REG_P (op0) || !REG_P (op1))
6338 return false;
6340 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6341 return false;
6343 regno0 = REGNO (op0);
6344 regno1 = REGNO (op1);
6345 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6346 return false;
6348 if (INT_REGNO_P (regno0))
6349 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6351 else if (INT_REGNO_P (regno1))
6353 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6354 return true;
6356 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6357 return true;
6360 return false;
6363 /* Return true if this is a load or store quad operation. This function does
6364 not handle the atomic quad memory instructions. */
6366 bool
6367 quad_load_store_p (rtx op0, rtx op1)
6369 bool ret;
6371 if (!TARGET_QUAD_MEMORY)
6372 ret = false;
6374 else if (REG_P (op0) && MEM_P (op1))
6375 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6376 && quad_memory_operand (op1, GET_MODE (op1))
6377 && !reg_overlap_mentioned_p (op0, op1));
6379 else if (MEM_P (op0) && REG_P (op1))
6380 ret = (quad_memory_operand (op0, GET_MODE (op0))
6381 && quad_int_reg_operand (op1, GET_MODE (op1)));
6383 else
6384 ret = false;
6386 if (TARGET_DEBUG_ADDR)
6388 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6389 ret ? "true" : "false");
6390 debug_rtx (gen_rtx_SET (op0, op1));
6393 return ret;
6396 /* Given an address, return a constant offset term if one exists. */
6398 static rtx
6399 address_offset (rtx op)
6401 if (GET_CODE (op) == PRE_INC
6402 || GET_CODE (op) == PRE_DEC)
6403 op = XEXP (op, 0);
6404 else if (GET_CODE (op) == PRE_MODIFY
6405 || GET_CODE (op) == LO_SUM)
6406 op = XEXP (op, 1);
6408 if (GET_CODE (op) == CONST)
6409 op = XEXP (op, 0);
6411 if (GET_CODE (op) == PLUS)
6412 op = XEXP (op, 1);
6414 if (CONST_INT_P (op))
6415 return op;
6417 return NULL_RTX;
6420 /* Return true if the MEM operand is a memory operand suitable for use
6421 with a (full width, possibly multiple) gpr load/store. On
6422 powerpc64 this means the offset must be divisible by 4.
6423 Implements 'Y' constraint.
6425 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6426 a constraint function we know the operand has satisfied a suitable
6427 memory predicate. Also accept some odd rtl generated by reload
6428 (see rs6000_legitimize_reload_address for various forms). It is
6429 important that reload rtl be accepted by appropriate constraints
6430 but not by the operand predicate.
6432 Offsetting a lo_sum should not be allowed, except where we know by
6433 alignment that a 32k boundary is not crossed, but see the ???
6434 comment in rs6000_legitimize_reload_address. Note that by
6435 "offsetting" here we mean a further offset to access parts of the
6436 MEM. It's fine to have a lo_sum where the inner address is offset
6437 from a sym, since the same sym+offset will appear in the high part
6438 of the address calculation. */
6440 bool
6441 mem_operand_gpr (rtx op, machine_mode mode)
6443 unsigned HOST_WIDE_INT offset;
6444 int extra;
6445 rtx addr = XEXP (op, 0);
6447 op = address_offset (addr);
6448 if (op == NULL_RTX)
6449 return true;
6451 offset = INTVAL (op);
6452 if (TARGET_POWERPC64 && (offset & 3) != 0)
6453 return false;
6455 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
6456 if (extra < 0)
6457 extra = 0;
6459 if (GET_CODE (addr) == LO_SUM)
6460 /* For lo_sum addresses, we must allow any offset except one that
6461 causes a wrap, so test only the low 16 bits. */
6462 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
6464 return offset + 0x8000 < 0x10000u - extra;
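/* Illustrative sketch, not part of the original file: the closing range
   test above checks that both the first and the last word of the access
   fit a 16-bit signed displacement, EXTRA being the size beyond one
   word.  A hypothetical standalone predicate:  */

#include <stdbool.h>
#include <stdint.h>

static bool
gpr_offset_ok_sketch (int64_t offset, unsigned mode_size,
		      unsigned units_per_word, bool powerpc64)
{
  unsigned extra;

  if (powerpc64 && (offset & 3) != 0)	/* ds-form wants offset % 4 == 0 */
    return false;
  extra = mode_size > units_per_word ? mode_size - units_per_word : 0;
  /* The unsigned-add trick is the same one the function above uses:
     offset must lie in [-0x8000, 0x8000 - extra).  */
  return (uint64_t) offset + 0x8000 < 0x10000u - extra;
}

/* e.g. an 8-byte access on a 32-bit target (extra == 4) at offset 0x7ffc
   is rejected, because its second word would need displacement 0x8000.  */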
6467 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
6469 static bool
6470 reg_offset_addressing_ok_p (machine_mode mode)
6472 switch (mode)
6474 case V16QImode:
6475 case V8HImode:
6476 case V4SFmode:
6477 case V4SImode:
6478 case V2DFmode:
6479 case V2DImode:
6480 case V1TImode:
6481 case TImode:
6482 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6483 TImode is not a vector mode, if we want to use the VSX registers to
6484 move it around, we need to restrict ourselves to reg+reg
6485 addressing. */
6486 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6487 return false;
6488 break;
6490 case V4HImode:
6491 case V2SImode:
6492 case V1DImode:
6493 case V2SFmode:
6494 /* Paired vector modes. Only reg+reg addressing is valid. */
6495 if (TARGET_PAIRED_FLOAT)
6496 return false;
6497 break;
6499 case SDmode:
6500 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6501 addressing for the LFIWZX and STFIWX instructions. */
6502 if (TARGET_NO_SDMODE_STACK)
6503 return false;
6504 break;
6506 default:
6507 break;
6510 return true;
6513 static bool
6514 virtual_stack_registers_memory_p (rtx op)
6516 int regnum;
6518 if (GET_CODE (op) == REG)
6519 regnum = REGNO (op);
6521 else if (GET_CODE (op) == PLUS
6522 && GET_CODE (XEXP (op, 0)) == REG
6523 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6524 regnum = REGNO (XEXP (op, 0));
6526 else
6527 return false;
6529 return (regnum >= FIRST_VIRTUAL_REGISTER
6530 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
6533 /* Return true if a MODE sized memory accesses to OP plus OFFSET
6534 is known to not straddle a 32k boundary. This function is used
6535 to determine whether -mcmodel=medium code can use TOC pointer
6536 relative addressing for OP. This means the alignment of the TOC
6537 pointer must also be taken into account, and unfortunately that is
6538 only 8 bytes. */
6540 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
6541 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
6542 #endif
6544 static bool
6545 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
6546 machine_mode mode)
6548 tree decl;
6549 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
6551 if (GET_CODE (op) != SYMBOL_REF)
6552 return false;
6554 dsize = GET_MODE_SIZE (mode);
6555 decl = SYMBOL_REF_DECL (op);
6556 if (!decl)
6558 if (dsize == 0)
6559 return false;
6561 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
6562 replacing memory addresses with an anchor plus offset. We
6563 could find the decl by rummaging around in the block->objects
6564 VEC for the given offset but that seems like too much work. */
6565 dalign = BITS_PER_UNIT;
6566 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
6567 && SYMBOL_REF_ANCHOR_P (op)
6568 && SYMBOL_REF_BLOCK (op) != NULL)
6570 struct object_block *block = SYMBOL_REF_BLOCK (op);
6572 dalign = block->alignment;
6573 offset += SYMBOL_REF_BLOCK_OFFSET (op);
6575 else if (CONSTANT_POOL_ADDRESS_P (op))
6577 /* It would be nice to have get_pool_align().. */
6578 machine_mode cmode = get_pool_mode (op);
6580 dalign = GET_MODE_ALIGNMENT (cmode);
6583 else if (DECL_P (decl))
6585 dalign = DECL_ALIGN (decl);
6587 if (dsize == 0)
6589 /* Allow BLKmode when the entire object is known to not
6590 cross a 32k boundary. */
6591 if (!DECL_SIZE_UNIT (decl))
6592 return false;
6594 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
6595 return false;
6597 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
6598 if (dsize > 32768)
6599 return false;
6601 dalign /= BITS_PER_UNIT;
6602 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
6603 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
6604 return dalign >= dsize;
6607 else
6608 gcc_unreachable ();
6610 /* Find how many bits of the alignment we know for this access. */
6611 dalign /= BITS_PER_UNIT;
6612 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
6613 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
6614 mask = dalign - 1;
6615 lsb = offset & -offset;
6616 mask &= lsb - 1;
6617 dalign = mask + 1;
6619 return dalign >= dsize;
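/* Illustrative sketch, not part of the original file: the closing
   bit-twiddling above computes the guaranteed alignment of
   SYMBOL + OFFSET as the smaller of the symbol's (power-of-two)
   alignment and the lowest set bit of the offset.  A hypothetical
   standalone version:  */

#include <stdint.h>

static uint64_t
known_align_of_sum_sketch (uint64_t sym_align, int64_t offset)
{
  uint64_t off = (uint64_t) offset;
  uint64_t mask = sym_align - 1;
  uint64_t lsb = off & -off;	/* lowest set bit; 0 when offset == 0 */

  mask &= lsb - 1;		/* offset == 0 leaves mask unchanged */
  return mask + 1;
}

/* e.g. an 8-byte-aligned symbol plus offset 4 is only guaranteed 4-byte
   alignment (7 & 3 == 3, giving 4), while offset 0 keeps the full 8.  */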
6622 static bool
6623 constant_pool_expr_p (rtx op)
6625 rtx base, offset;
6627 split_const (op, &base, &offset);
6628 return (GET_CODE (base) == SYMBOL_REF
6629 && CONSTANT_POOL_ADDRESS_P (base)
6630 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
6633 static const_rtx tocrel_base, tocrel_offset;
6635 /* Return true if OP is a toc pointer relative address (the output
6636 of create_TOC_reference). If STRICT, do not match high part or
6637 non-split -mcmodel=large/medium toc pointer relative addresses. */
6639 bool
6640 toc_relative_expr_p (const_rtx op, bool strict)
6642 if (!TARGET_TOC)
6643 return false;
6645 if (TARGET_CMODEL != CMODEL_SMALL)
6647 /* Only match the low part. */
6648 if (GET_CODE (op) == LO_SUM
6649 && REG_P (XEXP (op, 0))
6650 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
6651 op = XEXP (op, 1);
6652 else if (strict)
6653 return false;
6656 tocrel_base = op;
6657 tocrel_offset = const0_rtx;
6658 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
6660 tocrel_base = XEXP (op, 0);
6661 tocrel_offset = XEXP (op, 1);
6664 return (GET_CODE (tocrel_base) == UNSPEC
6665 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
6668 /* Return true if X is a constant pool address, and also for cmodel=medium
6669 if X is a toc-relative address known to be offsettable within MODE. */
6671 bool
6672 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
6673 bool strict)
6675 return (toc_relative_expr_p (x, strict)
6676 && (TARGET_CMODEL != CMODEL_MEDIUM
6677 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
6678 || mode == QImode
6679 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
6680 INTVAL (tocrel_offset), mode)));
6683 static bool
6684 legitimate_small_data_p (machine_mode mode, rtx x)
6686 return (DEFAULT_ABI == ABI_V4
6687 && !flag_pic && !TARGET_TOC
6688 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
6689 && small_data_operand (x, mode));
6692 /* SPE offset addressing is limited to 5-bits worth of double words. */
6693 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
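/* Illustrative sketch, not part of the original file: the macro above
   accepts exactly the offsets the SPE encoding can express, namely
   multiples of 8 from 0 to 0xf8 (a 5-bit count of double words).  A few
   hypothetical spot checks:  */

#include <assert.h>

static void
spe_offset_examples_sketch (void)
{
  assert (SPE_CONST_OFFSET_OK (0x10));		/* multiple of 8, in range */
  assert (SPE_CONST_OFFSET_OK (0xf8));		/* largest encodable offset */
  assert (!SPE_CONST_OFFSET_OK (0x04));		/* not a multiple of 8 */
  assert (!SPE_CONST_OFFSET_OK (0x100));	/* needs a sixth bit */
}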
6695 bool
6696 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
6697 bool strict, bool worst_case)
6699 unsigned HOST_WIDE_INT offset;
6700 unsigned int extra;
6702 if (GET_CODE (x) != PLUS)
6703 return false;
6704 if (!REG_P (XEXP (x, 0)))
6705 return false;
6706 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6707 return false;
6708 if (!reg_offset_addressing_ok_p (mode))
6709 return virtual_stack_registers_memory_p (x);
6710 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
6711 return true;
6712 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6713 return false;
6715 offset = INTVAL (XEXP (x, 1));
6716 extra = 0;
6717 switch (mode)
6719 case V4HImode:
6720 case V2SImode:
6721 case V1DImode:
6722 case V2SFmode:
6723 /* SPE vector modes. */
6724 return SPE_CONST_OFFSET_OK (offset);
6726 case DFmode:
6727 case DDmode:
6728 case DImode:
6729 /* On e500v2, we may have:
6731 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
6733 which gets addressed with evldd instructions. */
6734 if (TARGET_E500_DOUBLE)
6735 return SPE_CONST_OFFSET_OK (offset);
6737 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
6738 addressing. */
6739 if (VECTOR_MEM_VSX_P (mode))
6740 return false;
6742 if (!worst_case)
6743 break;
6744 if (!TARGET_POWERPC64)
6745 extra = 4;
6746 else if (offset & 3)
6747 return false;
6748 break;
6750 case TFmode:
6751 if (TARGET_E500_DOUBLE)
6752 return (SPE_CONST_OFFSET_OK (offset)
6753 && SPE_CONST_OFFSET_OK (offset + 8));
6754 /* fall through */
6756 case TDmode:
6757 case TImode:
6758 case PTImode:
6759 extra = 8;
6760 if (!worst_case)
6761 break;
6762 if (!TARGET_POWERPC64)
6763 extra = 12;
6764 else if (offset & 3)
6765 return false;
6766 break;
6768 default:
6769 break;
6772 offset += 0x8000;
6773 return offset < 0x10000 - extra;
6776 bool
6777 legitimate_indexed_address_p (rtx x, int strict)
6779 rtx op0, op1;
6781 if (GET_CODE (x) != PLUS)
6782 return false;
6784 op0 = XEXP (x, 0);
6785 op1 = XEXP (x, 1);
6787 /* Recognize the rtl generated by reload which we know will later be
6788 replaced with proper base and index regs. */
6789 if (!strict
6790 && reload_in_progress
6791 && (REG_P (op0) || GET_CODE (op0) == PLUS)
6792 && REG_P (op1))
6793 return true;
6795 return (REG_P (op0) && REG_P (op1)
6796 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
6797 && INT_REG_OK_FOR_INDEX_P (op1, strict))
6798 || (INT_REG_OK_FOR_BASE_P (op1, strict)
6799 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
6802 bool
6803 avoiding_indexed_address_p (machine_mode mode)
6805 /* Avoid indexed addressing for modes that have non-indexed
6806 load/store instruction forms. */
6807 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
6810 bool
6811 legitimate_indirect_address_p (rtx x, int strict)
6813 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
6816 bool
6817 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
6819 if (!TARGET_MACHO || !flag_pic
6820 || mode != SImode || GET_CODE (x) != MEM)
6821 return false;
6822 x = XEXP (x, 0);
6824 if (GET_CODE (x) != LO_SUM)
6825 return false;
6826 if (GET_CODE (XEXP (x, 0)) != REG)
6827 return false;
6828 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
6829 return false;
6830 x = XEXP (x, 1);
6832 return CONSTANT_P (x);
6835 static bool
6836 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
6838 if (GET_CODE (x) != LO_SUM)
6839 return false;
6840 if (GET_CODE (XEXP (x, 0)) != REG)
6841 return false;
6842 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6843 return false;
6844 /* Restrict addressing for DI because of our SUBREG hackery. */
6845 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
6846 return false;
6847 x = XEXP (x, 1);
6849 if (TARGET_ELF || TARGET_MACHO)
6851 bool large_toc_ok;
6853 if (DEFAULT_ABI == ABI_V4 && flag_pic)
6854 return false;
6855 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as it usually calls
6856 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
6857 recognizes some LO_SUM addresses as valid although this
6858 function says the opposite. In most cases LRA, through different
6859 transformations, can generate correct code for address reloads;
6860 it fails to handle only some LO_SUM cases. So we need to add
6861 code analogous to that in rs6000_legitimize_reload_address for
6862 LO_SUM here, saying that some addresses are still valid. */
6863 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
6864 && small_toc_ref (x, VOIDmode));
6865 if (TARGET_TOC && ! large_toc_ok)
6866 return false;
6867 if (GET_MODE_NUNITS (mode) != 1)
6868 return false;
6869 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6870 && !(/* ??? Assume floating point reg based on mode? */
6871 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
6872 && (mode == DFmode || mode == DDmode)))
6873 return false;
6875 return CONSTANT_P (x) || large_toc_ok;
6878 return false;
6882 /* Try machine-dependent ways of modifying an illegitimate address
6883 to be legitimate. If we find one, return the new, valid address.
6884 This is used from only one place: `memory_address' in explow.c.
6886 OLDX is the address as it was before break_out_memory_refs was
6887 called. In some cases it is useful to look at this to decide what
6888 needs to be done.
6890 It is always safe for this function to do nothing. It exists to
6891 recognize opportunities to optimize the output.
6893 On RS/6000, first check for the sum of a register with a constant
6894 integer that is out of range. If so, generate code to add the
6895 constant with the low-order 16 bits masked to the register and force
6896 this result into another register (this can be done with `cau').
6897 Then generate an address of REG+(CONST&0xffff), allowing for the
6898 possibility of bit 16 being a one.
6900 Then check for the sum of a register and something not constant, try to
6901 load the other things into a register and return the sum. */
6903 static rtx
6904 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
6905 machine_mode mode)
6907 unsigned int extra;
6909 if (!reg_offset_addressing_ok_p (mode))
6911 if (virtual_stack_registers_memory_p (x))
6912 return x;
6914 /* In theory we should not be seeing addresses of the form reg+0,
6915 but just in case it is generated, optimize it away. */
6916 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
6917 return force_reg (Pmode, XEXP (x, 0));
6919 /* For TImode with load/store quad, restrict addresses to just a single
6920 pointer, so it works with both GPRs and VSX registers. */
6921 /* Make sure both operands are registers. */
6922 else if (GET_CODE (x) == PLUS
6923 && (mode != TImode || !TARGET_QUAD_MEMORY))
6924 return gen_rtx_PLUS (Pmode,
6925 force_reg (Pmode, XEXP (x, 0)),
6926 force_reg (Pmode, XEXP (x, 1)));
6927 else
6928 return force_reg (Pmode, x);
6930 if (GET_CODE (x) == SYMBOL_REF)
6932 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
6933 if (model != 0)
6934 return rs6000_legitimize_tls_address (x, model);
6937 extra = 0;
6938 switch (mode)
6940 case TFmode:
6941 case TDmode:
6942 case TImode:
6943 case PTImode:
6944 /* As in legitimate_offset_address_p we do not assume
6945 worst-case. The mode here is just a hint as to the registers
6946 used. A TImode is usually in gprs, but may actually be in
6947 fprs. Leave worst-case scenario for reload to handle via
6948 insn constraints. PTImode is only GPRs. */
6949 extra = 8;
6950 break;
6951 default:
6952 break;
6955 if (GET_CODE (x) == PLUS
6956 && GET_CODE (XEXP (x, 0)) == REG
6957 && GET_CODE (XEXP (x, 1)) == CONST_INT
6958 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
6959 >= 0x10000 - extra)
6960 && !(SPE_VECTOR_MODE (mode)
6961 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
6963 HOST_WIDE_INT high_int, low_int;
6964 rtx sum;
6965 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
6966 if (low_int >= 0x8000 - extra)
6967 low_int = 0;
6968 high_int = INTVAL (XEXP (x, 1)) - low_int;
6969 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
6970 GEN_INT (high_int)), 0);
6971 return plus_constant (Pmode, sum, low_int);
6973 else if (GET_CODE (x) == PLUS
6974 && GET_CODE (XEXP (x, 0)) == REG
6975 && GET_CODE (XEXP (x, 1)) != CONST_INT
6976 && GET_MODE_NUNITS (mode) == 1
6977 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6978 || (/* ??? Assume floating point reg based on mode? */
6979 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6980 && (mode == DFmode || mode == DDmode)))
6981 && !avoiding_indexed_address_p (mode))
6983 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
6984 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
6986 else if (SPE_VECTOR_MODE (mode)
6987 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
6989 if (mode == DImode)
6990 return x;
6991 /* We accept [reg + reg] and [reg + OFFSET]. */
6993 if (GET_CODE (x) == PLUS)
6995 rtx op1 = XEXP (x, 0);
6996 rtx op2 = XEXP (x, 1);
6997 rtx y;
6999 op1 = force_reg (Pmode, op1);
7001 if (GET_CODE (op2) != REG
7002 && (GET_CODE (op2) != CONST_INT
7003 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
7004 || (GET_MODE_SIZE (mode) > 8
7005 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
7006 op2 = force_reg (Pmode, op2);
7008 /* We can't always do [reg + reg] for these, because [reg +
7009 reg + offset] is not a legitimate addressing mode. */
7010 y = gen_rtx_PLUS (Pmode, op1, op2);
7012 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
7013 return force_reg (Pmode, y);
7014 else
7015 return y;
7018 return force_reg (Pmode, x);
7020 else if ((TARGET_ELF
7021 #if TARGET_MACHO
7022 || !MACHO_DYNAMIC_NO_PIC_P
7023 #endif
7025 && TARGET_32BIT
7026 && TARGET_NO_TOC
7027 && ! flag_pic
7028 && GET_CODE (x) != CONST_INT
7029 && GET_CODE (x) != CONST_WIDE_INT
7030 && GET_CODE (x) != CONST_DOUBLE
7031 && CONSTANT_P (x)
7032 && GET_MODE_NUNITS (mode) == 1
7033 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7034 || (/* ??? Assume floating point reg based on mode? */
7035 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7036 && (mode == DFmode || mode == DDmode))))
7038 rtx reg = gen_reg_rtx (Pmode);
7039 if (TARGET_ELF)
7040 emit_insn (gen_elf_high (reg, x));
7041 else
7042 emit_insn (gen_macho_high (reg, x));
7043 return gen_rtx_LO_SUM (Pmode, reg, x);
7045 else if (TARGET_TOC
7046 && GET_CODE (x) == SYMBOL_REF
7047 && constant_pool_expr_p (x)
7048 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
7049 return create_TOC_reference (x, NULL_RTX);
7050 else
7051 return x;
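/* Illustrative sketch, not part of the original file: the first arm of
   the function splits an out-of-range constant C into HIGH + LOW where
   LOW is a sign-extended 16-bit value, so HIGH can go into addis and
   LOW into the displacement field.  A simplified standalone version
   (the real code above also zeroes LOW near the 0x8000 boundary for
   multi-word modes):  */

#include <assert.h>
#include <stdint.h>

static void
split_offset_sketch (int64_t c, int64_t *high, int64_t *low)
{
  *low = ((c & 0xffff) ^ 0x8000) - 0x8000;	/* sign-extend low 16 bits */
  *high = c - *low;				/* multiple of 0x10000 */
  assert (*high + *low == c && *low >= -0x8000 && *low < 0x8000);
}

/* e.g. 0x12345678 splits into high 0x12340000 and low 0x5678, while
   0x1234fffc splits into high 0x12350000 and low -4.  */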
7054 /* Debug version of rs6000_legitimize_address. */
7055 static rtx
7056 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
7058 rtx ret;
7059 rtx_insn *insns;
7061 start_sequence ();
7062 ret = rs6000_legitimize_address (x, oldx, mode);
7063 insns = get_insns ();
7064 end_sequence ();
7066 if (ret != x)
7068 fprintf (stderr,
7069 "\nrs6000_legitimize_address: mode %s, old code %s, "
7070 "new code %s, modified\n",
7071 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
7072 GET_RTX_NAME (GET_CODE (ret)));
7074 fprintf (stderr, "Original address:\n");
7075 debug_rtx (x);
7077 fprintf (stderr, "oldx:\n");
7078 debug_rtx (oldx);
7080 fprintf (stderr, "New address:\n");
7081 debug_rtx (ret);
7083 if (insns)
7085 fprintf (stderr, "Insns added:\n");
7086 debug_rtx_list (insns, 20);
7089 else
7091 fprintf (stderr,
7092 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
7093 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
7095 debug_rtx (x);
7098 if (insns)
7099 emit_insn (insns);
7101 return ret;
7104 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7105 We need to emit DTP-relative relocations. */
7107 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7108 static void
7109 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
7111 switch (size)
7113 case 4:
7114 fputs ("\t.long\t", file);
7115 break;
7116 case 8:
7117 fputs (DOUBLE_INT_ASM_OP, file);
7118 break;
7119 default:
7120 gcc_unreachable ();
7122 output_addr_const (file, x);
7123 fputs ("@dtprel+0x8000", file);
7126 /* Return true if X is a symbol that refers to real (rather than emulated)
7127 TLS. */
7129 static bool
7130 rs6000_real_tls_symbol_ref_p (rtx x)
7132 return (GET_CODE (x) == SYMBOL_REF
7133 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
7136 /* In the name of slightly smaller debug output, and to cater to
7137 general assembler lossage, recognize various UNSPEC sequences
7138 and turn them back into a direct symbol reference. */
7140 static rtx
7141 rs6000_delegitimize_address (rtx orig_x)
7143 rtx x, y, offset;
7145 orig_x = delegitimize_mem_from_attrs (orig_x);
7146 x = orig_x;
7147 if (MEM_P (x))
7148 x = XEXP (x, 0);
7150 y = x;
7151 if (TARGET_CMODEL != CMODEL_SMALL
7152 && GET_CODE (y) == LO_SUM)
7153 y = XEXP (y, 1);
7155 offset = NULL_RTX;
7156 if (GET_CODE (y) == PLUS
7157 && GET_MODE (y) == Pmode
7158 && CONST_INT_P (XEXP (y, 1)))
7160 offset = XEXP (y, 1);
7161 y = XEXP (y, 0);
7164 if (GET_CODE (y) == UNSPEC
7165 && XINT (y, 1) == UNSPEC_TOCREL)
7167 y = XVECEXP (y, 0, 0);
7169 #ifdef HAVE_AS_TLS
7170 /* Do not associate thread-local symbols with the original
7171 constant pool symbol. */
7172 if (TARGET_XCOFF
7173 && GET_CODE (y) == SYMBOL_REF
7174 && CONSTANT_POOL_ADDRESS_P (y)
7175 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
7176 return orig_x;
7177 #endif
7179 if (offset != NULL_RTX)
7180 y = gen_rtx_PLUS (Pmode, y, offset);
7181 if (!MEM_P (orig_x))
7182 return y;
7183 else
7184 return replace_equiv_address_nv (orig_x, y);
7187 if (TARGET_MACHO
7188 && GET_CODE (orig_x) == LO_SUM
7189 && GET_CODE (XEXP (orig_x, 1)) == CONST)
7191 y = XEXP (XEXP (orig_x, 1), 0);
7192 if (GET_CODE (y) == UNSPEC
7193 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
7194 return XVECEXP (y, 0, 0);
7197 return orig_x;
7200 /* Return true if X shouldn't be emitted into the debug info.
7201 The linker doesn't like .toc section references from
7202 .debug_* sections, so reject .toc section symbols. */
7204 static bool
7205 rs6000_const_not_ok_for_debug_p (rtx x)
7207 if (GET_CODE (x) == SYMBOL_REF
7208 && CONSTANT_POOL_ADDRESS_P (x))
7210 rtx c = get_pool_constant (x);
7211 machine_mode cmode = get_pool_mode (x);
7212 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7213 return true;
7216 return false;
7219 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7221 static GTY(()) rtx rs6000_tls_symbol;
7222 static rtx
7223 rs6000_tls_get_addr (void)
7225 if (!rs6000_tls_symbol)
7226 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7228 return rs6000_tls_symbol;
7231 /* Construct the SYMBOL_REF for TLS GOT references. */
7233 static GTY(()) rtx rs6000_got_symbol;
7234 static rtx
7235 rs6000_got_sym (void)
7237 if (!rs6000_got_symbol)
7239 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7240 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7241 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7244 return rs6000_got_symbol;
7247 /* AIX Thread-Local Address support. */
7249 static rtx
7250 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7252 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7253 const char *name;
7254 char *tlsname;
7256 name = XSTR (addr, 0);
7257 /* Append TLS CSECT qualifier, unless the symbol already is qualified
7258 or the symbol will be in TLS private data section. */
7259 if (name[strlen (name) - 1] != ']'
7260 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7261 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7263 tlsname = XALLOCAVEC (char, strlen (name) + 4);
7264 strcpy (tlsname, name);
7265 strcat (tlsname,
7266 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7267 tlsaddr = copy_rtx (addr);
7268 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7270 else
7271 tlsaddr = addr;
7273 /* Place addr into TOC constant pool. */
7274 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7276 /* Output the TOC entry and create the MEM referencing the value. */
7277 if (constant_pool_expr_p (XEXP (sym, 0))
7278 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7280 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7281 mem = gen_const_mem (Pmode, tocref);
7282 set_mem_alias_set (mem, get_TOC_alias_set ());
7284 else
7285 return sym;
7287 /* Use global-dynamic for local-dynamic. */
7288 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7289 || model == TLS_MODEL_LOCAL_DYNAMIC)
7291 /* Create new TOC reference for @m symbol. */
7292 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7293 tlsname = XALLOCAVEC (char, strlen (name) + 2); /* "*LCM" + name+3 + nul. */
7294 strcpy (tlsname, "*LCM");
7295 strcat (tlsname, name + 3);
7296 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7297 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7298 tocref = create_TOC_reference (modaddr, NULL_RTX);
7299 rtx modmem = gen_const_mem (Pmode, tocref);
7300 set_mem_alias_set (modmem, get_TOC_alias_set ());
7302 rtx modreg = gen_reg_rtx (Pmode);
7303 emit_insn (gen_rtx_SET (modreg, modmem));
7305 tmpreg = gen_reg_rtx (Pmode);
7306 emit_insn (gen_rtx_SET (tmpreg, mem));
7308 dest = gen_reg_rtx (Pmode);
7309 if (TARGET_32BIT)
7310 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7311 else
7312 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7313 return dest;
7315 /* Obtain the TLS pointer: a call on 32-bit, GPR 13 on 64-bit. */
7316 else if (TARGET_32BIT)
7318 tlsreg = gen_reg_rtx (SImode);
7319 emit_insn (gen_tls_get_tpointer (tlsreg));
7321 else
7322 tlsreg = gen_rtx_REG (DImode, 13);
7324 /* Load the TOC value into temporary register. */
7325 tmpreg = gen_reg_rtx (Pmode);
7326 emit_insn (gen_rtx_SET (tmpreg, mem));
7327 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7328 gen_rtx_MINUS (Pmode, addr, tlsreg));
7330 /* Add TOC symbol value to TLS pointer. */
7331 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7333 return dest;
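/* Editor's sketch (assumed shapes, not from the original source): for
   global- or local-dynamic the code above loads two TOC entries, the
   module handle (the "*LCM" symbol) and the variable offset, and calls
   __tls_get_addr with them.  For the other models it loads the
   variable's offset from the thread pointer out of the TOC and adds it
   to the thread pointer, which is GPR 13 on 64-bit and is obtained by
   a small helper call on 32-bit.  */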
7336 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7337 this (thread-local) address. */
7339 static rtx
7340 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7342 rtx dest, insn;
7344 if (TARGET_XCOFF)
7345 return rs6000_legitimize_tls_address_aix (addr, model);
7347 dest = gen_reg_rtx (Pmode);
7348 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7350 rtx tlsreg;
7352 if (TARGET_64BIT)
7354 tlsreg = gen_rtx_REG (Pmode, 13);
7355 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7357 else
7359 tlsreg = gen_rtx_REG (Pmode, 2);
7360 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7362 emit_insn (insn);
7364 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7366 rtx tlsreg, tmp;
7368 tmp = gen_reg_rtx (Pmode);
7369 if (TARGET_64BIT)
7371 tlsreg = gen_rtx_REG (Pmode, 13);
7372 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7374 else
7376 tlsreg = gen_rtx_REG (Pmode, 2);
7377 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7379 emit_insn (insn);
7380 if (TARGET_64BIT)
7381 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7382 else
7383 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7384 emit_insn (insn);
7386 else
7388 rtx r3, got, tga, tmp1, tmp2, call_insn;
7390 /* We currently use relocations like @got@tlsgd for tls, which
7391 means the linker will handle allocation of tls entries, placing
7392 them in the .got section. So use a pointer to the .got section,
7393 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7394 or to secondary GOT sections used by 32-bit -fPIC. */
7395 if (TARGET_64BIT)
7396 got = gen_rtx_REG (Pmode, 2);
7397 else
7399 if (flag_pic == 1)
7400 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7401 else
7403 rtx gsym = rs6000_got_sym ();
7404 got = gen_reg_rtx (Pmode);
7405 if (flag_pic == 0)
7406 rs6000_emit_move (got, gsym, Pmode);
7407 else
7409 rtx mem, lab, last;
7411 tmp1 = gen_reg_rtx (Pmode);
7412 tmp2 = gen_reg_rtx (Pmode);
7413 mem = gen_const_mem (Pmode, tmp1);
7414 lab = gen_label_rtx ();
7415 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7416 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7417 if (TARGET_LINK_STACK)
7418 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7419 emit_move_insn (tmp2, mem);
7420 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7421 set_unique_reg_note (last, REG_EQUAL, gsym);
7426 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7428 tga = rs6000_tls_get_addr ();
7429 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7430 1, const0_rtx, Pmode);
7432 r3 = gen_rtx_REG (Pmode, 3);
7433 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7435 if (TARGET_64BIT)
7436 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7437 else
7438 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7440 else if (DEFAULT_ABI == ABI_V4)
7441 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7442 else
7443 gcc_unreachable ();
7444 call_insn = last_call_insn ();
7445 PATTERN (call_insn) = insn;
7446 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7447 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7448 pic_offset_table_rtx);
7450 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7452 tga = rs6000_tls_get_addr ();
7453 tmp1 = gen_reg_rtx (Pmode);
7454 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7455 1, const0_rtx, Pmode);
7457 r3 = gen_rtx_REG (Pmode, 3);
7458 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7460 if (TARGET_64BIT)
7461 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7462 else
7463 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7465 else if (DEFAULT_ABI == ABI_V4)
7466 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7467 else
7468 gcc_unreachable ();
7469 call_insn = last_call_insn ();
7470 PATTERN (call_insn) = insn;
7471 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7472 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7473 pic_offset_table_rtx);
7475 if (rs6000_tls_size == 16)
7477 if (TARGET_64BIT)
7478 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7479 else
7480 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7482 else if (rs6000_tls_size == 32)
7484 tmp2 = gen_reg_rtx (Pmode);
7485 if (TARGET_64BIT)
7486 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7487 else
7488 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7489 emit_insn (insn);
7490 if (TARGET_64BIT)
7491 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7492 else
7493 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7495 else
7497 tmp2 = gen_reg_rtx (Pmode);
7498 if (TARGET_64BIT)
7499 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7500 else
7501 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7502 emit_insn (insn);
7503 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
7505 emit_insn (insn);
7507 else
7509 /* IE, or 64-bit offset LE. */
7510 tmp2 = gen_reg_rtx (Pmode);
7511 if (TARGET_64BIT)
7512 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7513 else
7514 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7515 emit_insn (insn);
7516 if (TARGET_64BIT)
7517 insn = gen_tls_tls_64 (dest, tmp2, addr);
7518 else
7519 insn = gen_tls_tls_32 (dest, tmp2, addr);
7520 emit_insn (insn);
7524 return dest;
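/* Editor's sketch of the 64-bit ELF sequences selected above (the
   standard PowerPC TLS ABI forms; an assumption, not part of the
   original source):

       local-exec, -mtls-size=16:  addi  rd,r13,sym@tprel
       local-exec, -mtls-size=32:  addis rt,r13,sym@tprel@ha
                                   addi  rd,rt,sym@tprel@l
       initial-exec:               ld    rt,sym@got@tprel(r2)
                                   add   rd,rt,sym@tls
       global-dynamic:             addi  r3,r2,sym@got@tlsgd
                                   bl    __tls_get_addr(sym@tlsgd)  */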
7527 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7529 static bool
7530 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7532 if (GET_CODE (x) == HIGH
7533 && GET_CODE (XEXP (x, 0)) == UNSPEC)
7534 return true;
7536 /* A TLS symbol in the TOC cannot contain a sum. */
7537 if (GET_CODE (x) == CONST
7538 && GET_CODE (XEXP (x, 0)) == PLUS
7539 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7540 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
7541 return true;
7543 /* Do not place an ELF TLS symbol in the constant pool. */
7544 return TARGET_ELF && tls_referenced_p (x);
7547 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
7548 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
7549 can be addressed relative to the toc pointer. */
7551 static bool
7552 use_toc_relative_ref (rtx sym, machine_mode mode)
7554 return ((constant_pool_expr_p (sym)
7555 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
7556 get_pool_mode (sym)))
7557 || (TARGET_CMODEL == CMODEL_MEDIUM
7558 && SYMBOL_REF_LOCAL_P (sym)
7559 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
7562 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
7563 replace the input X, or the original X if no replacement is called for.
7564 The output parameter *WIN is 1 if the calling macro should goto WIN,
7565 0 if it should not.
7567 For RS/6000, we wish to handle large displacements off a base
7568 register by splitting the addend across an addi/addis pair and the mem insn.
7569 This cuts the number of extra insns needed from 3 to 1.
7571 On Darwin, we use this to generate code for floating point constants.
7572 A movsf_low is generated so we wind up with 2 instructions rather than 3.
7573 The Darwin code is inside #if TARGET_MACHO because only then are the
7574 machopic_* functions defined. */
7575 static rtx
7576 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
7577 int opnum, int type,
7578 int ind_levels ATTRIBUTE_UNUSED, int *win)
7580 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7582 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
7583 DFmode/DImode MEM. */
7584 if (reg_offset_p
7585 && opnum == 1
7586 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
7587 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
7588 reg_offset_p = false;
7590 /* We must recognize output that we have already generated ourselves. */
7591 if (GET_CODE (x) == PLUS
7592 && GET_CODE (XEXP (x, 0)) == PLUS
7593 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7594 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7595 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7597 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7598 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7599 opnum, (enum reload_type) type);
7600 *win = 1;
7601 return x;
7604 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
7605 if (GET_CODE (x) == LO_SUM
7606 && GET_CODE (XEXP (x, 0)) == HIGH)
7608 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7609 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7610 opnum, (enum reload_type) type);
7611 *win = 1;
7612 return x;
7615 #if TARGET_MACHO
7616 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
7617 && GET_CODE (x) == LO_SUM
7618 && GET_CODE (XEXP (x, 0)) == PLUS
7619 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
7620 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
7621 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
7622 && machopic_operand_p (XEXP (x, 1)))
7624 /* Result of previous invocation of this function on Darwin
7625 floating point constant. */
7626 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7627 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7628 opnum, (enum reload_type) type);
7629 *win = 1;
7630 return x;
7632 #endif
7634 if (TARGET_CMODEL != CMODEL_SMALL
7635 && reg_offset_p
7636 && small_toc_ref (x, VOIDmode))
7638 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
7639 x = gen_rtx_LO_SUM (Pmode, hi, x);
7640 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7641 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7642 opnum, (enum reload_type) type);
7643 *win = 1;
7644 return x;
7647 if (GET_CODE (x) == PLUS
7648 && GET_CODE (XEXP (x, 0)) == REG
7649 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
7650 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
7651 && GET_CODE (XEXP (x, 1)) == CONST_INT
7652 && reg_offset_p
7653 && !SPE_VECTOR_MODE (mode)
7654 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7655 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
7657 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
7658 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
7659 HOST_WIDE_INT high
7660 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
7662 /* Check for 32-bit overflow. */
7663 if (high + low != val)
7665 *win = 0;
7666 return x;
7669 /* Reload the high part into a base reg; leave the low part
7670 in the mem directly. */
7672 x = gen_rtx_PLUS (GET_MODE (x),
7673 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
7674 GEN_INT (high)),
7675 GEN_INT (low));
7677 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7678 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7679 opnum, (enum reload_type) type);
7680 *win = 1;
7681 return x;
7684 if (GET_CODE (x) == SYMBOL_REF
7685 && reg_offset_p
7686 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
7687 && !SPE_VECTOR_MODE (mode)
7688 #if TARGET_MACHO
7689 && DEFAULT_ABI == ABI_DARWIN
7690 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
7691 && machopic_symbol_defined_p (x)
7692 #else
7693 && DEFAULT_ABI == ABI_V4
7694 && !flag_pic
7695 #endif
7696 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
7697 The same goes for DImode without 64-bit gprs and DFmode and DDmode
7698 without fprs.
7699 ??? Assume floating point reg based on mode? This assumption is
7700 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
7701 where reload ends up doing a DFmode load of a constant from
7702 mem using two gprs. Unfortunately, at this point reload
7703 hasn't yet selected regs so poking around in reload data
7704 won't help and even if we could figure out the regs reliably,
7705 we'd still want to allow this transformation when the mem is
7706 naturally aligned. Since we say the address is good here, we
7707 can't disable offsets from LO_SUMs in mem_operand_gpr.
7708 FIXME: Allow offset from lo_sum for other modes too, when
7709 mem is sufficiently aligned.
7711 Also disallow this if the type can go in VMX/Altivec registers, since
7712 those registers do not have d-form (reg+offset) address modes. */
7713 && !reg_addr[mode].scalar_in_vmx_p
7714 && mode != TFmode
7715 && mode != TDmode
7716 && (mode != TImode || !TARGET_VSX_TIMODE)
7717 && mode != PTImode
7718 && (mode != DImode || TARGET_POWERPC64)
7719 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
7720 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
7722 #if TARGET_MACHO
7723 if (flag_pic)
7725 rtx offset = machopic_gen_offset (x);
7726 x = gen_rtx_LO_SUM (GET_MODE (x),
7727 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
7728 gen_rtx_HIGH (Pmode, offset)), offset);
7730 else
7731 #endif
7732 x = gen_rtx_LO_SUM (GET_MODE (x),
7733 gen_rtx_HIGH (Pmode, x), x);
7735 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7736 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7737 opnum, (enum reload_type) type);
7738 *win = 1;
7739 return x;
7742 /* Reload an offset address wrapped by an AND that represents the
7743 masking of the lower bits. Strip the outer AND and let reload
7744 convert the offset address into an indirect address. For VSX,
7745 force reload to create the address with an AND in a separate
7746 register, because we can't guarantee an altivec register will
7747 be used. */
7748 if (VECTOR_MEM_ALTIVEC_P (mode)
7749 && GET_CODE (x) == AND
7750 && GET_CODE (XEXP (x, 0)) == PLUS
7751 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7752 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7753 && GET_CODE (XEXP (x, 1)) == CONST_INT
7754 && INTVAL (XEXP (x, 1)) == -16)
7756 x = XEXP (x, 0);
7757 *win = 1;
7758 return x;
7761 if (TARGET_TOC
7762 && reg_offset_p
7763 && GET_CODE (x) == SYMBOL_REF
7764 && use_toc_relative_ref (x, mode))
7766 x = create_TOC_reference (x, NULL_RTX);
7767 if (TARGET_CMODEL != CMODEL_SMALL)
7768 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7769 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7770 opnum, (enum reload_type) type);
7771 *win = 1;
7772 return x;
7774 *win = 0;
7775 return x;
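/* Editor's illustration (a standalone sketch, not part of GCC; compile
   it separately, assuming a 64-bit long): the 16-bit high/low split
   used for large displacements in the function above always
   reassembles exactly, because the low part is sign-extended and the
   borrow is folded into the high part.  */
#if 0
#include <assert.h>
#include <stdio.h>

int
main (void)
{
  long vals[] = { 0x12345678L, 0x1234fff0L, -0x7000L };
  for (int i = 0; i < 3; i++)
    {
      long val = vals[i];
      /* Same expressions as the reload splitter above.  */
      long low = ((val & 0xffff) ^ 0x8000) - 0x8000;
      long high = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
      assert (high + low == val);   /* holds whenever 32 bits suffice */
      printf ("val=%#lx -> high=%#lx low=%ld\n", val, high, low);
    }
  return 0;
}
#endif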
7778 /* Debug version of rs6000_legitimize_reload_address. */
7779 static rtx
7780 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
7781 int opnum, int type,
7782 int ind_levels, int *win)
7784 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
7785 ind_levels, win);
7786 fprintf (stderr,
7787 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
7788 "type = %d, ind_levels = %d, win = %d, original addr:\n",
7789 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
7790 debug_rtx (x);
7792 if (x == ret)
7793 fprintf (stderr, "Same address returned\n");
7794 else if (!ret)
7795 fprintf (stderr, "NULL returned\n");
7796 else
7798 fprintf (stderr, "New address:\n");
7799 debug_rtx (ret);
7802 return ret;
7805 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
7806 that is a valid memory address for an instruction.
7807 The MODE argument is the machine mode for the MEM expression
7808 that wants to use this address.
7810 On the RS/6000, there are four valid kinds of address: a SYMBOL_REF that
7811 refers to a constant pool entry of an address (or the sum of it
7812 plus a constant), a short (16-bit signed) constant plus a register,
7813 the sum of two registers, or a register indirect, possibly with an
7814 auto-increment. For DFmode, DDmode and DImode with a constant plus
7815 register, we must ensure that both words are addressable, or on
7816 PowerPC64 that the offset is word-aligned.
7818 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
7819 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
7820 because adjacent memory cells are accessed by adding word-sized offsets
7821 during assembly output. */
7822 static bool
7823 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
7825 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7827 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
7828 if (VECTOR_MEM_ALTIVEC_P (mode)
7829 && GET_CODE (x) == AND
7830 && GET_CODE (XEXP (x, 1)) == CONST_INT
7831 && INTVAL (XEXP (x, 1)) == -16)
7832 x = XEXP (x, 0);
7834 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
7835 return 0;
7836 if (legitimate_indirect_address_p (x, reg_ok_strict))
7837 return 1;
7838 if (TARGET_UPDATE
7839 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
7840 && mode_supports_pre_incdec_p (mode)
7841 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
7842 return 1;
7843 if (virtual_stack_registers_memory_p (x))
7844 return 1;
7845 if (reg_offset_p && legitimate_small_data_p (mode, x))
7846 return 1;
7847 if (reg_offset_p
7848 && legitimate_constant_pool_address_p (x, mode,
7849 reg_ok_strict || lra_in_progress))
7850 return 1;
7851 /* For TImode, if we have load/store quad and TImode in VSX registers, only
7852 allow register indirect addresses. This will allow the values to go in
7853 either GPRs or VSX registers without reloading. The vector types would
7854 tend to go into VSX registers, so we allow REG+REG, while TImode seems
7855 somewhat split, in that some uses are GPR based, and some VSX based. */
7856 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
7857 return 0;
7858 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
7859 if (! reg_ok_strict
7860 && reg_offset_p
7861 && GET_CODE (x) == PLUS
7862 && GET_CODE (XEXP (x, 0)) == REG
7863 && (XEXP (x, 0) == virtual_stack_vars_rtx
7864 || XEXP (x, 0) == arg_pointer_rtx)
7865 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7866 return 1;
7867 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
7868 return 1;
7869 if (mode != TFmode
7870 && mode != TDmode
7871 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7872 || TARGET_POWERPC64
7873 || (mode != DFmode && mode != DDmode)
7874 || (TARGET_E500_DOUBLE && mode != DDmode))
7875 && (TARGET_POWERPC64 || mode != DImode)
7876 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
7877 && mode != PTImode
7878 && !avoiding_indexed_address_p (mode)
7879 && legitimate_indexed_address_p (x, reg_ok_strict))
7880 return 1;
7881 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
7882 && mode_supports_pre_modify_p (mode)
7883 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
7884 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
7885 reg_ok_strict, false)
7886 || (!avoiding_indexed_address_p (mode)
7887 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
7888 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7889 return 1;
7890 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
7891 return 1;
7892 return 0;
7895 /* Debug version of rs6000_legitimate_address_p. */
7896 static bool
7897 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
7898 bool reg_ok_strict)
7900 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
7901 fprintf (stderr,
7902 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
7903 "strict = %d, reload = %s, code = %s\n",
7904 ret ? "true" : "false",
7905 GET_MODE_NAME (mode),
7906 reg_ok_strict,
7907 (reload_completed
7908 ? "after"
7909 : (reload_in_progress ? "progress" : "before")),
7910 GET_RTX_NAME (GET_CODE (x)));
7911 debug_rtx (x);
7913 return ret;
7916 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
7918 static bool
7919 rs6000_mode_dependent_address_p (const_rtx addr,
7920 addr_space_t as ATTRIBUTE_UNUSED)
7922 return rs6000_mode_dependent_address_ptr (addr);
7925 /* Go to LABEL if ADDR (a legitimate address expression)
7926 has an effect that depends on the machine mode it is used for.
7928 On the RS/6000 this is true of all integral offsets (since AltiVec
7929 and VSX modes don't allow them) and of pre-increment and decrement addressing.
7931 ??? Except that due to conceptual problems in offsettable_address_p
7932 we can't really report the problems of integral offsets. So leave
7933 this assuming that the adjustable offset must be valid for the
7934 sub-words of a TFmode operand, which is what we had before. */
7936 static bool
7937 rs6000_mode_dependent_address (const_rtx addr)
7939 switch (GET_CODE (addr))
7941 case PLUS:
7942 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
7943 is considered a legitimate address before reload, so there
7944 are no offset restrictions in that case. Note that this
7945 condition is safe in strict mode because any address involving
7946 virtual_stack_vars_rtx or arg_pointer_rtx would already have
7947 been rejected as illegitimate. */
7948 if (XEXP (addr, 0) != virtual_stack_vars_rtx
7949 && XEXP (addr, 0) != arg_pointer_rtx
7950 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
7952 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
7953 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
7955 break;
7957 case LO_SUM:
7958 /* Anything in the constant pool is sufficiently aligned that
7959 all bytes have the same high part address. */
7960 return !legitimate_constant_pool_address_p (addr, QImode, false);
7962 /* Auto-increment cases are now treated generically in recog.c. */
7963 case PRE_MODIFY:
7964 return TARGET_UPDATE;
7966 /* AND is only allowed in Altivec loads. */
7967 case AND:
7968 return true;
7970 default:
7971 break;
7974 return false;
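/* Editor's worked example (editorial, derived from the test above):
   with TARGET_POWERPC64 an offset becomes mode-dependent once
   val >= 0x7ff8 or val < -0x8000; the 8 bytes of slack below the
   16-bit displacement limit keep the second doubleword of a 16-byte
   TFmode access (offset + 8) addressable.  32-bit keeps 12 bytes of
   slack so the words at offset + 4, + 8 and + 12 stay in range.  */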
7977 /* Debug version of rs6000_mode_dependent_address. */
7978 static bool
7979 rs6000_debug_mode_dependent_address (const_rtx addr)
7981 bool ret = rs6000_mode_dependent_address (addr);
7983 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
7984 ret ? "true" : "false");
7985 debug_rtx (addr);
7987 return ret;
7990 /* Implement FIND_BASE_TERM. */
7992 rtx
7993 rs6000_find_base_term (rtx op)
7995 rtx base;
7997 base = op;
7998 if (GET_CODE (base) == CONST)
7999 base = XEXP (base, 0);
8000 if (GET_CODE (base) == PLUS)
8001 base = XEXP (base, 0);
8002 if (GET_CODE (base) == UNSPEC)
8003 switch (XINT (base, 1))
8005 case UNSPEC_TOCREL:
8006 case UNSPEC_MACHOPIC_OFFSET:
8007 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8008 for aliasing purposes. */
8009 return XVECEXP (base, 0, 0);
8012 return op;
8015 /* More elaborate version of recog's offsettable_memref_p predicate
8016 that works around the ??? note of rs6000_mode_dependent_address.
8017 In particular it accepts
8019 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8021 in 32-bit mode, which the recog predicate rejects. */
8023 static bool
8024 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
8026 bool worst_case;
8028 if (!MEM_P (op))
8029 return false;
8031 /* First mimic offsettable_memref_p. */
8032 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
8033 return true;
8035 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
8036 the latter predicate knows nothing about the mode of the memory
8037 reference and, therefore, assumes that it is the largest supported
8038 mode (TFmode). As a consequence, legitimate offsettable memory
8039 references are rejected. rs6000_legitimate_offset_address_p contains
8040 the correct logic for the PLUS case of rs6000_mode_dependent_address,
8041 at least with a little bit of help here given that we know the
8042 actual registers used. */
8043 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
8044 || GET_MODE_SIZE (reg_mode) == 4);
8045 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
8046 true, worst_case);
8049 /* Change register usage conditional on target flags. */
8050 static void
8051 rs6000_conditional_register_usage (void)
8053 int i;
8055 if (TARGET_DEBUG_TARGET)
8056 fprintf (stderr, "rs6000_conditional_register_usage called\n");
8058 /* Set MQ register fixed (already call_used) so that it will not be
8059 allocated. */
8060 fixed_regs[64] = 1;
8062 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
8063 if (TARGET_64BIT)
8064 fixed_regs[13] = call_used_regs[13]
8065 = call_really_used_regs[13] = 1;
8067 /* Conditionally disable FPRs. */
8068 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
8069 for (i = 32; i < 64; i++)
8070 fixed_regs[i] = call_used_regs[i]
8071 = call_really_used_regs[i] = 1;
8073 /* The TOC register is not killed across calls in a way that is
8074 visible to the compiler. */
8075 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8076 call_really_used_regs[2] = 0;
8078 if (DEFAULT_ABI == ABI_V4
8079 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8080 && flag_pic == 2)
8081 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8083 if (DEFAULT_ABI == ABI_V4
8084 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8085 && flag_pic == 1)
8086 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8087 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8088 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8090 if (DEFAULT_ABI == ABI_DARWIN
8091 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
8092 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8093 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8094 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8096 if (TARGET_TOC && TARGET_MINIMAL_TOC)
8097 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8098 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8100 if (TARGET_SPE)
8102 global_regs[SPEFSCR_REGNO] = 1;
8103 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
8104 registers in prologues and epilogues. We no longer use r14
8105 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
8106 pool for link-compatibility with older versions of GCC. Once
8107 "old" code has died out, we can return r14 to the allocation
8108 pool. */
8109 fixed_regs[14]
8110 = call_used_regs[14]
8111 = call_really_used_regs[14] = 1;
8114 if (!TARGET_ALTIVEC && !TARGET_VSX)
8116 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
8117 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8118 call_really_used_regs[VRSAVE_REGNO] = 1;
8121 if (TARGET_ALTIVEC || TARGET_VSX)
8122 global_regs[VSCR_REGNO] = 1;
8124 if (TARGET_ALTIVEC_ABI)
8126 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
8127 call_used_regs[i] = call_really_used_regs[i] = 1;
8129 /* AIX reserves VR20:31 in non-extended ABI mode. */
8130 if (TARGET_XCOFF)
8131 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8132 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8137 /* Output insns to set DEST equal to the constant SOURCE as a series of
8138 lis, ori and shl instructions and return TRUE. */
8140 bool
8141 rs6000_emit_set_const (rtx dest, rtx source)
8143 machine_mode mode = GET_MODE (dest);
8144 rtx temp, set;
8145 rtx_insn *insn;
8146 HOST_WIDE_INT c;
8148 gcc_checking_assert (CONST_INT_P (source));
8149 c = INTVAL (source);
8150 switch (mode)
8152 case QImode:
8153 case HImode:
8154 emit_insn (gen_rtx_SET (dest, source));
8155 return true;
8157 case SImode:
8158 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8160 emit_insn (gen_rtx_SET (copy_rtx (temp),
8161 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8162 emit_insn (gen_rtx_SET (dest,
8163 gen_rtx_IOR (SImode, copy_rtx (temp),
8164 GEN_INT (c & 0xffff))));
8165 break;
8167 case DImode:
8168 if (!TARGET_POWERPC64)
8170 rtx hi, lo;
8172 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8173 DImode);
8174 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8175 DImode);
8176 emit_move_insn (hi, GEN_INT (c >> 32));
8177 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8178 emit_move_insn (lo, GEN_INT (c));
8180 else
8181 rs6000_emit_set_long_const (dest, c);
8182 break;
8184 default:
8185 gcc_unreachable ();
8188 insn = get_last_insn ();
8189 set = single_set (insn);
8190 if (! CONSTANT_P (SET_SRC (set)))
8191 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8193 return true;
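/* Editor's illustration (a standalone sketch, not part of GCC): the
   SImode case above becomes a lis/ori pair; for 0x12345678, lis
   materializes the high 16 bits and ori merges in the low 16, and the
   two halves always OR back to the original constant.  */
#if 0
#include <assert.h>

int
main (void)
{
  int c = 0x12345678;
  int high = c & ~0xffff;   /* what the first SET loads (lis)  */
  int low = c & 0xffff;     /* what the IOR merges in (ori)    */
  assert ((high | low) == c);
  return 0;
}
#endif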
8196 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8197 Output insns to set DEST equal to the constant C as a series of
8198 lis, ori and shl instructions. */
8200 static void
8201 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8203 rtx temp;
8204 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8206 ud1 = c & 0xffff;
8207 c = c >> 16;
8208 ud2 = c & 0xffff;
8209 c = c >> 16;
8210 ud3 = c & 0xffff;
8211 c = c >> 16;
8212 ud4 = c & 0xffff;
8214 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8215 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8216 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8218 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8219 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8221 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8223 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8224 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8225 if (ud1 != 0)
8226 emit_move_insn (dest,
8227 gen_rtx_IOR (DImode, copy_rtx (temp),
8228 GEN_INT (ud1)));
8230 else if (ud3 == 0 && ud4 == 0)
8232 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8234 gcc_assert (ud2 & 0x8000);
8235 emit_move_insn (copy_rtx (temp),
8236 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8237 if (ud1 != 0)
8238 emit_move_insn (copy_rtx (temp),
8239 gen_rtx_IOR (DImode, copy_rtx (temp),
8240 GEN_INT (ud1)));
8241 emit_move_insn (dest,
8242 gen_rtx_ZERO_EXTEND (DImode,
8243 gen_lowpart (SImode,
8244 copy_rtx (temp))));
8246 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8247 || (ud4 == 0 && ! (ud3 & 0x8000)))
8249 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8251 emit_move_insn (copy_rtx (temp),
8252 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8253 if (ud2 != 0)
8254 emit_move_insn (copy_rtx (temp),
8255 gen_rtx_IOR (DImode, copy_rtx (temp),
8256 GEN_INT (ud2)));
8257 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8258 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8259 GEN_INT (16)));
8260 if (ud1 != 0)
8261 emit_move_insn (dest,
8262 gen_rtx_IOR (DImode, copy_rtx (temp),
8263 GEN_INT (ud1)));
8265 else
8267 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8269 emit_move_insn (copy_rtx (temp),
8270 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8271 if (ud3 != 0)
8272 emit_move_insn (copy_rtx (temp),
8273 gen_rtx_IOR (DImode, copy_rtx (temp),
8274 GEN_INT (ud3)));
8276 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8277 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8278 GEN_INT (32)));
8279 if (ud2 != 0)
8280 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8281 gen_rtx_IOR (DImode, copy_rtx (temp),
8282 GEN_INT (ud2 << 16)));
8283 if (ud1 != 0)
8284 emit_move_insn (dest,
8285 gen_rtx_IOR (DImode, copy_rtx (temp),
8286 GEN_INT (ud1)));
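/* Editor's worked example (editorial, not from the original source):
   for the full-width constant 0x123456789ABCDEF0 the final arm above
   splits c into ud4..ud1 = 0x1234, 0x5678, 0x9ABC, 0xDEF0 and emits
   the classic five-insn sequence

       lis  rT,0x1234
       ori  rT,rT,0x5678
       sldi rT,rT,32
       oris rT,rT,0x9ABC
       ori  rD,rT,0xDEF0

   while constants whose upper words are all zeros or all ones fall
   into the shorter early cases.  */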
8290 /* Helper for the following. Get rid of [r+r] memory refs
8291 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
8293 static void
8294 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8296 if (reload_in_progress)
8297 return;
8299 if (GET_CODE (operands[0]) == MEM
8300 && GET_CODE (XEXP (operands[0], 0)) != REG
8301 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8302 GET_MODE (operands[0]), false))
8303 operands[0]
8304 = replace_equiv_address (operands[0],
8305 copy_addr_to_reg (XEXP (operands[0], 0)));
8307 if (GET_CODE (operands[1]) == MEM
8308 && GET_CODE (XEXP (operands[1], 0)) != REG
8309 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8310 GET_MODE (operands[1]), false))
8311 operands[1]
8312 = replace_equiv_address (operands[1],
8313 copy_addr_to_reg (XEXP (operands[1], 0)));
8316 /* Generate a vector of constants to permute MODE for a little-endian
8317 storage operation by swapping the two halves of a vector. */
8318 static rtvec
8319 rs6000_const_vec (machine_mode mode)
8321 int i, subparts;
8322 rtvec v;
8324 switch (mode)
8326 case V1TImode:
8327 subparts = 1;
8328 break;
8329 case V2DFmode:
8330 case V2DImode:
8331 subparts = 2;
8332 break;
8333 case V4SFmode:
8334 case V4SImode:
8335 subparts = 4;
8336 break;
8337 case V8HImode:
8338 subparts = 8;
8339 break;
8340 case V16QImode:
8341 subparts = 16;
8342 break;
8343 default:
8344 gcc_unreachable();
8347 v = rtvec_alloc (subparts);
8349 for (i = 0; i < subparts / 2; ++i)
8350 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8351 for (i = subparts / 2; i < subparts; ++i)
8352 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8354 return v;
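/* Editor's illustration (a standalone sketch, not part of GCC): for a
   four-element mode such as V4SImode the loops above produce the
   selector { 2, 3, 0, 1 }, i.e. the two doubleword halves of the
   vector trade places.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int subparts = 4, v[16], i;
  for (i = 0; i < subparts / 2; i++)
    v[i] = i + subparts / 2;
  for (i = subparts / 2; i < subparts; i++)
    v[i] = i - subparts / 2;
  for (i = 0; i < subparts; i++)
    printf ("%d ", v[i]);   /* prints: 2 3 0 1 */
  return 0;
}
#endif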
8357 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8358 for a VSX load or store operation. */
8359 rtx
8360 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8362 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8363 return gen_rtx_VEC_SELECT (mode, source, par);
8366 /* Emit a little-endian load from vector memory location SOURCE to VSX
8367 register DEST in mode MODE. The load is done with two permuting
8368 insns that represent an lxvd2x and an xxpermdi. */
8369 void
8370 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8372 rtx tmp, permute_mem, permute_reg;
8374 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8375 V1TImode). */
8376 if (mode == TImode || mode == V1TImode)
8378 mode = V2DImode;
8379 dest = gen_lowpart (V2DImode, dest);
8380 source = adjust_address (source, V2DImode, 0);
8383 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8384 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8385 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8386 emit_insn (gen_rtx_SET (tmp, permute_mem));
8387 emit_insn (gen_rtx_SET (dest, permute_reg));
8390 /* Emit a little-endian store to vector memory location DEST from VSX
8391 register SOURCE in mode MODE. The store is done with two permuting
8392 insns that represent an xxpermdi and an stxvd2x. */
8393 void
8394 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8396 rtx tmp, permute_src, permute_tmp;
8398 /* This should never be called during or after reload, because it does
8399 not re-permute the source register. It is intended only for use
8400 during expand. */
8401 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
8403 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8404 V1TImode). */
8405 if (mode == TImode || mode == V1TImode)
8407 mode = V2DImode;
8408 dest = adjust_address (dest, V2DImode, 0);
8409 source = gen_lowpart (V2DImode, source);
8412 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8413 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8414 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8415 emit_insn (gen_rtx_SET (tmp, permute_src));
8416 emit_insn (gen_rtx_SET (dest, permute_tmp));
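/* Editor's note (an assumption, not from the original source): on
   little-endian VSX, a raw lxvd2x/stxvd2x moves the two 64-bit halves
   of the vector in big-endian order, so each of the load/store helpers
   above pairs the memory access with an xxpermdi-style doubleword
   swap; the two swaps compose to the identity, giving a true
   little-endian load or store.  */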
8419 /* Emit a sequence representing a little-endian VSX load or store,
8420 moving data from SOURCE to DEST in mode MODE. This is done
8421 separately from rs6000_emit_move to ensure it is called only
8422 during expand. LE VSX loads and stores introduced later are
8423 handled with a split. The expand-time RTL generation allows
8424 us to optimize away redundant pairs of register-permutes. */
8425 void
8426 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
8428 gcc_assert (!BYTES_BIG_ENDIAN
8429 && VECTOR_MEM_VSX_P (mode)
8430 && !gpr_or_gpr_p (dest, source)
8431 && (MEM_P (source) ^ MEM_P (dest)));
8433 if (MEM_P (source))
8435 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8436 rs6000_emit_le_vsx_load (dest, source, mode);
8438 else
8440 if (!REG_P (source))
8441 source = force_reg (mode, source);
8442 rs6000_emit_le_vsx_store (dest, source, mode);
8446 /* Emit a move from SOURCE to DEST in mode MODE. */
8447 void
8448 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
8450 rtx operands[2];
8451 operands[0] = dest;
8452 operands[1] = source;
8454 if (TARGET_DEBUG_ADDR)
8456 fprintf (stderr,
8457 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8458 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8459 GET_MODE_NAME (mode),
8460 reload_in_progress,
8461 reload_completed,
8462 can_create_pseudo_p ());
8463 debug_rtx (dest);
8464 fprintf (stderr, "source:\n");
8465 debug_rtx (source);
8468 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
8469 if (CONST_WIDE_INT_P (operands[1])
8470 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8472 /* This should be fixed with the introduction of CONST_WIDE_INT. */
8473 gcc_unreachable ();
8476 /* Check if GCC is setting up a block move that will end up using FP
8477 registers as temporaries. We must make sure this is acceptable. */
8478 if (GET_CODE (operands[0]) == MEM
8479 && GET_CODE (operands[1]) == MEM
8480 && mode == DImode
8481 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8482 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8483 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8484 ? 32 : MEM_ALIGN (operands[0])))
8485 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8486 ? 32
8487 : MEM_ALIGN (operands[1]))))
8488 && ! MEM_VOLATILE_P (operands [0])
8489 && ! MEM_VOLATILE_P (operands [1]))
8491 emit_move_insn (adjust_address (operands[0], SImode, 0),
8492 adjust_address (operands[1], SImode, 0));
8493 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8494 adjust_address (copy_rtx (operands[1]), SImode, 4));
8495 return;
8498 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8499 && !gpc_reg_operand (operands[1], mode))
8500 operands[1] = force_reg (mode, operands[1]);
8502 /* Recognize the case where operand[1] is a reference to thread-local
8503 data and load its address to a register. */
8504 if (tls_referenced_p (operands[1]))
8506 enum tls_model model;
8507 rtx tmp = operands[1];
8508 rtx addend = NULL;
8510 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8512 addend = XEXP (XEXP (tmp, 0), 1);
8513 tmp = XEXP (XEXP (tmp, 0), 0);
8516 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8517 model = SYMBOL_REF_TLS_MODEL (tmp);
8518 gcc_assert (model != 0);
8520 tmp = rs6000_legitimize_tls_address (tmp, model);
8521 if (addend)
8523 tmp = gen_rtx_PLUS (mode, tmp, addend);
8524 tmp = force_operand (tmp, operands[0]);
8526 operands[1] = tmp;
8529 /* Handle the case where reload calls us with an invalid address. */
8530 if (reload_in_progress && mode == Pmode
8531 && (! general_operand (operands[1], mode)
8532 || ! nonimmediate_operand (operands[0], mode)))
8533 goto emit_set;
8535 /* 128-bit constant floating-point values on Darwin should really be loaded
8536 as two parts. However, this premature splitting is a problem when DFmode
8537 values can go into Altivec registers. */
8538 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
8539 && !reg_addr[DFmode].scalar_in_vmx_p
8540 && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
8542 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
8543 simplify_gen_subreg (DFmode, operands[1], mode, 0),
8544 DFmode);
8545 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
8546 GET_MODE_SIZE (DFmode)),
8547 simplify_gen_subreg (DFmode, operands[1], mode,
8548 GET_MODE_SIZE (DFmode)),
8549 DFmode);
8550 return;
8553 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
8554 cfun->machine->sdmode_stack_slot =
8555 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
8558 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
8559 p1:SD) if p1 is not of floating-point class and p0 is spilled, as
8560 we have no analogous movsd_store for this. */
8561 if (lra_in_progress && mode == DDmode
8562 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8563 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8564 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
8565 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
8567 enum reg_class cl;
8568 int regno = REGNO (SUBREG_REG (operands[1]));
8570 if (regno >= FIRST_PSEUDO_REGISTER)
8572 cl = reg_preferred_class (regno);
8573 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
8575 if (regno >= 0 && ! FP_REGNO_P (regno))
8577 mode = SDmode;
8578 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
8579 operands[1] = SUBREG_REG (operands[1]);
8582 if (lra_in_progress
8583 && mode == SDmode
8584 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8585 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8586 && (REG_P (operands[1])
8587 || (GET_CODE (operands[1]) == SUBREG
8588 && REG_P (SUBREG_REG (operands[1])))))
8590 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
8591 ? SUBREG_REG (operands[1]) : operands[1]);
8592 enum reg_class cl;
8594 if (regno >= FIRST_PSEUDO_REGISTER)
8596 cl = reg_preferred_class (regno);
8597 gcc_assert (cl != NO_REGS);
8598 regno = ira_class_hard_regs[cl][0];
8600 if (FP_REGNO_P (regno))
8602 if (GET_MODE (operands[0]) != DDmode)
8603 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
8604 emit_insn (gen_movsd_store (operands[0], operands[1]));
8606 else if (INT_REGNO_P (regno))
8607 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8608 else
8609 gcc_unreachable();
8610 return;
8612 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
8613 p:DD)) if p0 is not of floating-point class and p1 is spilled, as
8614 we have no analogous movsd_load for this. */
8615 if (lra_in_progress && mode == DDmode
8616 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
8617 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
8618 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8619 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8621 enum reg_class cl;
8622 int regno = REGNO (SUBREG_REG (operands[0]));
8624 if (regno >= FIRST_PSEUDO_REGISTER)
8626 cl = reg_preferred_class (regno);
8627 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
8629 if (regno >= 0 && ! FP_REGNO_P (regno))
8631 mode = SDmode;
8632 operands[0] = SUBREG_REG (operands[0]);
8633 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
8636 if (lra_in_progress
8637 && mode == SDmode
8638 && (REG_P (operands[0])
8639 || (GET_CODE (operands[0]) == SUBREG
8640 && REG_P (SUBREG_REG (operands[0]))))
8641 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8642 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8644 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
8645 ? SUBREG_REG (operands[0]) : operands[0]);
8646 enum reg_class cl;
8648 if (regno >= FIRST_PSEUDO_REGISTER)
8650 cl = reg_preferred_class (regno);
8651 gcc_assert (cl != NO_REGS);
8652 regno = ira_class_hard_regs[cl][0];
8654 if (FP_REGNO_P (regno))
8656 if (GET_MODE (operands[1]) != DDmode)
8657 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
8658 emit_insn (gen_movsd_load (operands[0], operands[1]));
8660 else if (INT_REGNO_P (regno))
8661 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8662 else
8663 gcc_unreachable();
8664 return;
8667 if (reload_in_progress
8668 && mode == SDmode
8669 && cfun->machine->sdmode_stack_slot != NULL_RTX
8670 && MEM_P (operands[0])
8671 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
8672 && REG_P (operands[1]))
8674 if (FP_REGNO_P (REGNO (operands[1])))
8676 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
8677 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8678 emit_insn (gen_movsd_store (mem, operands[1]));
8680 else if (INT_REGNO_P (REGNO (operands[1])))
8682 rtx mem = operands[0];
8683 if (BYTES_BIG_ENDIAN)
8684 mem = adjust_address_nv (mem, mode, 4);
8685 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8686 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
8688 else
8689 gcc_unreachable();
8690 return;
8692 if (reload_in_progress
8693 && mode == SDmode
8694 && REG_P (operands[0])
8695 && MEM_P (operands[1])
8696 && cfun->machine->sdmode_stack_slot != NULL_RTX
8697 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
8699 if (FP_REGNO_P (REGNO (operands[0])))
8701 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
8702 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8703 emit_insn (gen_movsd_load (operands[0], mem));
8705 else if (INT_REGNO_P (REGNO (operands[0])))
8707 rtx mem = operands[1];
8708 if (BYTES_BIG_ENDIAN)
8709 mem = adjust_address_nv (mem, mode, 4);
8710 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8711 emit_insn (gen_movsd_hardfloat (operands[0], mem));
8713 else
8714 gcc_unreachable();
8715 return;
8718 /* FIXME: In the long term, this switch statement should go away
8719 and be replaced by a sequence of tests based on things like
8720 mode == Pmode. */
8721 switch (mode)
8723 case HImode:
8724 case QImode:
8725 if (CONSTANT_P (operands[1])
8726 && GET_CODE (operands[1]) != CONST_INT)
8727 operands[1] = force_const_mem (mode, operands[1]);
8728 break;
8730 case TFmode:
8731 case TDmode:
8732 rs6000_eliminate_indexed_memrefs (operands);
8733 /* fall through */
8735 case DFmode:
8736 case DDmode:
8737 case SFmode:
8738 case SDmode:
8739 if (CONSTANT_P (operands[1])
8740 && ! easy_fp_constant (operands[1], mode))
8741 operands[1] = force_const_mem (mode, operands[1]);
8742 break;
8744 case V16QImode:
8745 case V8HImode:
8746 case V4SFmode:
8747 case V4SImode:
8748 case V4HImode:
8749 case V2SFmode:
8750 case V2SImode:
8751 case V1DImode:
8752 case V2DFmode:
8753 case V2DImode:
8754 case V1TImode:
8755 if (CONSTANT_P (operands[1])
8756 && !easy_vector_constant (operands[1], mode))
8757 operands[1] = force_const_mem (mode, operands[1]);
8758 break;
8760 case SImode:
8761 case DImode:
8762 /* Use the default pattern for the address of ELF small data. */
8763 if (TARGET_ELF
8764 && mode == Pmode
8765 && DEFAULT_ABI == ABI_V4
8766 && (GET_CODE (operands[1]) == SYMBOL_REF
8767 || GET_CODE (operands[1]) == CONST)
8768 && small_data_operand (operands[1], mode))
8770 emit_insn (gen_rtx_SET (operands[0], operands[1]));
8771 return;
8774 if (DEFAULT_ABI == ABI_V4
8775 && mode == Pmode && mode == SImode
8776 && flag_pic == 1 && got_operand (operands[1], mode))
8778 emit_insn (gen_movsi_got (operands[0], operands[1]));
8779 return;
8782 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
8783 && TARGET_NO_TOC
8784 && ! flag_pic
8785 && mode == Pmode
8786 && CONSTANT_P (operands[1])
8787 && GET_CODE (operands[1]) != HIGH
8788 && GET_CODE (operands[1]) != CONST_INT)
8790 rtx target = (!can_create_pseudo_p ()
8791 ? operands[0]
8792 : gen_reg_rtx (mode));
8794 /* If this is a function address on -mcall-aixdesc,
8795 convert it to the address of the descriptor. */
8796 if (DEFAULT_ABI == ABI_AIX
8797 && GET_CODE (operands[1]) == SYMBOL_REF
8798 && XSTR (operands[1], 0)[0] == '.')
8800 const char *name = XSTR (operands[1], 0);
8801 rtx new_ref;
8802 while (*name == '.')
8803 name++;
8804 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
8805 CONSTANT_POOL_ADDRESS_P (new_ref)
8806 = CONSTANT_POOL_ADDRESS_P (operands[1]);
8807 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
8808 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
8809 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
8810 operands[1] = new_ref;
8813 if (DEFAULT_ABI == ABI_DARWIN)
8815 #if TARGET_MACHO
8816 if (MACHO_DYNAMIC_NO_PIC_P)
8818 /* Take care of any required data indirection. */
8819 operands[1] = rs6000_machopic_legitimize_pic_address (
8820 operands[1], mode, operands[0]);
8821 if (operands[0] != operands[1])
8822 emit_insn (gen_rtx_SET (operands[0], operands[1]));
8823 return;
8825 #endif
8826 emit_insn (gen_macho_high (target, operands[1]));
8827 emit_insn (gen_macho_low (operands[0], target, operands[1]));
8828 return;
8831 emit_insn (gen_elf_high (target, operands[1]));
8832 emit_insn (gen_elf_low (operands[0], target, operands[1]));
8833 return;
8836 /* If this is a SYMBOL_REF that refers to a constant pool entry,
8837 and we have put it in the TOC, we just need to make a TOC-relative
8838 reference to it. */
8839 if (TARGET_TOC
8840 && GET_CODE (operands[1]) == SYMBOL_REF
8841 && use_toc_relative_ref (operands[1], mode))
8842 operands[1] = create_TOC_reference (operands[1], operands[0]);
8843 else if (mode == Pmode
8844 && CONSTANT_P (operands[1])
8845 && GET_CODE (operands[1]) != HIGH
8846 && ((GET_CODE (operands[1]) != CONST_INT
8847 && ! easy_fp_constant (operands[1], mode))
8848 || (GET_CODE (operands[1]) == CONST_INT
8849 && (num_insns_constant (operands[1], mode)
8850 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
8851 || (GET_CODE (operands[0]) == REG
8852 && FP_REGNO_P (REGNO (operands[0]))))
8853 && !toc_relative_expr_p (operands[1], false)
8854 && (TARGET_CMODEL == CMODEL_SMALL
8855 || can_create_pseudo_p ()
8856 || (REG_P (operands[0])
8857 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
8860 #if TARGET_MACHO
8861 /* Darwin uses a special PIC legitimizer. */
8862 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
8864 operands[1] =
8865 rs6000_machopic_legitimize_pic_address (operands[1], mode,
8866 operands[0]);
8867 if (operands[0] != operands[1])
8868 emit_insn (gen_rtx_SET (operands[0], operands[1]));
8869 return;
8871 #endif
8873 /* If we are to limit the number of things we put in the TOC and
8874 this is a symbol plus a constant we can add in one insn,
8875 just put the symbol in the TOC and add the constant. Don't do
8876 this if reload is in progress. */
8877 if (GET_CODE (operands[1]) == CONST
8878 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
8879 && GET_CODE (XEXP (operands[1], 0)) == PLUS
8880 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
8881 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
8882 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
8883 && ! side_effects_p (operands[0]))
8885 rtx sym =
8886 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
8887 rtx other = XEXP (XEXP (operands[1], 0), 1);
8889 sym = force_reg (mode, sym);
8890 emit_insn (gen_add3_insn (operands[0], sym, other));
8891 return;
8894 operands[1] = force_const_mem (mode, operands[1]);
8896 if (TARGET_TOC
8897 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8898 && constant_pool_expr_p (XEXP (operands[1], 0))
8899 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
8900 get_pool_constant (XEXP (operands[1], 0)),
8901 get_pool_mode (XEXP (operands[1], 0))))
8903 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
8904 operands[0]);
8905 operands[1] = gen_const_mem (mode, tocref);
8906 set_mem_alias_set (operands[1], get_TOC_alias_set ());
8909 break;
8911 case TImode:
8912 if (!VECTOR_MEM_VSX_P (TImode))
8913 rs6000_eliminate_indexed_memrefs (operands);
8914 break;
8916 case PTImode:
8917 rs6000_eliminate_indexed_memrefs (operands);
8918 break;
8920 default:
8921 fatal_insn ("bad move", gen_rtx_SET (dest, source));
8924 /* Above, we may have called force_const_mem which may have returned
8925 an invalid address. If we can, fix this up; otherwise, reload will
8926 have to deal with it. */
8927 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
8928 operands[1] = validize_mem (operands[1]);
8930 emit_set:
8931 emit_insn (gen_rtx_SET (operands[0], operands[1]));
8934 /* Return true if a structure, union or array containing FIELD should be
8935 accessed using `BLKmode'.
8937 For the SPE, simd types are V2SI, and gcc can be tempted to put the
8938 entire thing in a DI and use subregs to access the internals.
8939 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
8940 back-end. Because a single GPR can hold a V2SI, but not a DI, the
8941 best thing to do is set structs to BLKmode and avoid Severe Tire
8942 Damage.
8944 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
8945 fit into one register, whereas DI still needs two. */
8947 static bool
8948 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
8950 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
8951 || (TARGET_E500_DOUBLE && mode == DFmode));
8954 /* Nonzero if we can use a floating-point register to pass this arg. */
8955 #define USE_FP_FOR_ARG_P(CUM,MODE) \
8956 (SCALAR_FLOAT_MODE_P (MODE) \
8957 && (CUM)->fregno <= FP_ARG_MAX_REG \
8958 && TARGET_HARD_FLOAT && TARGET_FPRS)
8960 /* Nonzero if we can use an AltiVec register to pass this arg. */
8961 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
8962 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
8963 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
8964 && TARGET_ALTIVEC_ABI \
8965 && (NAMED))
8967 /* Walk down the type tree of TYPE counting consecutive base elements.
8968 If *MODEP is VOIDmode, then set it to the first valid floating point
8969 or vector type. If a non-floating point or vector type is found, or
8970 if a floating point or vector type that doesn't match a non-VOIDmode
8971 *MODEP is found, then return -1, otherwise return the count in the
8972 sub-tree. */
8974 static int
8975 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
8977 machine_mode mode;
8978 HOST_WIDE_INT size;
8980 switch (TREE_CODE (type))
8982 case REAL_TYPE:
8983 mode = TYPE_MODE (type);
8984 if (!SCALAR_FLOAT_MODE_P (mode))
8985 return -1;
8987 if (*modep == VOIDmode)
8988 *modep = mode;
8990 if (*modep == mode)
8991 return 1;
8993 break;
8995 case COMPLEX_TYPE:
8996 mode = TYPE_MODE (TREE_TYPE (type));
8997 if (!SCALAR_FLOAT_MODE_P (mode))
8998 return -1;
9000 if (*modep == VOIDmode)
9001 *modep = mode;
9003 if (*modep == mode)
9004 return 2;
9006 break;
9008 case VECTOR_TYPE:
9009 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
9010 return -1;
9012 /* Use V4SImode as representative of all 128-bit vector types. */
9013 size = int_size_in_bytes (type);
9014 switch (size)
9016 case 16:
9017 mode = V4SImode;
9018 break;
9019 default:
9020 return -1;
9023 if (*modep == VOIDmode)
9024 *modep = mode;
9026 /* Vector modes are considered to be opaque: two vectors are
9027 equivalent for the purposes of being homogeneous aggregates
9028 if they are the same size. */
9029 if (*modep == mode)
9030 return 1;
9032 break;
9034 case ARRAY_TYPE:
9036 int count;
9037 tree index = TYPE_DOMAIN (type);
9039 /* Can't handle incomplete types nor sizes that are not
9040 fixed. */
9041 if (!COMPLETE_TYPE_P (type)
9042 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9043 return -1;
9045 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
9046 if (count == -1
9047 || !index
9048 || !TYPE_MAX_VALUE (index)
9049 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
9050 || !TYPE_MIN_VALUE (index)
9051 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
9052 || count < 0)
9053 return -1;
9055 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
9056 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
9058 /* There must be no padding. */
9059 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9060 return -1;
9062 return count;
9065 case RECORD_TYPE:
9067 int count = 0;
9068 int sub_count;
9069 tree field;
9071 /* Can't handle incomplete types nor sizes that are not
9072 fixed. */
9073 if (!COMPLETE_TYPE_P (type)
9074 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9075 return -1;
9077 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9079 if (TREE_CODE (field) != FIELD_DECL)
9080 continue;
9082 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9083 if (sub_count < 0)
9084 return -1;
9085 count += sub_count;
9088 /* There must be no padding. */
9089 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9090 return -1;
9092 return count;
9095 case UNION_TYPE:
9096 case QUAL_UNION_TYPE:
9098 /* These aren't very interesting except in a degenerate case. */
9099 int count = 0;
9100 int sub_count;
9101 tree field;
9103 /* Can't handle incomplete types nor sizes that are not
9104 fixed. */
9105 if (!COMPLETE_TYPE_P (type)
9106 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9107 return -1;
9109 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9111 if (TREE_CODE (field) != FIELD_DECL)
9112 continue;
9114 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9115 if (sub_count < 0)
9116 return -1;
9117 count = count > sub_count ? count : sub_count;
9120 /* There must be no padding. */
9121 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9122 return -1;
9124 return count;
9127 default:
9128 break;
9131 return -1;
9134 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
9135 float or vector aggregate that shall be passed in FP/vector registers
9136 according to the ELFv2 ABI, return the homogeneous element mode in
9137 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9139 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
9141 static bool
9142 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9143 machine_mode *elt_mode,
9144 int *n_elts)
9146 /* Note that we do not accept complex types at the top level as
9147 homogeneous aggregates; these types are handled via the
9148 targetm.calls.split_complex_arg mechanism. Complex types
9149 can be elements of homogeneous aggregates, however. */
9150 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9152 machine_mode field_mode = VOIDmode;
9153 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9155 if (field_count > 0)
9157 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
9158 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9160 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9161 up to AGGR_ARG_NUM_REG registers. */
9162 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9164 if (elt_mode)
9165 *elt_mode = field_mode;
9166 if (n_elts)
9167 *n_elts = field_count;
9168 return true;
9173 if (elt_mode)
9174 *elt_mode = mode;
9175 if (n_elts)
9176 *n_elts = 1;
9177 return false;
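/* As a hypothetical example: under ELFv2 a parameter declared as
     struct { double d[4]; };
   gives field_count == 4 with n_regs == 1 per element, which fits
   within AGGR_ARG_NUM_REG (8), so it is treated as a homogeneous
   aggregate of four DFmode elements. */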
9180 /* Return a nonzero value to say to return the function value in
9181 memory, just as large structures are always returned. TYPE will be
9182 the data type of the value, and FNTYPE will be the type of the
9183 function doing the returning, or @code{NULL} for libcalls.
9185 The AIX ABI for the RS/6000 specifies that all structures are
9186 returned in memory. The Darwin ABI does the same.
9188 For the Darwin 64 Bit ABI, a function result can be returned in
9189 registers or in memory, depending on the size of the return data
9190 type. If it is returned in registers, the value occupies the same
9191 registers as it would if it were the first and only function
9192 argument. Otherwise, the function places its result in memory at
9193 the location pointed to by GPR3.
9195 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9196 but a draft put them in memory, and GCC used to implement the draft
9197 instead of the final standard. Therefore, aix_struct_return
9198 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9199 compatibility can change DRAFT_V4_STRUCT_RET to override the
9200 default, and -m switches get the final word. See
9201 rs6000_option_override_internal for more details.
9203 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9204 long double support is enabled. These values are returned in memory.
9206 int_size_in_bytes returns -1 for variable size objects, which go in
9207 memory always. The cast to unsigned makes -1 > 8. */
9209 static bool
9210 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9212 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9213 if (TARGET_MACHO
9214 && rs6000_darwin64_abi
9215 && TREE_CODE (type) == RECORD_TYPE
9216 && int_size_in_bytes (type) > 0)
9218 CUMULATIVE_ARGS valcum;
9219 rtx valret;
9221 valcum.words = 0;
9222 valcum.fregno = FP_ARG_MIN_REG;
9223 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9224 /* Do a trial code generation as if this were going to be passed
9225 as an argument; if any part goes in memory, we return NULL. */
9226 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9227 if (valret)
9228 return false;
9229 /* Otherwise fall through to more conventional ABI rules. */
9232 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
9233 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9234 NULL, NULL))
9235 return false;
9237 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
9238 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9239 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9240 return false;
9242 if (AGGREGATE_TYPE_P (type)
9243 && (aix_struct_return
9244 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9245 return true;
9247 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9248 modes only exist for GCC vector types if -maltivec. */
9249 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9250 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9251 return false;
9253 /* Return synthetic vectors in memory. */
9254 if (TREE_CODE (type) == VECTOR_TYPE
9255 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9257 static bool warned_for_return_big_vectors = false;
9258 if (!warned_for_return_big_vectors)
9260 warning (0, "GCC vector returned by reference: "
9261 "non-standard ABI extension with no compatibility guarantee");
9262 warned_for_return_big_vectors = true;
9264 return true;
9267 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && TYPE_MODE (type) == TFmode)
9268 return true;
9270 return false;
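/* Illustrative cases (types invented for the example): under ELFv2,
     struct s16 { char c[16]; };
   comes back in registers via the 16-byte rule, while
     struct s17 { char c[17]; };
   is an aggregate larger than 8 bytes with no exemption, so it is
   returned in memory. Under AIX, aix_struct_return sends both to
   memory. */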
9273 /* Specify whether values returned in registers should be at the most
9274 significant end of a register. We want aggregates returned by
9275 value to match the way aggregates are passed to functions. */
9277 static bool
9278 rs6000_return_in_msb (const_tree valtype)
9280 return (DEFAULT_ABI == ABI_ELFv2
9281 && BYTES_BIG_ENDIAN
9282 && AGGREGATE_TYPE_P (valtype)
9283 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9286 #ifdef HAVE_AS_GNU_ATTRIBUTE
9287 /* Return TRUE if a call to function FNDECL may be one that
9288 potentially affects the function calling ABI of the object file. */
9290 static bool
9291 call_ABI_of_interest (tree fndecl)
9293 if (symtab->state == EXPANSION)
9295 struct cgraph_node *c_node;
9297 /* Libcalls are always interesting. */
9298 if (fndecl == NULL_TREE)
9299 return true;
9301 /* Any call to an external function is interesting. */
9302 if (DECL_EXTERNAL (fndecl))
9303 return true;
9305 /* Interesting functions that we are emitting in this object file. */
9306 c_node = cgraph_node::get (fndecl);
9307 c_node = c_node->ultimate_alias_target ();
9308 return !c_node->only_called_directly_p ();
9310 return false;
9312 #endif
9314 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9315 for a call to a function whose data type is FNTYPE.
9316 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
9318 For incoming args we set the number of arguments in the prototype large
9319 so we never return a PARALLEL. */
9321 void
9322 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9323 rtx libname ATTRIBUTE_UNUSED, int incoming,
9324 int libcall, int n_named_args,
9325 tree fndecl ATTRIBUTE_UNUSED,
9326 machine_mode return_mode ATTRIBUTE_UNUSED)
9328 static CUMULATIVE_ARGS zero_cumulative;
9330 *cum = zero_cumulative;
9331 cum->words = 0;
9332 cum->fregno = FP_ARG_MIN_REG;
9333 cum->vregno = ALTIVEC_ARG_MIN_REG;
9334 cum->prototype = (fntype && prototype_p (fntype));
9335 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9336 ? CALL_LIBCALL : CALL_NORMAL);
9337 cum->sysv_gregno = GP_ARG_MIN_REG;
9338 cum->stdarg = stdarg_p (fntype);
9340 cum->nargs_prototype = 0;
9341 if (incoming || cum->prototype)
9342 cum->nargs_prototype = n_named_args;
9344 /* Check for a longcall attribute. */
9345 if ((!fntype && rs6000_default_long_calls)
9346 || (fntype
9347 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9348 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9349 cum->call_cookie |= CALL_LONG;
9351 if (TARGET_DEBUG_ARG)
9353 fprintf (stderr, "\ninit_cumulative_args:");
9354 if (fntype)
9356 tree ret_type = TREE_TYPE (fntype);
9357 fprintf (stderr, " ret code = %s,",
9358 get_tree_code_name (TREE_CODE (ret_type)));
9361 if (cum->call_cookie & CALL_LONG)
9362 fprintf (stderr, " longcall,");
9364 fprintf (stderr, " proto = %d, nargs = %d\n",
9365 cum->prototype, cum->nargs_prototype);
9368 #ifdef HAVE_AS_GNU_ATTRIBUTE
9369 if (DEFAULT_ABI == ABI_V4)
9371 cum->escapes = call_ABI_of_interest (fndecl);
9372 if (cum->escapes)
9374 tree return_type;
9376 if (fntype)
9378 return_type = TREE_TYPE (fntype);
9379 return_mode = TYPE_MODE (return_type);
9381 else
9382 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9384 if (return_type != NULL)
9386 if (TREE_CODE (return_type) == RECORD_TYPE
9387 && TYPE_TRANSPARENT_AGGR (return_type))
9389 return_type = TREE_TYPE (first_field (return_type));
9390 return_mode = TYPE_MODE (return_type);
9392 if (AGGREGATE_TYPE_P (return_type)
9393 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9394 <= 8))
9395 rs6000_returns_struct = true;
9397 if (SCALAR_FLOAT_MODE_P (return_mode))
9398 rs6000_passes_float = true;
9399 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9400 || SPE_VECTOR_MODE (return_mode))
9401 rs6000_passes_vector = true;
9404 #endif
9406 if (fntype
9407 && !TARGET_ALTIVEC
9408 && TARGET_ALTIVEC_ABI
9409 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9411 error ("cannot return value in vector register because"
9412 " altivec instructions are disabled, use -maltivec"
9413 " to enable them");
9417 /* The mode the ABI uses for a word. This is not the same as word_mode
9418 for -m32 -mpowerpc64. This is used to implement various target hooks. */
9420 static machine_mode
9421 rs6000_abi_word_mode (void)
9423 return TARGET_32BIT ? SImode : DImode;
9426 /* On rs6000, function arguments are promoted, as are function return
9427 values. */
9429 static machine_mode
9430 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9431 machine_mode mode,
9432 int *punsignedp ATTRIBUTE_UNUSED,
9433 const_tree, int)
9435 PROMOTE_MODE (mode, *punsignedp, type);
9437 return mode;
9440 /* Return true if TYPE must be passed on the stack and not in registers. */
9442 static bool
9443 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
9445 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9446 return must_pass_in_stack_var_size (mode, type);
9447 else
9448 return must_pass_in_stack_var_size_or_pad (mode, type);
9451 /* If defined, a C expression which determines whether, and in which
9452 direction, to pad out an argument with extra space. The value
9453 should be of type `enum direction': either `upward' to pad above
9454 the argument, `downward' to pad below, or `none' to inhibit
9455 padding.
9457 For the AIX ABI, structs are always stored left-shifted in their
9458 argument slot. */
9460 enum direction
9461 function_arg_padding (machine_mode mode, const_tree type)
9463 #ifndef AGGREGATE_PADDING_FIXED
9464 #define AGGREGATE_PADDING_FIXED 0
9465 #endif
9466 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9467 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9468 #endif
9470 if (!AGGREGATE_PADDING_FIXED)
9472 /* GCC used to pass structures of the same size as integer types as
9473 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
9474 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9475 passed padded downward, except that -mstrict-align further
9476 muddied the water in that multi-component structures of 2 and 4
9477 bytes in size were passed padded upward.
9479 The following arranges for best compatibility with previous
9480 versions of gcc, but removes the -mstrict-align dependency. */
9481 if (BYTES_BIG_ENDIAN)
9483 HOST_WIDE_INT size = 0;
9485 if (mode == BLKmode)
9487 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
9488 size = int_size_in_bytes (type);
9490 else
9491 size = GET_MODE_SIZE (mode);
9493 if (size == 1 || size == 2 || size == 4)
9494 return downward;
9496 return upward;
9499 if (AGGREGATES_PAD_UPWARD_ALWAYS)
9501 if (type != 0 && AGGREGATE_TYPE_P (type))
9502 return upward;
9505 /* Fall back to the default. */
9506 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
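/* As an illustration: on a big-endian target a 2-byte struct is
   padded downward (it travels like a short), whereas a 3-byte
   struct falls through to upward padding, matching the historical
   integer-like treatment described above. */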
9509 /* If defined, a C expression that gives the alignment boundary, in bits,
9510 of an argument with the specified mode and type. If it is not defined,
9511 PARM_BOUNDARY is used for all arguments.
9513 V.4 wants long longs and doubles to be double word aligned. Just
9514 testing the mode size is a boneheaded way to do this as it means
9515 that other types such as complex int are also double word aligned.
9516 However, we're stuck with this because changing the ABI might break
9517 existing library interfaces.
9519 Doubleword align SPE vectors.
9520 Quadword align Altivec/VSX vectors.
9521 Quadword align large synthetic vector types. */
9523 static unsigned int
9524 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
9526 machine_mode elt_mode;
9527 int n_elts;
9529 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9531 if (DEFAULT_ABI == ABI_V4
9532 && (GET_MODE_SIZE (mode) == 8
9533 || (TARGET_HARD_FLOAT
9534 && TARGET_FPRS
9535 && (mode == TFmode || mode == TDmode))))
9536 return 64;
9537 else if (SPE_VECTOR_MODE (mode)
9538 || (type && TREE_CODE (type) == VECTOR_TYPE
9539 && int_size_in_bytes (type) >= 8
9540 && int_size_in_bytes (type) < 16))
9541 return 64;
9542 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9543 || (type && TREE_CODE (type) == VECTOR_TYPE
9544 && int_size_in_bytes (type) >= 16))
9545 return 128;
9547 /* Aggregate types that need > 8 byte alignment are quadword-aligned
9548 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
9549 -mcompat-align-parm is used. */
9550 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
9551 || DEFAULT_ABI == ABI_ELFv2)
9552 && type && TYPE_ALIGN (type) > 64)
9554 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
9555 or homogeneous float/vector aggregates here. We already handled
9556 vector aggregates above, but still need to check for float here. */
9557 bool aggregate_p = (AGGREGATE_TYPE_P (type)
9558 && !SCALAR_FLOAT_MODE_P (elt_mode));
9560 /* We used to check for BLKmode instead of the above aggregate type
9561 check. Warn when this results in any difference to the ABI. */
9562 if (aggregate_p != (mode == BLKmode))
9564 static bool warned;
9565 if (!warned && warn_psabi)
9567 warned = true;
9568 inform (input_location,
9569 "the ABI of passing aggregates with %d-byte alignment"
9570 " has changed in GCC 5",
9571 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
9575 if (aggregate_p)
9576 return 128;
9579 /* Similar for the Darwin64 ABI. Note that for historical reasons we
9580 implement the "aggregate type" check as a BLKmode check here; this
9581 means certain aggregate types are in fact not aligned. */
9582 if (TARGET_MACHO && rs6000_darwin64_abi
9583 && mode == BLKmode
9584 && type && TYPE_ALIGN (type) > 64)
9585 return 128;
9587 return PARM_BOUNDARY;
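/* Illustrative cases: under the V.4 ABI a long long or double
   (8 bytes) gets 64-bit alignment; a 16-byte AltiVec vector gets
   128 bits; and a plain int falls through to PARM_BOUNDARY. */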
9590 /* The offset in words to the start of the parameter save area. */
9592 static unsigned int
9593 rs6000_parm_offset (void)
9595 return (DEFAULT_ABI == ABI_V4 ? 2
9596 : DEFAULT_ABI == ABI_ELFv2 ? 4
9597 : 6);
9600 /* For a function parm of MODE and TYPE, return the starting word in
9601 the parameter area. NWORDS of the parameter area are already used. */
9603 static unsigned int
9604 rs6000_parm_start (machine_mode mode, const_tree type,
9605 unsigned int nwords)
9607 unsigned int align;
9609 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
9610 return nwords + (-(rs6000_parm_offset () + nwords) & align);
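/* A worked example with hypothetical values: under ELFv2 the offset
   is 4 words; for a 16-byte-aligned aggregate on a 64-bit target,
   ALIGN is 128 / 64 - 1 == 1, so with NWORDS == 1 the result is
   1 + (-(4 + 1) & 1) == 2, bumping the argument to an even word. */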
9613 /* Compute the size (in words) of a function argument. */
9615 static unsigned long
9616 rs6000_arg_size (machine_mode mode, const_tree type)
9618 unsigned long size;
9620 if (mode != BLKmode)
9621 size = GET_MODE_SIZE (mode);
9622 else
9623 size = int_size_in_bytes (type);
9625 if (TARGET_32BIT)
9626 return (size + 3) >> 2;
9627 else
9628 return (size + 7) >> 3;
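/* E.g., a 9-byte BLKmode struct occupies (9 + 7) >> 3 == 2 words
   on a 64-bit target and (9 + 3) >> 2 == 3 words on a 32-bit one. */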
9631 /* Use this to flush pending int fields. */
9633 static void
9634 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
9635 HOST_WIDE_INT bitpos, int final)
9637 unsigned int startbit, endbit;
9638 int intregs, intoffset;
9639 machine_mode mode;
9641 /* Handle the situations where a float is taking up the first half
9642 of the GPR, and the other half is empty (typically due to
9643 alignment restrictions). We can detect this by an 8-byte-aligned
9644 int field, or by seeing that this is the final flush for this
9645 argument. Count the word and continue on. */
9646 if (cum->floats_in_gpr == 1
9647 && (cum->intoffset % 64 == 0
9648 || (cum->intoffset == -1 && final)))
9650 cum->words++;
9651 cum->floats_in_gpr = 0;
9654 if (cum->intoffset == -1)
9655 return;
9657 intoffset = cum->intoffset;
9658 cum->intoffset = -1;
9659 cum->floats_in_gpr = 0;
9661 if (intoffset % BITS_PER_WORD != 0)
9663 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9664 MODE_INT, 0);
9665 if (mode == BLKmode)
9667 /* We couldn't find an appropriate mode, which happens,
9668 e.g., in packed structs when there are 3 bytes to load.
9669 Move intoffset back to the beginning of the word in this
9670 case. */
9671 intoffset = intoffset & -BITS_PER_WORD;
9675 startbit = intoffset & -BITS_PER_WORD;
9676 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9677 intregs = (endbit - startbit) / BITS_PER_WORD;
9678 cum->words += intregs;
9679 /* words should be unsigned. */
9680 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
9682 int pad = (endbit/BITS_PER_WORD) - cum->words;
9683 cum->words += pad;
9687 /* The darwin64 ABI calls for us to recurse down through structs,
9688 looking for elements passed in registers. Unfortunately, we have
9689 to track int register count here also because of misalignments
9690 in powerpc alignment mode. */
9692 static void
9693 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
9694 const_tree type,
9695 HOST_WIDE_INT startbitpos)
9697 tree f;
9699 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9700 if (TREE_CODE (f) == FIELD_DECL)
9702 HOST_WIDE_INT bitpos = startbitpos;
9703 tree ftype = TREE_TYPE (f);
9704 machine_mode mode;
9705 if (ftype == error_mark_node)
9706 continue;
9707 mode = TYPE_MODE (ftype);
9709 if (DECL_SIZE (f) != 0
9710 && tree_fits_uhwi_p (bit_position (f)))
9711 bitpos += int_bit_position (f);
9713 /* ??? FIXME: else assume zero offset. */
9715 if (TREE_CODE (ftype) == RECORD_TYPE)
9716 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
9717 else if (USE_FP_FOR_ARG_P (cum, mode))
9719 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
9720 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9721 cum->fregno += n_fpregs;
9722 /* Single-precision floats present a special problem for
9723 us, because they are smaller than an 8-byte GPR, and so
9724 the structure-packing rules combined with the standard
9725 varargs behavior mean that we want to pack float/float
9726 and float/int combinations into a single register's
9727 space. This is complicated by the arg advance flushing,
9728 which works on arbitrarily large groups of int-type
9729 fields. */
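/* Illustration (hypothetical type): for struct { float a, b; },
   field a leaves floats_in_gpr == 1, and field b then takes the
   first branch below, so the pair shares a single 8-byte GPR slot
   instead of using two. */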
9730 if (mode == SFmode)
9732 if (cum->floats_in_gpr == 1)
9734 /* Two floats in a word; count the word and reset
9735 the float count. */
9736 cum->words++;
9737 cum->floats_in_gpr = 0;
9739 else if (bitpos % 64 == 0)
9741 /* A float at the beginning of an 8-byte word;
9742 count it and put off adjusting cum->words until
9743 we see if an arg advance flush is going to do it
9744 for us. */
9745 cum->floats_in_gpr++;
9747 else
9749 /* The float is at the end of a word, preceded
9750 by integer fields, so the arg advance flush
9751 just above has already set cum->words and
9752 everything is taken care of. */
9755 else
9756 cum->words += n_fpregs;
9758 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9760 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9761 cum->vregno++;
9762 cum->words += 2;
9764 else if (cum->intoffset == -1)
9765 cum->intoffset = bitpos;
9769 /* Check for an item that needs to be considered specially under the darwin 64
9770 bit ABI. These are record types where the mode is BLK or the structure is
9771 8 bytes in size. */
9772 static int
9773 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
9775 return rs6000_darwin64_abi
9776 && ((mode == BLKmode
9777 && TREE_CODE (type) == RECORD_TYPE
9778 && int_size_in_bytes (type) > 0)
9779 || (type && TREE_CODE (type) == RECORD_TYPE
9780 && int_size_in_bytes (type) == 8)) ? 1 : 0;
9783 /* Update the data in CUM to advance over an argument
9784 of mode MODE and data type TYPE.
9785 (TYPE is null for libcalls where that information may not be available.)
9787 Note that for args passed by reference, function_arg will be called
9788 with MODE and TYPE set to that of the pointer to the arg, not the arg
9789 itself. */
9791 static void
9792 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
9793 const_tree type, bool named, int depth)
9795 machine_mode elt_mode;
9796 int n_elts;
9798 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9800 /* Only tick off an argument if we're not recursing. */
9801 if (depth == 0)
9802 cum->nargs_prototype--;
9804 #ifdef HAVE_AS_GNU_ATTRIBUTE
9805 if (DEFAULT_ABI == ABI_V4
9806 && cum->escapes)
9808 if (SCALAR_FLOAT_MODE_P (mode))
9809 rs6000_passes_float = true;
9810 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
9811 rs6000_passes_vector = true;
9812 else if (SPE_VECTOR_MODE (mode)
9813 && !cum->stdarg
9814 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9815 rs6000_passes_vector = true;
9817 #endif
9819 if (TARGET_ALTIVEC_ABI
9820 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9821 || (type && TREE_CODE (type) == VECTOR_TYPE
9822 && int_size_in_bytes (type) == 16)))
9824 bool stack = false;
9826 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
9828 cum->vregno += n_elts;
9830 if (!TARGET_ALTIVEC)
9831 error ("cannot pass argument in vector register because"
9832 " altivec instructions are disabled, use -maltivec"
9833 " to enable them");
9835 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
9836 even if it is going to be passed in a vector register.
9837 Darwin does the same for variable-argument functions. */
9838 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9839 && TARGET_64BIT)
9840 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
9841 stack = true;
9843 else
9844 stack = true;
9846 if (stack)
9848 int align;
9850 /* Vector parameters must be 16-byte aligned. In 32-bit
9851 mode this means we need to take into account the offset
9852 to the parameter save area. In 64-bit mode, they just
9853 have to start on an even word, since the parameter save
9854 area is 16-byte aligned. */
9855 if (TARGET_32BIT)
9856 align = -(rs6000_parm_offset () + cum->words) & 3;
9857 else
9858 align = cum->words & 1;
9859 cum->words += align + rs6000_arg_size (mode, type);
9861 if (TARGET_DEBUG_ARG)
9863 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
9864 cum->words, align);
9865 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
9866 cum->nargs_prototype, cum->prototype,
9867 GET_MODE_NAME (mode));
9871 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
9872 && !cum->stdarg
9873 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9874 cum->sysv_gregno++;
9876 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9878 int size = int_size_in_bytes (type);
9879 /* Variable sized types have size == -1 and are
9880 treated as if consisting entirely of ints.
9881 Pad to 16 byte boundary if needed. */
9882 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9883 && (cum->words % 2) != 0)
9884 cum->words++;
9885 /* For varargs, we can just go up by the size of the struct. */
9886 if (!named)
9887 cum->words += (size + 7) / 8;
9888 else
9890 /* It is tempting to say int register count just goes up by
9891 sizeof(type)/8, but this is wrong in a case such as
9892 { int; double; int; } [powerpc alignment]. We have to
9893 grovel through the fields for these too. */
9894 cum->intoffset = 0;
9895 cum->floats_in_gpr = 0;
9896 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
9897 rs6000_darwin64_record_arg_advance_flush (cum,
9898 size * BITS_PER_UNIT, 1);
9900 if (TARGET_DEBUG_ARG)
9902 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
9903 cum->words, TYPE_ALIGN (type), size);
9904 fprintf (stderr,
9905 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
9906 cum->nargs_prototype, cum->prototype,
9907 GET_MODE_NAME (mode));
9910 else if (DEFAULT_ABI == ABI_V4)
9912 if (TARGET_HARD_FLOAT && TARGET_FPRS
9913 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
9914 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
9915 || (mode == TFmode && !TARGET_IEEEQUAD)
9916 || mode == SDmode || mode == DDmode || mode == TDmode))
9918 /* _Decimal128 must use an even/odd register pair. This assumes
9919 that the register number is odd when fregno is odd. */
9920 if (mode == TDmode && (cum->fregno % 2) == 1)
9921 cum->fregno++;
9923 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
9924 <= FP_ARG_V4_MAX_REG)
9925 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
9926 else
9928 cum->fregno = FP_ARG_V4_MAX_REG + 1;
9929 if (mode == DFmode || mode == TFmode
9930 || mode == DDmode || mode == TDmode)
9931 cum->words += cum->words & 1;
9932 cum->words += rs6000_arg_size (mode, type);
9935 else
9937 int n_words = rs6000_arg_size (mode, type);
9938 int gregno = cum->sysv_gregno;
9940 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
9941 (r7,r8) or (r9,r10). As does any other 2 word item such
9942 as complex int due to a historical mistake. */
9943 if (n_words == 2)
9944 gregno += (1 - gregno) & 1;
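/* E.g. gregno == 4 (r4) becomes 5 (r5), while odd values are left
   alone: (1 - 4) & 1 == 1 but (1 - 5) & 1 == 0. */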
9946 /* Multi-reg args are not split between registers and stack. */
9947 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9949 /* Long long and SPE vectors are aligned on the stack.
9950 So are other 2 word items such as complex int due to
9951 a historical mistake. */
9952 if (n_words == 2)
9953 cum->words += cum->words & 1;
9954 cum->words += n_words;
9957 /* Note: gregno keeps accumulating even after we have started
9958 spilling to the stack; expand_builtin_saveregs uses the overrun
9959 value to tell that spilling has begun. */
9960 cum->sysv_gregno = gregno + n_words;
9963 if (TARGET_DEBUG_ARG)
9965 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9966 cum->words, cum->fregno);
9967 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
9968 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
9969 fprintf (stderr, "mode = %4s, named = %d\n",
9970 GET_MODE_NAME (mode), named);
9973 else
9975 int n_words = rs6000_arg_size (mode, type);
9976 int start_words = cum->words;
9977 int align_words = rs6000_parm_start (mode, type, start_words);
9979 cum->words = align_words + n_words;
9981 if (SCALAR_FLOAT_MODE_P (elt_mode)
9982 && TARGET_HARD_FLOAT && TARGET_FPRS)
9984 /* _Decimal128 must be passed in an even/odd float register pair.
9985 This assumes that the register number is odd when fregno is
9986 odd. */
9987 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
9988 cum->fregno++;
9989 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
9992 if (TARGET_DEBUG_ARG)
9994 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9995 cum->words, cum->fregno);
9996 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
9997 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
9998 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
9999 named, align_words - start_words, depth);
10004 static void
10005 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
10006 const_tree type, bool named)
10008 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
10012 static rtx
10013 spe_build_register_parallel (machine_mode mode, int gregno)
10015 rtx r1, r3, r5, r7;
10017 switch (mode)
10019 case DFmode:
10020 r1 = gen_rtx_REG (DImode, gregno);
10021 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10022 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
10024 case DCmode:
10025 case TFmode:
10026 r1 = gen_rtx_REG (DImode, gregno);
10027 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10028 r3 = gen_rtx_REG (DImode, gregno + 2);
10029 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10030 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
10032 case TCmode:
10033 r1 = gen_rtx_REG (DImode, gregno);
10034 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10035 r3 = gen_rtx_REG (DImode, gregno + 2);
10036 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10037 r5 = gen_rtx_REG (DImode, gregno + 4);
10038 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
10039 r7 = gen_rtx_REG (DImode, gregno + 6);
10040 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
10041 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
10043 default:
10044 gcc_unreachable ();
10048 /* Determine where to put a SIMD argument on the SPE. */
10049 static rtx
10050 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
10051 const_tree type)
10053 int gregno = cum->sysv_gregno;
10055 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
10056 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
10057 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
10058 || mode == DCmode || mode == TCmode))
10060 int n_words = rs6000_arg_size (mode, type);
10062 /* Doubles go in an odd/even register pair (r5/r6, etc). */
10063 if (mode == DFmode)
10064 gregno += (1 - gregno) & 1;
10066 /* Multi-reg args are not split between registers and stack. */
10067 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10068 return NULL_RTX;
10070 return spe_build_register_parallel (mode, gregno);
10072 if (cum->stdarg)
10074 int n_words = rs6000_arg_size (mode, type);
10076 /* SPE vectors are put in odd registers. */
10077 if (n_words == 2 && (gregno & 1) == 0)
10078 gregno += 1;
10080 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
10082 rtx r1, r2;
10083 machine_mode m = SImode;
10085 r1 = gen_rtx_REG (m, gregno);
10086 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
10087 r2 = gen_rtx_REG (m, gregno + 1);
10088 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
10089 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
10091 else
10092 return NULL_RTX;
10094 else
10096 if (gregno <= GP_ARG_MAX_REG)
10097 return gen_rtx_REG (mode, gregno);
10098 else
10099 return NULL_RTX;
10103 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
10104 structure between cum->intoffset and bitpos to integer registers. */
10106 static void
10107 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
10108 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
10110 machine_mode mode;
10111 unsigned int regno;
10112 unsigned int startbit, endbit;
10113 int this_regno, intregs, intoffset;
10114 rtx reg;
10116 if (cum->intoffset == -1)
10117 return;
10119 intoffset = cum->intoffset;
10120 cum->intoffset = -1;
10122 /* If this is the trailing part of a word, try to only load that
10123 much into the register. Otherwise load the whole register. Note
10124 that in the latter case we may pick up unwanted bits. It's not a
10125 problem at the moment, but we may wish to revisit this. */
10127 if (intoffset % BITS_PER_WORD != 0)
10129 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10130 MODE_INT, 0);
10131 if (mode == BLKmode)
10133 /* We couldn't find an appropriate mode, which happens,
10134 e.g., in packed structs when there are 3 bytes to load.
10135 Move intoffset back to the beginning of the word in this
10136 case. */
10137 intoffset = intoffset & -BITS_PER_WORD;
10138 mode = word_mode;
10141 else
10142 mode = word_mode;
10144 startbit = intoffset & -BITS_PER_WORD;
10145 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
10146 intregs = (endbit - startbit) / BITS_PER_WORD;
10147 this_regno = cum->words + intoffset / BITS_PER_WORD;
10149 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
10150 cum->use_stack = 1;
10152 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
10153 if (intregs <= 0)
10154 return;
10156 intoffset /= BITS_PER_UNIT;
10157 do
10159 regno = GP_ARG_MIN_REG + this_regno;
10160 reg = gen_rtx_REG (mode, regno);
10161 rvec[(*k)++] =
10162 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10164 this_regno += 1;
10165 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
10166 mode = word_mode;
10167 intregs -= 1;
10169 while (intregs > 0);
10172 /* Recursive workhorse for the following. */
10174 static void
10175 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10176 HOST_WIDE_INT startbitpos, rtx rvec[],
10177 int *k)
10179 tree f;
10181 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10182 if (TREE_CODE (f) == FIELD_DECL)
10184 HOST_WIDE_INT bitpos = startbitpos;
10185 tree ftype = TREE_TYPE (f);
10186 machine_mode mode;
10187 if (ftype == error_mark_node)
10188 continue;
10189 mode = TYPE_MODE (ftype);
10191 if (DECL_SIZE (f) != 0
10192 && tree_fits_uhwi_p (bit_position (f)))
10193 bitpos += int_bit_position (f);
10195 /* ??? FIXME: else assume zero offset. */
10197 if (TREE_CODE (ftype) == RECORD_TYPE)
10198 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10199 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10201 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10202 #if 0
10203 switch (mode)
10205 case SCmode: mode = SFmode; break;
10206 case DCmode: mode = DFmode; break;
10207 case TCmode: mode = TFmode; break;
10208 default: break;
10210 #endif
10211 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10212 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10214 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10215 && (mode == TFmode || mode == TDmode));
10216 /* Long double or _Decimal128 split over regs and memory. */
10217 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10218 cum->use_stack = 1;
10220 rvec[(*k)++]
10221 = gen_rtx_EXPR_LIST (VOIDmode,
10222 gen_rtx_REG (mode, cum->fregno++),
10223 GEN_INT (bitpos / BITS_PER_UNIT));
10224 if (mode == TFmode || mode == TDmode)
10225 cum->fregno++;
10227 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10229 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10230 rvec[(*k)++]
10231 = gen_rtx_EXPR_LIST (VOIDmode,
10232 gen_rtx_REG (mode, cum->vregno++),
10233 GEN_INT (bitpos / BITS_PER_UNIT));
10235 else if (cum->intoffset == -1)
10236 cum->intoffset = bitpos;
10240 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10241 the register(s) to be used for each field and subfield of a struct
10242 being passed by value, along with the offset of where the
10243 register's value may be found in the block. FP fields go in FP
10244 register, vector fields go in vector registers, and everything
10245 else goes in int registers, packed as in memory.
10247 This code is also used for function return values. RETVAL indicates
10248 whether this is the case.
10250 Much of this is taken from the SPARC V9 port, which has a similar
10251 calling convention. */
10253 static rtx
10254 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10255 bool named, bool retval)
10257 rtx rvec[FIRST_PSEUDO_REGISTER];
10258 int k = 1, kbase = 1;
10259 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10260 /* This is a copy; modifications are not visible to our caller. */
10261 CUMULATIVE_ARGS copy_cum = *orig_cum;
10262 CUMULATIVE_ARGS *cum = &copy_cum;
10264 /* Pad to 16 byte boundary if needed. */
10265 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10266 && (cum->words % 2) != 0)
10267 cum->words++;
10269 cum->intoffset = 0;
10270 cum->use_stack = 0;
10271 cum->named = named;
10273 /* Put entries into rvec[] for individual FP and vector fields, and
10274 for the chunks of memory that go in int regs. Note we start at
10275 element 1; 0 is reserved for an indication of using memory, and
10276 may or may not be filled in below. */
10277 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10278 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10280 /* If any part of the struct went on the stack put all of it there.
10281 This hack is because the generic code for
10282 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10283 parts of the struct are not at the beginning. */
10284 if (cum->use_stack)
10286 if (retval)
10287 return NULL_RTX; /* doesn't go in registers at all */
10288 kbase = 0;
10289 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10291 if (k > 1 || cum->use_stack)
10292 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10293 else
10294 return NULL_RTX;
10297 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10299 static rtx
10300 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10301 int align_words)
10303 int n_units;
10304 int i, k;
10305 rtx rvec[GP_ARG_NUM_REG + 1];
10307 if (align_words >= GP_ARG_NUM_REG)
10308 return NULL_RTX;
10310 n_units = rs6000_arg_size (mode, type);
10312 /* Optimize the simple case where the arg fits in one gpr, except in
10313 the case of BLKmode due to assign_parms assuming that registers are
10314 BITS_PER_WORD wide. */
10315 if (n_units == 0
10316 || (n_units == 1 && mode != BLKmode))
10317 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10319 k = 0;
10320 if (align_words + n_units > GP_ARG_NUM_REG)
10321 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10322 using a magic NULL_RTX component.
10323 This is not strictly correct. Only some of the arg belongs in
10324 memory, not all of it. However, the normal scheme using
10325 function_arg_partial_nregs can result in unusual subregs, eg.
10326 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10327 store the whole arg to memory is often more efficient than code
10328 to store pieces, and we know that space is available in the right
10329 place for the whole arg. */
10330 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10332 i = 0;
10333 do
10335 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10336 rtx off = GEN_INT (i++ * 4);
10337 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10339 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10341 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
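/* For instance: a DFmode argument starting at align_words == 1
   becomes a PARALLEL of two SImode pieces, (reg:SI 4) at offset 0
   and (reg:SI 5) at offset 4, since the 64-bit -m32 -mpowerpc64
   registers cannot be described as a single piece here. */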
10344 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10345 but must also be copied into the parameter save area starting at
10346 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10347 to the GPRs and/or memory. Return the number of elements used. */
10349 static int
10350 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10351 int align_words, rtx *rvec)
10353 int k = 0;
10355 if (align_words < GP_ARG_NUM_REG)
10357 int n_words = rs6000_arg_size (mode, type);
10359 if (align_words + n_words > GP_ARG_NUM_REG
10360 || mode == BLKmode
10361 || (TARGET_32BIT && TARGET_POWERPC64))
10363 /* If this is partially on the stack, then we only
10364 include the portion actually in registers here. */
10365 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10366 int i = 0;
10368 if (align_words + n_words > GP_ARG_NUM_REG)
10370 /* Not all of the arg fits in gprs. Say that it goes in memory
10371 too, using a magic NULL_RTX component. Also see comment in
10372 rs6000_mixed_function_arg for why the normal
10373 function_arg_partial_nregs scheme doesn't work in this case. */
10374 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10377 do
10379 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10380 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10381 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10383 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10385 else
10387 /* The whole arg fits in gprs. */
10388 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10389 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10392 else
10394 /* It's entirely in memory. */
10395 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10398 return k;
10401 /* RVEC is a vector of K components of an argument of mode MODE.
10402 Construct the final function_arg return value from it. */
10404 static rtx
10405 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
10407 gcc_assert (k >= 1);
10409 /* Avoid returning a PARALLEL in the trivial cases. */
10410 if (k == 1)
10412 if (XEXP (rvec[0], 0) == NULL_RTX)
10413 return NULL_RTX;
10415 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10416 return XEXP (rvec[0], 0);
10419 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10422 /* Determine where to put an argument to a function.
10423 Value is zero to push the argument on the stack,
10424 or a hard register in which to store the argument.
10426 MODE is the argument's machine mode.
10427 TYPE is the data type of the argument (as a tree).
10428 This is null for libcalls where that information may
10429 not be available.
10430 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10431 the preceding args and about the function being called. It is
10432 not modified in this routine.
10433 NAMED is nonzero if this argument is a named parameter
10434 (otherwise it is an extra parameter matching an ellipsis).
10436 On RS/6000 the first eight words of non-FP are normally in registers
10437 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10438 Under V.4, the first 8 FP args are in registers.
10440 If this is floating-point and no prototype is specified, we use
10441 both an FP and integer register (or possibly FP reg and stack). Library
10442 functions (when CALL_LIBCALL is set) always have the proper types for args,
10443 so we can pass the FP value just in one register. emit_library_call
10444 doesn't support PARALLEL anyway.
10446 Note that for args passed by reference, function_arg will be called
10447 with MODE and TYPE set to that of the pointer to the arg, not the arg
10448 itself. */
10450 static rtx
10451 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
10452 const_tree type, bool named)
10454 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10455 enum rs6000_abi abi = DEFAULT_ABI;
10456 machine_mode elt_mode;
10457 int n_elts;
10459 /* Return a marker to indicate whether we need to set or clear the
10460 CR1 bit that V.4 uses to say fp args were passed in registers.
10461 Assume that we don't need the marker for software floating point,
10462 or compiler generated library calls. */
10463 if (mode == VOIDmode)
10465 if (abi == ABI_V4
10466 && (cum->call_cookie & CALL_LIBCALL) == 0
10467 && (cum->stdarg
10468 || (cum->nargs_prototype < 0
10469 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10471 /* For the SPE, we need to crxor CR6 always. */
10472 if (TARGET_SPE_ABI)
10473 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10474 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10475 return GEN_INT (cum->call_cookie
10476 | ((cum->fregno == FP_ARG_MIN_REG)
10477 ? CALL_V4_SET_FP_ARGS
10478 : CALL_V4_CLEAR_FP_ARGS));
10481 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
10484 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10486 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10488 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
10489 if (rslt != NULL_RTX)
10490 return rslt;
10491 /* Else fall through to usual handling. */
10494 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10496 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10497 rtx r, off;
10498 int i, k = 0;
10500 /* Do we also need to pass this argument in the parameter
10501 save area? */
10502 if (TARGET_64BIT && ! cum->prototype)
10504 int align_words = (cum->words + 1) & ~1;
10505 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10508 /* Describe where this argument goes in the vector registers. */
10509 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
10511 r = gen_rtx_REG (elt_mode, cum->vregno + i);
10512 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10513 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10516 return rs6000_finish_function_arg (mode, rvec, k);
10518 else if (TARGET_ALTIVEC_ABI
10519 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
10520 || (type && TREE_CODE (type) == VECTOR_TYPE
10521 && int_size_in_bytes (type) == 16)))
10523 if (named || abi == ABI_V4)
10524 return NULL_RTX;
10525 else
10527 /* Vector parameters to varargs functions under AIX or Darwin
10528 get passed in memory and possibly also in GPRs. */
10529 int align, align_words, n_words;
10530 machine_mode part_mode;
10532 /* Vector parameters must be 16-byte aligned. In 32-bit
10533 mode this means we need to take into account the offset
10534 to the parameter save area. In 64-bit mode, they just
10535 have to start on an even word, since the parameter save
10536 area is 16-byte aligned. */
10537 if (TARGET_32BIT)
10538 align = -(rs6000_parm_offset () + cum->words) & 3;
10539 else
10540 align = cum->words & 1;
10541 align_words = cum->words + align;
10543 /* Out of registers? Memory, then. */
10544 if (align_words >= GP_ARG_NUM_REG)
10545 return NULL_RTX;
10547 if (TARGET_32BIT && TARGET_POWERPC64)
10548 return rs6000_mixed_function_arg (mode, type, align_words);
10550 /* The vector value goes in GPRs. Only the part of the
10551 value in GPRs is reported here. */
10552 part_mode = mode;
10553 n_words = rs6000_arg_size (mode, type);
10554 if (align_words + n_words > GP_ARG_NUM_REG)
10555 /* Fortunately, there are only two possibilities, the value
10556 is either wholly in GPRs or half in GPRs and half not. */
10557 part_mode = DImode;
10559 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
10562 else if (TARGET_SPE_ABI && TARGET_SPE
10563 && (SPE_VECTOR_MODE (mode)
10564 || (TARGET_E500_DOUBLE && (mode == DFmode
10565 || mode == DCmode
10566 || mode == TFmode
10567 || mode == TCmode))))
10568 return rs6000_spe_function_arg (cum, mode, type);
10570 else if (abi == ABI_V4)
10572 if (TARGET_HARD_FLOAT && TARGET_FPRS
10573 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10574 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10575 || (mode == TFmode && !TARGET_IEEEQUAD)
10576 || mode == SDmode || mode == DDmode || mode == TDmode))
10578 /* _Decimal128 must use an even/odd register pair. This assumes
10579 that the register number is odd when fregno is odd. */
10580 if (mode == TDmode && (cum->fregno % 2) == 1)
10581 cum->fregno++;
10583 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
10584 <= FP_ARG_V4_MAX_REG)
10585 return gen_rtx_REG (mode, cum->fregno);
10586 else
10587 return NULL_RTX;
10589 else
10591 int n_words = rs6000_arg_size (mode, type);
10592 int gregno = cum->sysv_gregno;
10594 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10595 (r7,r8) or (r9,r10). As does any other 2 word item such
10596 as complex int due to a historical mistake. */
10597 if (n_words == 2)
10598 gregno += (1 - gregno) & 1;
10600 /* Multi-reg args are not split between registers and stack. */
10601 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10602 return NULL_RTX;
10604 if (TARGET_32BIT && TARGET_POWERPC64)
10605 return rs6000_mixed_function_arg (mode, type,
10606 gregno - GP_ARG_MIN_REG);
10607 return gen_rtx_REG (mode, gregno);
10610 else
10612 int align_words = rs6000_parm_start (mode, type, cum->words);
10614 /* _Decimal128 must be passed in an even/odd float register pair.
10615 This assumes that the register number is odd when fregno is odd. */
10616 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10617 cum->fregno++;
10619 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10621 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10622 rtx r, off;
10623 int i, k = 0;
10624 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10625 int fpr_words;
10627 /* Do we also need to pass this argument in the parameter
10628 save area? */
10629 if (type && (cum->nargs_prototype <= 0
10630 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10631 && TARGET_XL_COMPAT
10632 && align_words >= GP_ARG_NUM_REG)))
10633 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10635 /* Describe where this argument goes in the fprs. */
10636 for (i = 0; i < n_elts
10637 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
10639 /* Check if the argument is split over registers and memory.
10640 This can only ever happen for long double or _Decimal128;
10641 complex types are handled via split_complex_arg. */
10642 machine_mode fmode = elt_mode;
10643 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
10645 gcc_assert (fmode == TFmode || fmode == TDmode);
10646 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
10649 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
10650 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10651 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10654 /* If there were not enough FPRs to hold the argument, the rest
10655 usually goes into memory. However, if the current position
10656 is still within the register parameter area, a portion may
10657 actually have to go into GPRs.
10659 Note that it may happen that the portion of the argument
10660 passed in the first "half" of the first GPR was already
10661 passed in the last FPR as well.
10663 For unnamed arguments, we already set up GPRs to cover the
10664 whole argument in rs6000_psave_function_arg, so there is
10665 nothing further to do at this point. */
10666 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
10667 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
10668 && cum->nargs_prototype > 0)
10670 static bool warned;
10672 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10673 int n_words = rs6000_arg_size (mode, type);
10675 align_words += fpr_words;
10676 n_words -= fpr_words;
10678 do
10680 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10681 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
10682 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10684 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10686 if (!warned && warn_psabi)
10688 warned = true;
10689 inform (input_location,
10690 "the ABI of passing homogeneous float aggregates"
10691 " has changed in GCC 5");
10695 return rs6000_finish_function_arg (mode, rvec, k);
10697 else if (align_words < GP_ARG_NUM_REG)
10699 if (TARGET_32BIT && TARGET_POWERPC64)
10700 return rs6000_mixed_function_arg (mode, type, align_words);
10702 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10704 else
10705 return NULL_RTX;
10709 /* For an arg passed partly in registers and partly in memory, this is
10710 the number of bytes passed in registers. For args passed entirely in
10711 registers or entirely in memory, zero. When an arg is described by a
10712 PARALLEL, perhaps using more than one register type, this function
10713 returns the number of bytes used by the first element of the PARALLEL. */
10715 static int
10716 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
10717 tree type, bool named)
10719 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10720 bool passed_in_gprs = true;
10721 int ret = 0;
10722 int align_words;
10723 machine_mode elt_mode;
10724 int n_elts;
10726 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10728 if (DEFAULT_ABI == ABI_V4)
10729 return 0;
10731 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10733 /* If we are passing this arg in the fixed parameter save area
10734 (gprs or memory) as well as VRs, we do not use the partial
10735 bytes mechanism; instead, rs6000_function_arg will return a
10736 PARALLEL including a memory element as necessary. */
10737 if (TARGET_64BIT && ! cum->prototype)
10738 return 0;
10740 /* Otherwise, we pass in VRs only. Check for partial copies. */
10741 passed_in_gprs = false;
10742 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
10743 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
10746 /* In this complicated case we just disable the partial_nregs code. */
10747 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10748 return 0;
10750 align_words = rs6000_parm_start (mode, type, cum->words);
10752 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10754 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10756 /* If we are passing this arg in the fixed parameter save area
10757 (gprs or memory) as well as FPRs, we do not use the partial
10758 bytes mechanism; instead, rs6000_function_arg will return a
10759 PARALLEL including a memory element as necessary. */
10760 if (type
10761 && (cum->nargs_prototype <= 0
10762 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10763 && TARGET_XL_COMPAT
10764 && align_words >= GP_ARG_NUM_REG)))
10765 return 0;
10767 /* Otherwise, we pass in FPRs only. Check for partial copies. */
10768 passed_in_gprs = false;
10769 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
10771 /* Compute number of bytes / words passed in FPRs. If there
10772 is still space available in the register parameter area
10773 *after* that amount, a part of the argument will be passed
10774 in GPRs. In that case, the total amount passed in any
10775 registers is equal to the amount that would have been passed
10776 in GPRs if everything were passed there, so we fall back to
10777 the GPR code below to compute the appropriate value. */
10778 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
10779 * MIN (8, GET_MODE_SIZE (elt_mode)));
10780 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
10782 if (align_words + fpr_words < GP_ARG_NUM_REG)
10783 passed_in_gprs = true;
10784 else
10785 ret = fpr;
10789 if (passed_in_gprs
10790 && align_words < GP_ARG_NUM_REG
10791 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
10792 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
10794 if (ret != 0 && TARGET_DEBUG_ARG)
10795 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
10797 return ret;
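/* Hypothetical example: on 64-bit AIX a 32-byte struct whose first
   word lands at align_words == 7 has only r10 left, so ret is
   (8 - 7) * 8 == 8 bytes in registers; the other 24 bytes go on
   the stack. */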
10800 /* A C expression that indicates when an argument must be passed by
10801 reference. If nonzero for an argument, a copy of that argument is
10802 made in memory and a pointer to the argument is passed instead of
10803 the argument itself. The pointer is passed in whatever way is
10804 appropriate for passing a pointer to that type.
10806 Under V.4, aggregates and long double are passed by reference.
10808 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
10809 reference unless the AltiVec vector extension ABI is in force.
10811 As an extension to all ABIs, variable sized types are passed by
10812 reference. */
10814 static bool
10815 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
10816 machine_mode mode, const_tree type,
10817 bool named ATTRIBUTE_UNUSED)
10819 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
10821 if (TARGET_DEBUG_ARG)
10822 fprintf (stderr, "function_arg_pass_by_reference: V4 long double\n");
10823 return 1;
10826 if (!type)
10827 return 0;
10829 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
10831 if (TARGET_DEBUG_ARG)
10832 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
10833 return 1;
10836 if (int_size_in_bytes (type) < 0)
10838 if (TARGET_DEBUG_ARG)
10839 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
10840 return 1;
10843 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10844 modes only exist for GCC vector types if -maltivec. */
10845 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
10847 if (TARGET_DEBUG_ARG)
10848 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
10849 return 1;
10852 /* Pass synthetic vectors in memory. */
10853 if (TREE_CODE (type) == VECTOR_TYPE
10854 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10856 static bool warned_for_pass_big_vectors = false;
10857 if (TARGET_DEBUG_ARG)
10858 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
10859 if (!warned_for_pass_big_vectors)
10861 warning (0, "GCC vector passed by reference: "
10862 "non-standard ABI extension with no compatibility guarantee");
10863 warned_for_pass_big_vectors = true;
10865 return 1;
10868 return 0;
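
[Editorial sketch, not part of rs6000.c: to make the cases above concrete, here are hypothetical user-level parameter types this hook flags for pass-by-reference.]

    /* Illustrative declarations only; names are hypothetical.  */
    struct big { double d[4]; };                           /* aggregate */
    typedef int v8si __attribute__ ((vector_size (32)));  /* 32-byte vector */

    void f (struct big b);   /* under ABI_V4: a copy is made in memory and a
                                pointer is passed instead of the aggregate */
    void g (v8si v);         /* synthetic vector wider than the hardware
                                supports: by reference on any ABI, with the
                                one-time "non-standard ABI" warning above */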
10871 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
10872 already processed. Return true if the parameter must be passed
10873 (fully or partially) on the stack. */
10875 static bool
10876 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
10878 machine_mode mode;
10879 int unsignedp;
10880 rtx entry_parm;
10882 /* Catch errors. */
10883 if (type == NULL || type == error_mark_node)
10884 return true;
10886 /* Handle types with no storage requirement. */
10887 if (TYPE_MODE (type) == VOIDmode)
10888 return false;
10890 /* Handle complex types; each recursive call also advances ARGS_SO_FAR, which is why the element type is scanned twice, once per part. */
10891 if (TREE_CODE (type) == COMPLEX_TYPE)
10892 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
10893 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
10895 /* Handle transparent aggregates. */
10896 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
10897 && TYPE_TRANSPARENT_AGGR (type))
10898 type = TREE_TYPE (first_field (type));
10900 /* See if this arg was passed by invisible reference. */
10901 if (pass_by_reference (get_cumulative_args (args_so_far),
10902 TYPE_MODE (type), type, true))
10903 type = build_pointer_type (type);
10905 /* Find mode as it is passed by the ABI. */
10906 unsignedp = TYPE_UNSIGNED (type);
10907 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
10909 /* If we must pass in stack, we need a stack. */
10910 if (rs6000_must_pass_in_stack (mode, type))
10911 return true;
10913 /* If there is no incoming register, we need a stack. */
10914 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
10915 if (entry_parm == NULL)
10916 return true;
10918 /* Likewise if we need to pass both in registers and on the stack. */
10919 if (GET_CODE (entry_parm) == PARALLEL
10920 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
10921 return true;
10923 /* Also true if we're partially in registers and partially not. */
10924 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
10925 return true;
10927 /* Update info on where next arg arrives in registers. */
10928 rs6000_function_arg_advance (args_so_far, mode, type, true);
10929 return false;
10932 /* Return true if FUN has no prototype, has a variable argument
10933 list, or passes any parameter in memory. */
10935 static bool
10936 rs6000_function_parms_need_stack (tree fun, bool incoming)
10938 tree fntype, result;
10939 CUMULATIVE_ARGS args_so_far_v;
10940 cumulative_args_t args_so_far;
10942 if (!fun)
10943 /* Must be a libcall, all of which only use reg parms. */
10944 return false;
10946 fntype = fun;
10947 if (!TYPE_P (fun))
10948 fntype = TREE_TYPE (fun);
10950 /* Varargs functions need the parameter save area. */
10951 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
10952 return true;
10954 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
10955 args_so_far = pack_cumulative_args (&args_so_far_v);
10957 /* When incoming, we will have been passed the function decl.
10958 It is necessary to use the decl to handle K&R style functions,
10959 where TYPE_ARG_TYPES may not be available. */
10960 if (incoming)
10962 gcc_assert (DECL_P (fun));
10963 result = DECL_RESULT (fun);
10965 else
10966 result = TREE_TYPE (fntype);
10968 if (result && aggregate_value_p (result, fntype))
10970 if (!TYPE_P (result))
10971 result = TREE_TYPE (result);
10972 result = build_pointer_type (result);
10973 rs6000_parm_needs_stack (args_so_far, result);
10976 if (incoming)
10978 tree parm;
10980 for (parm = DECL_ARGUMENTS (fun);
10981 parm && parm != void_list_node;
10982 parm = TREE_CHAIN (parm))
10983 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
10984 return true;
10986 else
10988 function_args_iterator args_iter;
10989 tree arg_type;
10991 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
10992 if (rs6000_parm_needs_stack (args_so_far, arg_type))
10993 return true;
10996 return false;
10999 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
11000 usually a constant depending on the ABI. However, in the ELFv2 ABI
11001 the register parameter area is optional when calling a function that
11002 has a prototype in scope, has no variable argument list, and passes
11003 all parameters in registers. */
11005 int
11006 rs6000_reg_parm_stack_space (tree fun, bool incoming)
11008 int reg_parm_stack_space;
11010 switch (DEFAULT_ABI)
11012 default:
11013 reg_parm_stack_space = 0;
11014 break;
11016 case ABI_AIX:
11017 case ABI_DARWIN:
11018 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11019 break;
11021 case ABI_ELFv2:
11022 /* ??? Recomputing this every time is a bit expensive. Is there
11023 a place to cache this information? */
11024 if (rs6000_function_parms_need_stack (fun, incoming))
11025 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11026 else
11027 reg_parm_stack_space = 0;
11028 break;
11031 return reg_parm_stack_space;
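
[Editorial sketch, not part of rs6000.c: a concrete illustration of the ELFv2 case, with hypothetical callees. A prototyped, non-variadic callee whose arguments all arrive in registers needs no parameter save area; a variadic callee always gets the full area (64 bytes on 64-bit targets).]

    extern int fits_in_regs (int a, int b, double c);  /* hypothetical */
    extern int takes_varargs (int n, ...);             /* hypothetical */

    int caller (void)
    {
      int x = fits_in_regs (1, 2, 3.0);  /* reg_parm_stack_space == 0  */
      int y = takes_varargs (1, 2);      /* reg_parm_stack_space == 64 */
      return x + y;
    }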
11034 static void
11035 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
11037 int i;
11038 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
11040 if (nregs == 0)
11041 return;
11043 for (i = 0; i < nregs; i++)
11045 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
11046 if (reload_completed)
11048 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
11049 tem = NULL_RTX;
11050 else
11051 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
11052 i * GET_MODE_SIZE (reg_mode));
11054 else
11055 tem = replace_equiv_address (tem, XEXP (tem, 0));
11057 gcc_assert (tem);
11059 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
11063 /* Perform any actions needed for a function that is receiving a
11064 variable number of arguments.
11066 CUM is as above.
11068 MODE and TYPE are the mode and type of the current parameter.
11070 PRETEND_SIZE is a variable that should be set to the amount of stack
11071 that must be pushed by the prolog to pretend that our caller pushed
11072 it.
11074 Normally, this macro will push all remaining incoming registers on the
11075 stack and set PRETEND_SIZE to the length of the registers pushed. */
11077 static void
11078 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
11079 tree type, int *pretend_size ATTRIBUTE_UNUSED,
11080 int no_rtl)
11082 CUMULATIVE_ARGS next_cum;
11083 int reg_size = TARGET_32BIT ? 4 : 8;
11084 rtx save_area = NULL_RTX, mem;
11085 int first_reg_offset;
11086 alias_set_type set;
11088 /* Skip the last named argument. */
11089 next_cum = *get_cumulative_args (cum);
11090 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
11092 if (DEFAULT_ABI == ABI_V4)
11094 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
11096 if (! no_rtl)
11098 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
11099 HOST_WIDE_INT offset = 0;
11101 /* Try to optimize the size of the varargs save area.
11102 The ABI requires that ap.reg_save_area is doubleword
11103 aligned, but we don't need to allocate space for all
11104 the bytes, only those to which we actually will save
11105 anything. */
11106 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
11107 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
11108 if (TARGET_HARD_FLOAT && TARGET_FPRS
11109 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11110 && cfun->va_list_fpr_size)
11112 if (gpr_reg_num)
11113 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
11114 * UNITS_PER_FP_WORD;
11115 if (cfun->va_list_fpr_size
11116 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11117 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
11118 else
11119 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11120 * UNITS_PER_FP_WORD;
11122 if (gpr_reg_num)
11124 offset = -((first_reg_offset * reg_size) & ~7);
11125 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
11127 gpr_reg_num = cfun->va_list_gpr_size;
11128 if (reg_size == 4 && (first_reg_offset & 1))
11129 gpr_reg_num++;
11131 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
11133 else if (fpr_size)
11134 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
11135 * UNITS_PER_FP_WORD
11136 - (int) (GP_ARG_NUM_REG * reg_size);
11138 if (gpr_size + fpr_size)
11140 rtx reg_save_area
11141 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
11142 gcc_assert (GET_CODE (reg_save_area) == MEM);
11143 reg_save_area = XEXP (reg_save_area, 0);
11144 if (GET_CODE (reg_save_area) == PLUS)
11146 gcc_assert (XEXP (reg_save_area, 0)
11147 == virtual_stack_vars_rtx);
11148 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
11149 offset += INTVAL (XEXP (reg_save_area, 1));
11151 else
11152 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
11155 cfun->machine->varargs_save_offset = offset;
11156 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
11159 else
11161 first_reg_offset = next_cum.words;
11162 save_area = crtl->args.internal_arg_pointer;
11164 if (targetm.calls.must_pass_in_stack (mode, type))
11165 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11168 set = get_varargs_alias_set ();
11169 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11170 && cfun->va_list_gpr_size)
11172 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11174 if (va_list_gpr_counter_field)
11175 /* V4 va_list_gpr_size counts number of registers needed. */
11176 n_gpr = cfun->va_list_gpr_size;
11177 else
11178 /* char * va_list instead counts number of bytes needed. */
11179 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11181 if (nregs > n_gpr)
11182 nregs = n_gpr;
11184 mem = gen_rtx_MEM (BLKmode,
11185 plus_constant (Pmode, save_area,
11186 first_reg_offset * reg_size));
11187 MEM_NOTRAP_P (mem) = 1;
11188 set_mem_alias_set (mem, set);
11189 set_mem_align (mem, BITS_PER_WORD);
11191 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11192 nregs);
11195 /* Save FP registers if needed. */
11196 if (DEFAULT_ABI == ABI_V4
11197 && TARGET_HARD_FLOAT && TARGET_FPRS
11198 && ! no_rtl
11199 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11200 && cfun->va_list_fpr_size)
11202 int fregno = next_cum.fregno, nregs;
11203 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11204 rtx lab = gen_label_rtx ();
11205 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11206 * UNITS_PER_FP_WORD);
11208 emit_jump_insn
11209 (gen_rtx_SET (pc_rtx,
11210 gen_rtx_IF_THEN_ELSE (VOIDmode,
11211 gen_rtx_NE (VOIDmode, cr1,
11212 const0_rtx),
11213 gen_rtx_LABEL_REF (VOIDmode, lab),
11214 pc_rtx)));
11216 for (nregs = 0;
11217 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11218 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11220 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11221 ? DFmode : SFmode,
11222 plus_constant (Pmode, save_area, off));
11223 MEM_NOTRAP_P (mem) = 1;
11224 set_mem_alias_set (mem, set);
11225 set_mem_align (mem, GET_MODE_ALIGNMENT (
11226 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11227 ? DFmode : SFmode));
11228 emit_move_insn (mem, gen_rtx_REG (
11229 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11230 ? DFmode : SFmode, fregno));
11233 emit_label (lab);
11237 /* Create the va_list data type. */
11239 static tree
11240 rs6000_build_builtin_va_list (void)
11242 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11244 /* For AIX, prefer 'char *' because that's what the system
11245 header files like. */
11246 if (DEFAULT_ABI != ABI_V4)
11247 return build_pointer_type (char_type_node);
11249 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11250 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11251 get_identifier ("__va_list_tag"), record);
11253 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11254 unsigned_char_type_node);
11255 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11256 unsigned_char_type_node);
11257 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11258 every user file. */
11259 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11260 get_identifier ("reserved"), short_unsigned_type_node);
11261 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11262 get_identifier ("overflow_arg_area"),
11263 ptr_type_node);
11264 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11265 get_identifier ("reg_save_area"),
11266 ptr_type_node);
11268 va_list_gpr_counter_field = f_gpr;
11269 va_list_fpr_counter_field = f_fpr;
11271 DECL_FIELD_CONTEXT (f_gpr) = record;
11272 DECL_FIELD_CONTEXT (f_fpr) = record;
11273 DECL_FIELD_CONTEXT (f_res) = record;
11274 DECL_FIELD_CONTEXT (f_ovf) = record;
11275 DECL_FIELD_CONTEXT (f_sav) = record;
11277 TYPE_STUB_DECL (record) = type_decl;
11278 TYPE_NAME (record) = type_decl;
11279 TYPE_FIELDS (record) = f_gpr;
11280 DECL_CHAIN (f_gpr) = f_fpr;
11281 DECL_CHAIN (f_fpr) = f_res;
11282 DECL_CHAIN (f_res) = f_ovf;
11283 DECL_CHAIN (f_ovf) = f_sav;
11285 layout_type (record);
11287 /* The correct type is an array type of one element. */
11288 return build_array_type (record, build_index_type (size_zero_node));
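
[Editorial sketch, not part of rs6000.c: at the C level, the record built above corresponds to the following declaration. The field order and types follow the code directly; the typedef name is illustrative.]

    typedef struct __va_list_tag
    {
      unsigned char gpr;          /* next GP argument register to consume */
      unsigned char fpr;          /* next FP argument register to consume */
      unsigned short reserved;    /* named padding; keeps -Wpadded quiet */
      void *overflow_arg_area;    /* arguments that spilled to the stack */
      void *reg_save_area;        /* registers dumped by the prologue */
    } __builtin_va_list[1];       /* array of one element, as returned */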
11291 /* Implement va_start. */
11293 static void
11294 rs6000_va_start (tree valist, rtx nextarg)
11296 HOST_WIDE_INT words, n_gpr, n_fpr;
11297 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11298 tree gpr, fpr, ovf, sav, t;
11300 /* Only SVR4 needs something special. */
11301 if (DEFAULT_ABI != ABI_V4)
11303 std_expand_builtin_va_start (valist, nextarg);
11304 return;
11307 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11308 f_fpr = DECL_CHAIN (f_gpr);
11309 f_res = DECL_CHAIN (f_fpr);
11310 f_ovf = DECL_CHAIN (f_res);
11311 f_sav = DECL_CHAIN (f_ovf);
11313 valist = build_simple_mem_ref (valist);
11314 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11315 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11316 f_fpr, NULL_TREE);
11317 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11318 f_ovf, NULL_TREE);
11319 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11320 f_sav, NULL_TREE);
11322 /* Count number of gp and fp argument registers used. */
11323 words = crtl->args.info.words;
11324 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11325 GP_ARG_NUM_REG);
11326 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11327 FP_ARG_NUM_REG);
11329 if (TARGET_DEBUG_ARG)
11330 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
11331 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
11332 words, n_gpr, n_fpr);
11334 if (cfun->va_list_gpr_size)
11336 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11337 build_int_cst (NULL_TREE, n_gpr));
11338 TREE_SIDE_EFFECTS (t) = 1;
11339 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11342 if (cfun->va_list_fpr_size)
11344 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11345 build_int_cst (NULL_TREE, n_fpr));
11346 TREE_SIDE_EFFECTS (t) = 1;
11347 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11349 #ifdef HAVE_AS_GNU_ATTRIBUTE
11350 if (call_ABI_of_interest (cfun->decl))
11351 rs6000_passes_float = true;
11352 #endif
11355 /* Find the overflow area. */
11356 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
11357 if (words != 0)
11358 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
11359 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11360 TREE_SIDE_EFFECTS (t) = 1;
11361 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11363 /* If there were no va_arg invocations, don't set up the register
11364 save area. */
11365 if (!cfun->va_list_gpr_size
11366 && !cfun->va_list_fpr_size
11367 && n_gpr < GP_ARG_NUM_REG
11368 && n_fpr < FP_ARG_V4_MAX_REG)
11369 return;
11371 /* Find the register save area. */
11372 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11373 if (cfun->machine->varargs_save_offset)
11374 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11375 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11376 TREE_SIDE_EFFECTS (t) = 1;
11377 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
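
[Editorial sketch, not part of rs6000.c: spelled out in C, the trees emitted above amount to these assignments. It reuses the __va_list_tag layout shown earlier; all parameters stand in for the RTL values computed above.]

    static void
    va_start_sketch (struct __va_list_tag *ap,
                     unsigned char n_gpr, unsigned char n_fpr,
                     char *arg_ptr, long words, long word_size,
                     char *frame_base, long save_offset)
    {
      ap->gpr = n_gpr;                               /* GPRs already used */
      ap->fpr = n_fpr;                               /* FPRs already used */
      ap->overflow_arg_area = arg_ptr + words * word_size;
      ap->reg_save_area = frame_base + save_offset;  /* varargs_save_offset */
    }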
11380 /* Implement va_arg. */
11382 static tree
11383 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11384 gimple_seq *post_p)
11386 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11387 tree gpr, fpr, ovf, sav, reg, t, u;
11388 int size, rsize, n_reg, sav_ofs, sav_scale;
11389 tree lab_false, lab_over, addr;
11390 int align;
11391 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11392 int regalign = 0;
11393 gimple stmt;
11395 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11397 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11398 return build_va_arg_indirect_ref (t);
11401 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11402 earlier version of gcc, with the property that it always applied alignment
11403 adjustments to the va-args (even for zero-sized types). The cheapest way
11404 to deal with this is to replicate the effect of the part of
11405 std_gimplify_va_arg_expr that carries out the align adjust, for the case
11406 of relevance.
11407 We don't need to check for pass-by-reference because of the test above.
11408 We can return a simplified answer, since we know there's no offset to add. */
11410 if (((TARGET_MACHO
11411 && rs6000_darwin64_abi)
11412 || DEFAULT_ABI == ABI_ELFv2
11413 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11414 && integer_zerop (TYPE_SIZE (type)))
11416 unsigned HOST_WIDE_INT align, boundary;
11417 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11418 align = PARM_BOUNDARY / BITS_PER_UNIT;
11419 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11420 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11421 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11422 boundary /= BITS_PER_UNIT;
11423 if (boundary > align)
11425 tree t;
11426 /* This updates arg ptr by the amount that would be necessary
11427 to align the zero-sized (but not zero-alignment) item. */
11428 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11429 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11430 gimplify_and_add (t, pre_p);
11432 t = fold_convert (sizetype, valist_tmp);
11433 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11434 fold_convert (TREE_TYPE (valist),
11435 fold_build2 (BIT_AND_EXPR, sizetype, t,
11436 size_int (-boundary))));
11437 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11438 gimplify_and_add (t, pre_p);
11440 /* Since it is zero-sized there's no increment for the item itself. */
11441 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11442 return build_va_arg_indirect_ref (valist_tmp);
11445 if (DEFAULT_ABI != ABI_V4)
11447 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11449 tree elem_type = TREE_TYPE (type);
11450 machine_mode elem_mode = TYPE_MODE (elem_type);
11451 int elem_size = GET_MODE_SIZE (elem_mode);
11453 if (elem_size < UNITS_PER_WORD)
11455 tree real_part, imag_part;
11456 gimple_seq post = NULL;
11458 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11459 &post);
11460 /* Copy the value into a temporary, lest the formal temporary
11461 be reused out from under us. */
11462 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11463 gimple_seq_add_seq (pre_p, post);
11465 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11466 post_p);
11468 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11472 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
11475 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11476 f_fpr = DECL_CHAIN (f_gpr);
11477 f_res = DECL_CHAIN (f_fpr);
11478 f_ovf = DECL_CHAIN (f_res);
11479 f_sav = DECL_CHAIN (f_ovf);
11481 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11482 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11483 f_fpr, NULL_TREE);
11484 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11485 f_ovf, NULL_TREE);
11486 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11487 f_sav, NULL_TREE);
11489 size = int_size_in_bytes (type);
11490 rsize = (size + 3) / 4;
11491 align = 1;
11493 if (TARGET_HARD_FLOAT && TARGET_FPRS
11494 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
11495 || (TARGET_DOUBLE_FLOAT
11496 && (TYPE_MODE (type) == DFmode
11497 || TYPE_MODE (type) == TFmode
11498 || TYPE_MODE (type) == SDmode
11499 || TYPE_MODE (type) == DDmode
11500 || TYPE_MODE (type) == TDmode))))
11502 /* FP args go in FP registers, if present. */
11503 reg = fpr;
11504 n_reg = (size + 7) / 8;
11505 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
11506 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
11507 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
11508 align = 8;
11510 else
11512 /* Otherwise into GP registers. */
11513 reg = gpr;
11514 n_reg = rsize;
11515 sav_ofs = 0;
11516 sav_scale = 4;
11517 if (n_reg == 2)
11518 align = 8;
11521 /* Pull the value out of the saved registers.... */
11523 lab_over = NULL;
11524 addr = create_tmp_var (ptr_type_node, "addr");
11526 /* AltiVec vectors never go in registers when -mabi=altivec. */
11527 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11528 align = 16;
11529 else
11531 lab_false = create_artificial_label (input_location);
11532 lab_over = create_artificial_label (input_location);
11534 /* Long long and SPE vectors are aligned in the registers.
11535 So is any other two-GPR item such as complex int, due to a
11536 historical mistake. */
11537 u = reg;
11538 if (n_reg == 2 && reg == gpr)
11540 regalign = 1;
11541 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11542 build_int_cst (TREE_TYPE (reg), n_reg - 1));
11543 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
11544 unshare_expr (reg), u);
11546 /* _Decimal128 is passed in even/odd fpr pairs; the stored
11547 reg number is 0 for f1, so we want to make it odd. */
11548 else if (reg == fpr && TYPE_MODE (type) == TDmode)
11550 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11551 build_int_cst (TREE_TYPE (reg), 1));
11552 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
11555 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
11556 t = build2 (GE_EXPR, boolean_type_node, u, t);
11557 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11558 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11559 gimplify_and_add (t, pre_p);
11561 t = sav;
11562 if (sav_ofs)
11563 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11565 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11566 build_int_cst (TREE_TYPE (reg), n_reg));
11567 u = fold_convert (sizetype, u);
11568 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
11569 t = fold_build_pointer_plus (t, u);
11571 /* _Decimal32 varargs are located in the second word of the 64-bit
11572 FP register for 32-bit binaries. */
11573 if (TARGET_32BIT
11574 && TARGET_HARD_FLOAT && TARGET_FPRS
11575 && TYPE_MODE (type) == SDmode)
11576 t = fold_build_pointer_plus_hwi (t, size);
11578 gimplify_assign (addr, t, pre_p);
11580 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11582 stmt = gimple_build_label (lab_false);
11583 gimple_seq_add_stmt (pre_p, stmt);
11585 if ((n_reg == 2 && !regalign) || n_reg > 2)
11587 /* Ensure that we don't find any more args in regs.
11588 Alignment has been taken care of for the special cases. */
11589 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
11593 /* ... otherwise out of the overflow area. */
11595 /* Care for on-stack alignment if needed. */
11596 t = ovf;
11597 if (align != 1)
11599 t = fold_build_pointer_plus_hwi (t, align - 1);
11600 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
11601 build_int_cst (TREE_TYPE (t), -align));
11603 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11605 gimplify_assign (unshare_expr (addr), t, pre_p);
11607 t = fold_build_pointer_plus_hwi (t, size);
11608 gimplify_assign (unshare_expr (ovf), t, pre_p);
11610 if (lab_over)
11612 stmt = gimple_build_label (lab_over);
11613 gimple_seq_add_stmt (pre_p, stmt);
11616 if (STRICT_ALIGNMENT
11617 && (TYPE_ALIGN (type)
11618 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
11620 /* The value (of type complex double, for example) may not be
11621 aligned in memory in the saved registers, so copy via a
11622 temporary. (This is the same code as used for SPARC.) */
11623 tree tmp = create_tmp_var (type, "va_arg_tmp");
11624 tree dest_addr = build_fold_addr_expr (tmp);
11626 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
11627 3, dest_addr, addr, size_int (rsize * 4));
11629 gimplify_and_add (copy, pre_p);
11630 addr = dest_addr;
11633 addr = fold_convert (ptrtype, addr);
11634 return build_va_arg_indirect_ref (addr);
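
[Editorial sketch, not part of rs6000.c: the "care for on-stack alignment" step above is the standard round-up-and-mask idiom. A standalone equivalent, assuming align is a power of two:]

    #include <stdint.h>

    static inline void *
    align_up (void *p, uintptr_t align)   /* align must be a power of two */
    {
      uintptr_t u = (uintptr_t) p;
      /* Add align-1, then clear the low bits -- the same arithmetic as
         the fold_build_pointer_plus_hwi / BIT_AND_EXPR pair above.  */
      return (void *) ((u + align - 1) & -align);
    }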
11637 /* Builtins. */
11639 static void
11640 def_builtin (const char *name, tree type, enum rs6000_builtins code)
11642 tree t;
11643 unsigned classify = rs6000_builtin_info[(int)code].attr;
11644 const char *attr_string = "";
11646 gcc_assert (name != NULL);
11647 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
11649 if (rs6000_builtin_decls[(int)code])
11650 fatal_error (input_location,
11651 "internal error: builtin function %s already processed", name);
11653 rs6000_builtin_decls[(int)code] = t =
11654 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
11656 /* Set any special attributes. */
11657 if ((classify & RS6000_BTC_CONST) != 0)
11659 /* const function, function only depends on the inputs. */
11660 TREE_READONLY (t) = 1;
11661 TREE_NOTHROW (t) = 1;
11662 attr_string = ", const";
11664 else if ((classify & RS6000_BTC_PURE) != 0)
11666 /* pure function, function can read global memory, but does not set any
11667 external state. */
11668 DECL_PURE_P (t) = 1;
11669 TREE_NOTHROW (t) = 1;
11670 attr_string = ", pure";
11672 else if ((classify & RS6000_BTC_FP) != 0)
11674 /* Function is a math function. If rounding mode is on, then treat the
11675 function as not reading global memory, but it can have arbitrary side
11676 effects. If it is off, then assume the function is a const function.
11677 This mimics the ATTR_MATHFN_FPROUNDING attribute in
11678 builtin-attribute.def that is used for the math functions. */
11679 TREE_NOTHROW (t) = 1;
11680 if (flag_rounding_math)
11682 DECL_PURE_P (t) = 1;
11683 DECL_IS_NOVOPS (t) = 1;
11684 attr_string = ", fp, pure";
11686 else
11688 TREE_READONLY (t) = 1;
11689 attr_string = ", fp, const";
11692 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
11693 gcc_unreachable ();
11695 if (TARGET_DEBUG_BUILTIN)
11696 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
11697 (int)code, name, attr_string);
11700 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
11702 #undef RS6000_BUILTIN_1
11703 #undef RS6000_BUILTIN_2
11704 #undef RS6000_BUILTIN_3
11705 #undef RS6000_BUILTIN_A
11706 #undef RS6000_BUILTIN_D
11707 #undef RS6000_BUILTIN_E
11708 #undef RS6000_BUILTIN_H
11709 #undef RS6000_BUILTIN_P
11710 #undef RS6000_BUILTIN_Q
11711 #undef RS6000_BUILTIN_S
11712 #undef RS6000_BUILTIN_X
11714 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11715 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11716 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
11717 { MASK, ICODE, NAME, ENUM },
11719 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11720 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11721 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11722 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11723 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11724 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11725 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11726 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11728 static const struct builtin_description bdesc_3arg[] =
11730 #include "rs6000-builtin.def"
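
[Editorial sketch, not part of rs6000.c: the #undef/#define blocks surrounding each table are a classic X-macro scheme. rs6000-builtin.def invokes one RS6000_BUILTIN_* macro per builtin; each table redefines exactly one of them to emit an initializer while the others expand to nothing, so the same .def file populates many tables. A minimal self-contained version of the same idea, with hypothetical names:]

    struct desc { const char *name; };

    /* One entry per operation; X1 marks unary ops, X2 binary ops.  */
    #define OPS(X1, X2)  X1 (NEG, "neg")  X2 (ADD, "add")  X2 (SUB, "sub")

    #define EMIT(ENUM, NAME)  { NAME },
    #define SKIP(ENUM, NAME)  /* expands to nothing */

    static const struct desc unary_ops[]  = { OPS (EMIT, SKIP) }; /* "neg" */
    static const struct desc binary_ops[] = { OPS (SKIP, EMIT) }; /* "add", "sub" */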
11733 /* DST operations: void foo (void *, const int, const char). */
11735 #undef RS6000_BUILTIN_1
11736 #undef RS6000_BUILTIN_2
11737 #undef RS6000_BUILTIN_3
11738 #undef RS6000_BUILTIN_A
11739 #undef RS6000_BUILTIN_D
11740 #undef RS6000_BUILTIN_E
11741 #undef RS6000_BUILTIN_H
11742 #undef RS6000_BUILTIN_P
11743 #undef RS6000_BUILTIN_Q
11744 #undef RS6000_BUILTIN_S
11745 #undef RS6000_BUILTIN_X
11747 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11748 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11749 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11750 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11751 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
11752 { MASK, ICODE, NAME, ENUM },
11754 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11755 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11756 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11757 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11758 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11759 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11761 static const struct builtin_description bdesc_dst[] =
11763 #include "rs6000-builtin.def"
11766 /* Simple binary operations: VECc = foo (VECa, VECb). */
11768 #undef RS6000_BUILTIN_1
11769 #undef RS6000_BUILTIN_2
11770 #undef RS6000_BUILTIN_3
11771 #undef RS6000_BUILTIN_A
11772 #undef RS6000_BUILTIN_D
11773 #undef RS6000_BUILTIN_E
11774 #undef RS6000_BUILTIN_H
11775 #undef RS6000_BUILTIN_P
11776 #undef RS6000_BUILTIN_Q
11777 #undef RS6000_BUILTIN_S
11778 #undef RS6000_BUILTIN_X
11780 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11781 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
11782 { MASK, ICODE, NAME, ENUM },
11784 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11785 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11786 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11787 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11788 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11789 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11790 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11791 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11792 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11794 static const struct builtin_description bdesc_2arg[] =
11796 #include "rs6000-builtin.def"
11799 #undef RS6000_BUILTIN_1
11800 #undef RS6000_BUILTIN_2
11801 #undef RS6000_BUILTIN_3
11802 #undef RS6000_BUILTIN_A
11803 #undef RS6000_BUILTIN_D
11804 #undef RS6000_BUILTIN_E
11805 #undef RS6000_BUILTIN_H
11806 #undef RS6000_BUILTIN_P
11807 #undef RS6000_BUILTIN_Q
11808 #undef RS6000_BUILTIN_S
11809 #undef RS6000_BUILTIN_X
11811 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11812 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11813 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11814 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11815 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11816 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11817 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11818 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
11819 { MASK, ICODE, NAME, ENUM },
11821 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11822 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11823 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11825 /* AltiVec predicates. */
11827 static const struct builtin_description bdesc_altivec_preds[] =
11829 #include "rs6000-builtin.def"
11832 /* SPE predicates. */
11833 #undef RS6000_BUILTIN_1
11834 #undef RS6000_BUILTIN_2
11835 #undef RS6000_BUILTIN_3
11836 #undef RS6000_BUILTIN_A
11837 #undef RS6000_BUILTIN_D
11838 #undef RS6000_BUILTIN_E
11839 #undef RS6000_BUILTIN_H
11840 #undef RS6000_BUILTIN_P
11841 #undef RS6000_BUILTIN_Q
11842 #undef RS6000_BUILTIN_S
11843 #undef RS6000_BUILTIN_X
11845 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11846 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11847 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11848 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11849 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11850 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11851 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11852 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11853 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11854 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
11855 { MASK, ICODE, NAME, ENUM },
11857 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11859 static const struct builtin_description bdesc_spe_predicates[] =
11861 #include "rs6000-builtin.def"
11864 /* SPE evsel predicates. */
11865 #undef RS6000_BUILTIN_1
11866 #undef RS6000_BUILTIN_2
11867 #undef RS6000_BUILTIN_3
11868 #undef RS6000_BUILTIN_A
11869 #undef RS6000_BUILTIN_D
11870 #undef RS6000_BUILTIN_E
11871 #undef RS6000_BUILTIN_H
11872 #undef RS6000_BUILTIN_P
11873 #undef RS6000_BUILTIN_Q
11874 #undef RS6000_BUILTIN_S
11875 #undef RS6000_BUILTIN_X
11877 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11878 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11879 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11880 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11881 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11882 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
11883 { MASK, ICODE, NAME, ENUM },
11885 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11886 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11887 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11888 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11889 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11891 static const struct builtin_description bdesc_spe_evsel[] =
11893 #include "rs6000-builtin.def"
11896 /* PAIRED predicates. */
11897 #undef RS6000_BUILTIN_1
11898 #undef RS6000_BUILTIN_2
11899 #undef RS6000_BUILTIN_3
11900 #undef RS6000_BUILTIN_A
11901 #undef RS6000_BUILTIN_D
11902 #undef RS6000_BUILTIN_E
11903 #undef RS6000_BUILTIN_H
11904 #undef RS6000_BUILTIN_P
11905 #undef RS6000_BUILTIN_Q
11906 #undef RS6000_BUILTIN_S
11907 #undef RS6000_BUILTIN_X
11909 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11910 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11911 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11912 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11913 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11914 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11915 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11916 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11917 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
11918 { MASK, ICODE, NAME, ENUM },
11920 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11921 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11923 static const struct builtin_description bdesc_paired_preds[] =
11925 #include "rs6000-builtin.def"
11928 /* ABS* operations. */
11930 #undef RS6000_BUILTIN_1
11931 #undef RS6000_BUILTIN_2
11932 #undef RS6000_BUILTIN_3
11933 #undef RS6000_BUILTIN_A
11934 #undef RS6000_BUILTIN_D
11935 #undef RS6000_BUILTIN_E
11936 #undef RS6000_BUILTIN_H
11937 #undef RS6000_BUILTIN_P
11938 #undef RS6000_BUILTIN_Q
11939 #undef RS6000_BUILTIN_S
11940 #undef RS6000_BUILTIN_X
11942 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11943 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11944 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11945 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
11946 { MASK, ICODE, NAME, ENUM },
11948 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11949 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11950 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11951 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11952 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11953 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11954 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11956 static const struct builtin_description bdesc_abs[] =
11958 #include "rs6000-builtin.def"
11961 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
11962 foo (VECa). */
11964 #undef RS6000_BUILTIN_1
11965 #undef RS6000_BUILTIN_2
11966 #undef RS6000_BUILTIN_3
11967 #undef RS6000_BUILTIN_A
11968 #undef RS6000_BUILTIN_D
11969 #undef RS6000_BUILTIN_E
11970 #undef RS6000_BUILTIN_H
11971 #undef RS6000_BUILTIN_P
11972 #undef RS6000_BUILTIN_Q
11973 #undef RS6000_BUILTIN_S
11974 #undef RS6000_BUILTIN_X
11976 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
11977 { MASK, ICODE, NAME, ENUM },
11979 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11980 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11981 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11982 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11983 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11984 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11985 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11986 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11987 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11988 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11990 static const struct builtin_description bdesc_1arg[] =
11992 #include "rs6000-builtin.def"
11995 /* HTM builtins. */
11996 #undef RS6000_BUILTIN_1
11997 #undef RS6000_BUILTIN_2
11998 #undef RS6000_BUILTIN_3
11999 #undef RS6000_BUILTIN_A
12000 #undef RS6000_BUILTIN_D
12001 #undef RS6000_BUILTIN_E
12002 #undef RS6000_BUILTIN_H
12003 #undef RS6000_BUILTIN_P
12004 #undef RS6000_BUILTIN_Q
12005 #undef RS6000_BUILTIN_S
12006 #undef RS6000_BUILTIN_X
12008 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12009 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12010 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12011 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12012 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12013 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12014 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
12015 { MASK, ICODE, NAME, ENUM },
12017 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12018 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12019 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12020 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12022 static const struct builtin_description bdesc_htm[] =
12024 #include "rs6000-builtin.def"
12027 #undef RS6000_BUILTIN_1
12028 #undef RS6000_BUILTIN_2
12029 #undef RS6000_BUILTIN_3
12030 #undef RS6000_BUILTIN_A
12031 #undef RS6000_BUILTIN_D
12032 #undef RS6000_BUILTIN_E
12033 #undef RS6000_BUILTIN_H
12034 #undef RS6000_BUILTIN_P
12035 #undef RS6000_BUILTIN_Q
12036 #undef RS6000_BUILTIN_S
12038 /* Return true if a builtin function is overloaded. */
12039 bool
12040 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
12042 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
12045 /* Expand an expression EXP that calls a builtin without arguments. */
12046 static rtx
12047 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
12049 rtx pat;
12050 machine_mode tmode = insn_data[icode].operand[0].mode;
12052 if (icode == CODE_FOR_nothing)
12053 /* Builtin not supported on this processor. */
12054 return 0;
12056 if (target == 0
12057 || GET_MODE (target) != tmode
12058 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12059 target = gen_reg_rtx (tmode);
12061 pat = GEN_FCN (icode) (target);
12062 if (! pat)
12063 return 0;
12064 emit_insn (pat);
12066 return target;
12070 static rtx
12071 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
12073 rtx pat;
12074 tree arg0 = CALL_EXPR_ARG (exp, 0);
12075 tree arg1 = CALL_EXPR_ARG (exp, 1);
12076 rtx op0 = expand_normal (arg0);
12077 rtx op1 = expand_normal (arg1);
12078 machine_mode mode0 = insn_data[icode].operand[0].mode;
12079 machine_mode mode1 = insn_data[icode].operand[1].mode;
12081 if (icode == CODE_FOR_nothing)
12082 /* Builtin not supported on this processor. */
12083 return 0;
12085 /* If we got invalid arguments bail out before generating bad rtl. */
12086 if (arg0 == error_mark_node || arg1 == error_mark_node)
12087 return const0_rtx;
12089 if (GET_CODE (op0) != CONST_INT
12090 || INTVAL (op0) > 255
12091 || INTVAL (op0) < 0)
12093 error ("argument 1 must be an 8-bit field value");
12094 return const0_rtx;
12097 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12098 op0 = copy_to_mode_reg (mode0, op0);
12100 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12101 op1 = copy_to_mode_reg (mode1, op1);
12103 pat = GEN_FCN (icode) (op0, op1);
12104 if (! pat)
12105 return const0_rtx;
12106 emit_insn (pat);
12108 return NULL_RTX;
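
[Editorial sketch, not part of rs6000.c: a usage example for the builtin validated above. __builtin_mtfsf moves fields of a double's bit pattern into the FPSCR, and its first operand must be a literal 8-bit field mask, exactly as the CONST_INT range check enforces.]

    void set_fpscr_from (double d)
    {
      __builtin_mtfsf (0xff, d);   /* literal mask selecting all 8 fields */
    }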
12112 static rtx
12113 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
12115 rtx pat;
12116 tree arg0 = CALL_EXPR_ARG (exp, 0);
12117 rtx op0 = expand_normal (arg0);
12118 machine_mode tmode = insn_data[icode].operand[0].mode;
12119 machine_mode mode0 = insn_data[icode].operand[1].mode;
12121 if (icode == CODE_FOR_nothing)
12122 /* Builtin not supported on this processor. */
12123 return 0;
12125 /* If we got invalid arguments bail out before generating bad rtl. */
12126 if (arg0 == error_mark_node)
12127 return const0_rtx;
12129 if (icode == CODE_FOR_altivec_vspltisb
12130 || icode == CODE_FOR_altivec_vspltish
12131 || icode == CODE_FOR_altivec_vspltisw
12132 || icode == CODE_FOR_spe_evsplatfi
12133 || icode == CODE_FOR_spe_evsplati)
12135 /* Only allow 5-bit *signed* literals. */
12136 if (GET_CODE (op0) != CONST_INT
12137 || INTVAL (op0) > 15
12138 || INTVAL (op0) < -16)
12140 error ("argument 1 must be a 5-bit signed literal");
12141 return const0_rtx;
12145 if (target == 0
12146 || GET_MODE (target) != tmode
12147 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12148 target = gen_reg_rtx (tmode);
12150 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12151 op0 = copy_to_mode_reg (mode0, op0);
12153 pat = GEN_FCN (icode) (target, op0);
12154 if (! pat)
12155 return 0;
12156 emit_insn (pat);
12158 return target;
12161 static rtx
12162 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12164 rtx pat, scratch1, scratch2;
12165 tree arg0 = CALL_EXPR_ARG (exp, 0);
12166 rtx op0 = expand_normal (arg0);
12167 machine_mode tmode = insn_data[icode].operand[0].mode;
12168 machine_mode mode0 = insn_data[icode].operand[1].mode;
12170 /* If we have invalid arguments, bail out before generating bad rtl. */
12171 if (arg0 == error_mark_node)
12172 return const0_rtx;
12174 if (target == 0
12175 || GET_MODE (target) != tmode
12176 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12177 target = gen_reg_rtx (tmode);
12179 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12180 op0 = copy_to_mode_reg (mode0, op0);
12182 scratch1 = gen_reg_rtx (mode0);
12183 scratch2 = gen_reg_rtx (mode0);
12185 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12186 if (! pat)
12187 return 0;
12188 emit_insn (pat);
12190 return target;
12193 static rtx
12194 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12196 rtx pat;
12197 tree arg0 = CALL_EXPR_ARG (exp, 0);
12198 tree arg1 = CALL_EXPR_ARG (exp, 1);
12199 rtx op0 = expand_normal (arg0);
12200 rtx op1 = expand_normal (arg1);
12201 machine_mode tmode = insn_data[icode].operand[0].mode;
12202 machine_mode mode0 = insn_data[icode].operand[1].mode;
12203 machine_mode mode1 = insn_data[icode].operand[2].mode;
12205 if (icode == CODE_FOR_nothing)
12206 /* Builtin not supported on this processor. */
12207 return 0;
12209 /* If we got invalid arguments bail out before generating bad rtl. */
12210 if (arg0 == error_mark_node || arg1 == error_mark_node)
12211 return const0_rtx;
12213 if (icode == CODE_FOR_altivec_vcfux
12214 || icode == CODE_FOR_altivec_vcfsx
12215 || icode == CODE_FOR_altivec_vctsxs
12216 || icode == CODE_FOR_altivec_vctuxs
12217 || icode == CODE_FOR_altivec_vspltb
12218 || icode == CODE_FOR_altivec_vsplth
12219 || icode == CODE_FOR_altivec_vspltw
12220 || icode == CODE_FOR_spe_evaddiw
12221 || icode == CODE_FOR_spe_evldd
12222 || icode == CODE_FOR_spe_evldh
12223 || icode == CODE_FOR_spe_evldw
12224 || icode == CODE_FOR_spe_evlhhesplat
12225 || icode == CODE_FOR_spe_evlhhossplat
12226 || icode == CODE_FOR_spe_evlhhousplat
12227 || icode == CODE_FOR_spe_evlwhe
12228 || icode == CODE_FOR_spe_evlwhos
12229 || icode == CODE_FOR_spe_evlwhou
12230 || icode == CODE_FOR_spe_evlwhsplat
12231 || icode == CODE_FOR_spe_evlwwsplat
12232 || icode == CODE_FOR_spe_evrlwi
12233 || icode == CODE_FOR_spe_evslwi
12234 || icode == CODE_FOR_spe_evsrwis
12235 || icode == CODE_FOR_spe_evsubifw
12236 || icode == CODE_FOR_spe_evsrwiu)
12238 /* Only allow 5-bit unsigned literals. */
12239 STRIP_NOPS (arg1);
12240 if (TREE_CODE (arg1) != INTEGER_CST
12241 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12243 error ("argument 2 must be a 5-bit unsigned literal");
12244 return const0_rtx;
12248 if (target == 0
12249 || GET_MODE (target) != tmode
12250 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12251 target = gen_reg_rtx (tmode);
12253 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12254 op0 = copy_to_mode_reg (mode0, op0);
12255 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12256 op1 = copy_to_mode_reg (mode1, op1);
12258 pat = GEN_FCN (icode) (target, op0, op1);
12259 if (! pat)
12260 return 0;
12261 emit_insn (pat);
12263 return target;
12266 static rtx
12267 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12269 rtx pat, scratch;
12270 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12271 tree arg0 = CALL_EXPR_ARG (exp, 1);
12272 tree arg1 = CALL_EXPR_ARG (exp, 2);
12273 rtx op0 = expand_normal (arg0);
12274 rtx op1 = expand_normal (arg1);
12275 machine_mode tmode = SImode;
12276 machine_mode mode0 = insn_data[icode].operand[1].mode;
12277 machine_mode mode1 = insn_data[icode].operand[2].mode;
12278 int cr6_form_int;
12280 if (TREE_CODE (cr6_form) != INTEGER_CST)
12282 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12283 return const0_rtx;
12285 else
12286 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12288 gcc_assert (mode0 == mode1);
12290 /* If we have invalid arguments, bail out before generating bad rtl. */
12291 if (arg0 == error_mark_node || arg1 == error_mark_node)
12292 return const0_rtx;
12294 if (target == 0
12295 || GET_MODE (target) != tmode
12296 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12297 target = gen_reg_rtx (tmode);
12299 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12300 op0 = copy_to_mode_reg (mode0, op0);
12301 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12302 op1 = copy_to_mode_reg (mode1, op1);
12304 scratch = gen_reg_rtx (mode0);
12306 pat = GEN_FCN (icode) (scratch, op0, op1);
12307 if (! pat)
12308 return 0;
12309 emit_insn (pat);
12311 /* The vec_any* and vec_all* predicates use the same opcodes for two
12312 different operations, but the bits in CR6 will be different
12313 depending on what information we want. So we have to play tricks
12314 with CR6 to get the right bits out.
12316 If you think this is disgusting, look at the specs for the
12317 AltiVec predicates. */
12319 switch (cr6_form_int)
12321 case 0:
12322 emit_insn (gen_cr6_test_for_zero (target));
12323 break;
12324 case 1:
12325 emit_insn (gen_cr6_test_for_zero_reverse (target));
12326 break;
12327 case 2:
12328 emit_insn (gen_cr6_test_for_lt (target));
12329 break;
12330 case 3:
12331 emit_insn (gen_cr6_test_for_lt_reverse (target));
12332 break;
12333 default:
12334 error ("argument 1 of __builtin_altivec_predicate is out of range");
12335 break;
12338 return target;
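
[Editorial sketch, not part of rs6000.c: a user-level view of the predicates expanded above (requires -maltivec). A vec_all_* and its vec_any_* counterpart compile to the same compare instruction; only the CR6 test chosen by the switch above differs.]

    #include <altivec.h>

    int all_eq (vector signed int a, vector signed int b)
    {
      return vec_all_eq (a, b);   /* tests the "all elements" CR6 bit */
    }

    int any_ne (vector signed int a, vector signed int b)
    {
      return vec_any_ne (a, b);   /* same opcode, different CR6 bits */
    }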
12341 static rtx
12342 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12344 rtx pat, addr;
12345 tree arg0 = CALL_EXPR_ARG (exp, 0);
12346 tree arg1 = CALL_EXPR_ARG (exp, 1);
12347 machine_mode tmode = insn_data[icode].operand[0].mode;
12348 machine_mode mode0 = Pmode;
12349 machine_mode mode1 = Pmode;
12350 rtx op0 = expand_normal (arg0);
12351 rtx op1 = expand_normal (arg1);
12353 if (icode == CODE_FOR_nothing)
12354 /* Builtin not supported on this processor. */
12355 return 0;
12357 /* If we got invalid arguments bail out before generating bad rtl. */
12358 if (arg0 == error_mark_node || arg1 == error_mark_node)
12359 return const0_rtx;
12361 if (target == 0
12362 || GET_MODE (target) != tmode
12363 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12364 target = gen_reg_rtx (tmode);
12366 op1 = copy_to_mode_reg (mode1, op1);
12368 if (op0 == const0_rtx)
12370 addr = gen_rtx_MEM (tmode, op1);
12372 else
12374 op0 = copy_to_mode_reg (mode0, op0);
12375 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12378 pat = GEN_FCN (icode) (target, addr);
12380 if (! pat)
12381 return 0;
12382 emit_insn (pat);
12384 return target;
12387 /* Return a constant vector for use as a little-endian permute control vector
12388 to reverse the order of elements of the given vector mode. */
12389 static rtx
12390 swap_selector_for_mode (machine_mode mode)
12392 /* These are little endian vectors, so their elements are reversed
12393 from what you would normally expect for a permute control vector. */
12394 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12395 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12396 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12397 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12398 unsigned int *swaparray, i;
12399 rtx perm[16];
12401 switch (mode)
12403 case V2DFmode:
12404 case V2DImode:
12405 swaparray = swap2;
12406 break;
12407 case V4SFmode:
12408 case V4SImode:
12409 swaparray = swap4;
12410 break;
12411 case V8HImode:
12412 swaparray = swap8;
12413 break;
12414 case V16QImode:
12415 swaparray = swap16;
12416 break;
12417 default:
12418 gcc_unreachable ();
12421 for (i = 0; i < 16; ++i)
12422 perm[i] = GEN_INT (swaparray[i]);
12424 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
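
[Editorial sketch, not part of rs6000.c: all four tables above follow one rule. Take the natural big-endian selector that reverses the mode's elements, then store it back-to-front, because vperm numbers selector bytes in big-endian order while the array initializer is laid out little-endian. A standalone generator that reproduces swap2/swap4/swap8/swap16 from the element size:]

    static void
    build_swap_selector (unsigned char sel[16], unsigned elt_bytes)
    {
      unsigned n_elts = 16 / elt_bytes;
      for (unsigned i = 0; i < 16; i++)
        {
          /* Byte i of the natural element-reversing permute control.  */
          unsigned nat = (n_elts - 1 - i / elt_bytes) * elt_bytes
                         + i % elt_bytes;
          /* Stored back-to-front for the little-endian register image.  */
          sel[15 - i] = nat;
        }
    }

    /* build_swap_selector (sel, 8) yields {7,6,...,0,15,14,...,8} == swap2.  */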
12427 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12428 with -maltivec=be specified. Issue the load followed by an element-reversing
12429 permute. */
12430 void
12431 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12433 rtx tmp = gen_reg_rtx (mode);
12434 rtx load = gen_rtx_SET (tmp, op1);
12435 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12436 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12437 rtx sel = swap_selector_for_mode (mode);
12438 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12440 gcc_assert (REG_P (op0));
12441 emit_insn (par);
12442 emit_insn (gen_rtx_SET (op0, vperm));
12445 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12446 with -maltivec=be specified. Issue the store preceded by an element-reversing
12447 permute. */
12448 void
12449 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12451 rtx tmp = gen_reg_rtx (mode);
12452 rtx store = gen_rtx_SET (op0, tmp);
12453 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12454 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12455 rtx sel = swap_selector_for_mode (mode);
12456 rtx vperm;
12458 gcc_assert (REG_P (op1));
12459 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12460 emit_insn (gen_rtx_SET (tmp, vperm));
12461 emit_insn (par);
12464 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12465 specified. Issue the store preceded by an element-reversing permute. */
12466 void
12467 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12469 machine_mode inner_mode = GET_MODE_INNER (mode);
12470 rtx tmp = gen_reg_rtx (mode);
12471 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12472 rtx sel = swap_selector_for_mode (mode);
12473 rtx vperm;
12475 gcc_assert (REG_P (op1));
12476 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12477 emit_insn (gen_rtx_SET (tmp, vperm));
12478 emit_insn (gen_rtx_SET (op0, stvx));
12481 static rtx
12482 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
12484 rtx pat, addr;
12485 tree arg0 = CALL_EXPR_ARG (exp, 0);
12486 tree arg1 = CALL_EXPR_ARG (exp, 1);
12487 machine_mode tmode = insn_data[icode].operand[0].mode;
12488 machine_mode mode0 = Pmode;
12489 machine_mode mode1 = Pmode;
12490 rtx op0 = expand_normal (arg0);
12491 rtx op1 = expand_normal (arg1);
12493 if (icode == CODE_FOR_nothing)
12494 /* Builtin not supported on this processor. */
12495 return 0;
12497 /* If we got invalid arguments bail out before generating bad rtl. */
12498 if (arg0 == error_mark_node || arg1 == error_mark_node)
12499 return const0_rtx;
12501 if (target == 0
12502 || GET_MODE (target) != tmode
12503 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12504 target = gen_reg_rtx (tmode);
12506 op1 = copy_to_mode_reg (mode1, op1);
12508 if (op0 == const0_rtx)
12510 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
12512 else
12514 op0 = copy_to_mode_reg (mode0, op0);
12515 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
12518 pat = GEN_FCN (icode) (target, addr);
12520 if (! pat)
12521 return 0;
12522 emit_insn (pat);
12524 return target;
12527 static rtx
12528 spe_expand_stv_builtin (enum insn_code icode, tree exp)
12530 tree arg0 = CALL_EXPR_ARG (exp, 0);
12531 tree arg1 = CALL_EXPR_ARG (exp, 1);
12532 tree arg2 = CALL_EXPR_ARG (exp, 2);
12533 rtx op0 = expand_normal (arg0);
12534 rtx op1 = expand_normal (arg1);
12535 rtx op2 = expand_normal (arg2);
12536 rtx pat;
12537 machine_mode mode0 = insn_data[icode].operand[0].mode;
12538 machine_mode mode1 = insn_data[icode].operand[1].mode;
12539 machine_mode mode2 = insn_data[icode].operand[2].mode;
12541 /* Invalid arguments. Bail before doing anything stoopid! */
12542 if (arg0 == error_mark_node
12543 || arg1 == error_mark_node
12544 || arg2 == error_mark_node)
12545 return const0_rtx;
12547 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
12548 op0 = copy_to_mode_reg (mode2, op0);
12549 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
12550 op1 = copy_to_mode_reg (mode0, op1);
12551 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
12552 op2 = copy_to_mode_reg (mode1, op2);
12554 pat = GEN_FCN (icode) (op1, op2, op0);
12555 if (pat)
12556 emit_insn (pat);
12557 return NULL_RTX;
12560 static rtx
12561 paired_expand_stv_builtin (enum insn_code icode, tree exp)
12563 tree arg0 = CALL_EXPR_ARG (exp, 0);
12564 tree arg1 = CALL_EXPR_ARG (exp, 1);
12565 tree arg2 = CALL_EXPR_ARG (exp, 2);
12566 rtx op0 = expand_normal (arg0);
12567 rtx op1 = expand_normal (arg1);
12568 rtx op2 = expand_normal (arg2);
12569 rtx pat, addr;
12570 machine_mode tmode = insn_data[icode].operand[0].mode;
12571 machine_mode mode1 = Pmode;
12572 machine_mode mode2 = Pmode;
12574 /* Invalid arguments. Bail before doing anything stoopid! */
12575 if (arg0 == error_mark_node
12576 || arg1 == error_mark_node
12577 || arg2 == error_mark_node)
12578 return const0_rtx;
12580 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
12581 op0 = copy_to_mode_reg (tmode, op0);
12583 op2 = copy_to_mode_reg (mode2, op2);
12585 if (op1 == const0_rtx)
12587 addr = gen_rtx_MEM (tmode, op2);
12589 else
12591 op1 = copy_to_mode_reg (mode1, op1);
12592 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12595 pat = GEN_FCN (icode) (addr, op0);
12596 if (pat)
12597 emit_insn (pat);
12598 return NULL_RTX;
12601 static rtx
12602 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
12604 tree arg0 = CALL_EXPR_ARG (exp, 0);
12605 tree arg1 = CALL_EXPR_ARG (exp, 1);
12606 tree arg2 = CALL_EXPR_ARG (exp, 2);
12607 rtx op0 = expand_normal (arg0);
12608 rtx op1 = expand_normal (arg1);
12609 rtx op2 = expand_normal (arg2);
12610 rtx pat, addr;
12611 machine_mode tmode = insn_data[icode].operand[0].mode;
12612 machine_mode smode = insn_data[icode].operand[1].mode;
12613 machine_mode mode1 = Pmode;
12614 machine_mode mode2 = Pmode;
12616 /* Invalid arguments. Bail out before generating bad rtl. */
12617 if (arg0 == error_mark_node
12618 || arg1 == error_mark_node
12619 || arg2 == error_mark_node)
12620 return const0_rtx;
12622 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
12623 op0 = copy_to_mode_reg (smode, op0);
12625 op2 = copy_to_mode_reg (mode2, op2);
12627 if (op1 == const0_rtx)
12628 {
12629 addr = gen_rtx_MEM (tmode, op2);
12630 }
12631 else
12632 {
12633 op1 = copy_to_mode_reg (mode1, op1);
12634 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12635 }
12637 pat = GEN_FCN (icode) (addr, op0);
12638 if (pat)
12639 emit_insn (pat);
12640 return NULL_RTX;
12641 }
12643 /* Return the appropriate SPR number associated with the given builtin. */
12644 static inline HOST_WIDE_INT
12645 htm_spr_num (enum rs6000_builtins code)
12646 {
12647 if (code == HTM_BUILTIN_GET_TFHAR
12648 || code == HTM_BUILTIN_SET_TFHAR)
12649 return TFHAR_SPR;
12650 else if (code == HTM_BUILTIN_GET_TFIAR
12651 || code == HTM_BUILTIN_SET_TFIAR)
12652 return TFIAR_SPR;
12653 else if (code == HTM_BUILTIN_GET_TEXASR
12654 || code == HTM_BUILTIN_SET_TEXASR)
12655 return TEXASR_SPR;
12656 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
12657 || code == HTM_BUILTIN_SET_TEXASRU);
12658 return TEXASRU_SPR;
12659 }
12661 /* Return the appropriate SPR regno associated with the given builtin. */
12662 static inline HOST_WIDE_INT
12663 htm_spr_regno (enum rs6000_builtins code)
12664 {
12665 if (code == HTM_BUILTIN_GET_TFHAR
12666 || code == HTM_BUILTIN_SET_TFHAR)
12667 return TFHAR_REGNO;
12668 else if (code == HTM_BUILTIN_GET_TFIAR
12669 || code == HTM_BUILTIN_SET_TFIAR)
12670 return TFIAR_REGNO;
12671 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
12672 || code == HTM_BUILTIN_SET_TEXASR
12673 || code == HTM_BUILTIN_GET_TEXASRU
12674 || code == HTM_BUILTIN_SET_TEXASRU);
12675 return TEXASR_REGNO;
12676 }
12678 /* Return the correct ICODE value depending on whether we are
12679 setting or reading the HTM SPRs. */
12680 static inline enum insn_code
12681 rs6000_htm_spr_icode (bool nonvoid)
12682 {
12683 if (nonvoid)
12684 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
12685 else
12686 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
12687 }
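/* For example (illustrative mapping only; the actual wiring is done in
htm_expand_builtin below), a call such as __builtin_get_texasr () expands
through htm_mfspr_{si,di} with the SPR number and register number supplied
by the two helpers above, and __builtin_set_texasr (x) expands through
htm_mtspr_{si,di}. */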
12689 /* Expand the HTM builtin in EXP and store the result in TARGET.
12690 Store true in *EXPANDEDP if we found a builtin to expand. */
12691 static rtx
12692 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
12694 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12695 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
12696 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12697 const struct builtin_description *d;
12698 size_t i;
12700 *expandedp = true;
12702 if (!TARGET_POWERPC64
12703 && (fcode == HTM_BUILTIN_TABORTDC
12704 || fcode == HTM_BUILTIN_TABORTDCI))
12706 size_t uns_fcode = (size_t)fcode;
12707 const char *name = rs6000_builtin_info[uns_fcode].name;
12708 error ("builtin %s is only valid in 64-bit mode", name);
12709 return const0_rtx;
12712 /* Expand the HTM builtins. */
12713 d = bdesc_htm;
12714 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
12715 if (d->code == fcode)
12717 rtx op[MAX_HTM_OPERANDS], pat;
12718 int nopnds = 0;
12719 tree arg;
12720 call_expr_arg_iterator iter;
12721 unsigned attr = rs6000_builtin_info[fcode].attr;
12722 enum insn_code icode = d->icode;
12723 const struct insn_operand_data *insn_op;
12724 bool uses_spr = (attr & RS6000_BTC_SPR);
12725 rtx cr = NULL_RTX;
12727 if (uses_spr)
12728 icode = rs6000_htm_spr_icode (nonvoid);
12729 insn_op = &insn_data[icode].operand[0];
12731 if (nonvoid)
12733 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
12734 if (!target
12735 || GET_MODE (target) != tmode
12736 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
12737 target = gen_reg_rtx (tmode);
12738 if (uses_spr)
12739 op[nopnds++] = target;
12742 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
12744 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
12745 return const0_rtx;
12747 insn_op = &insn_data[icode].operand[nopnds];
12749 op[nopnds] = expand_normal (arg);
12751 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
12753 if (!strcmp (insn_op->constraint, "n"))
12755 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
12756 if (!CONST_INT_P (op[nopnds]))
12757 error ("argument %d must be an unsigned literal", arg_num);
12758 else
12759 error ("argument %d is an unsigned literal that is "
12760 "out of range", arg_num);
12761 return const0_rtx;
12763 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
12766 nopnds++;
12769 /* Handle the builtins for extended mnemonics. These accept
12770 no arguments, but map to builtins that take arguments. */
12771 switch (fcode)
12773 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
12774 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
12775 op[nopnds++] = GEN_INT (1);
12776 #ifdef ENABLE_CHECKING
12777 attr |= RS6000_BTC_UNARY;
12778 #endif
12779 break;
12780 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
12781 op[nopnds++] = GEN_INT (0);
12782 #ifdef ENABLE_CHECKING
12783 attr |= RS6000_BTC_UNARY;
12784 #endif
12785 break;
12786 default:
12787 break;
12790 /* If this builtin accesses SPRs, then pass in the appropriate
12791 SPR number and SPR regno as the last two operands. */
12792 if (uses_spr)
12794 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
12795 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
12796 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
12798 /* If this builtin accesses a CR, then pass in a scratch
12799 CR as the last operand. */
12800 else if (attr & RS6000_BTC_CR)
12801 { cr = gen_reg_rtx (CCmode);
12802 op[nopnds++] = cr;
12803 }
12805 #ifdef ENABLE_CHECKING
12806 int expected_nopnds = 0;
12807 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
12808 expected_nopnds = 1;
12809 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
12810 expected_nopnds = 2;
12811 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
12812 expected_nopnds = 3;
12813 if (!(attr & RS6000_BTC_VOID))
12814 expected_nopnds += 1;
12815 if (uses_spr)
12816 expected_nopnds += 2;
12818 gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
12819 #endif
12821 switch (nopnds)
12823 case 1:
12824 pat = GEN_FCN (icode) (op[0]);
12825 break;
12826 case 2:
12827 pat = GEN_FCN (icode) (op[0], op[1]);
12828 break;
12829 case 3:
12830 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
12831 break;
12832 case 4:
12833 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
12834 break;
12835 default:
12836 gcc_unreachable ();
12838 if (!pat)
12839 return NULL_RTX;
12840 emit_insn (pat);
12842 if (attr & RS6000_BTC_CR)
12844 if (fcode == HTM_BUILTIN_TBEGIN)
12846 /* Emit code to set TARGET to true or false depending on
12847 whether the tbegin. instruction succeeded or failed
12848 to start a transaction. We do this by placing the 1's
12849 complement of CR's EQ bit into TARGET. */
12850 rtx scratch = gen_reg_rtx (SImode);
12851 emit_insn (gen_rtx_SET (scratch,
12852 gen_rtx_EQ (SImode, cr,
12853 const0_rtx)));
12854 emit_insn (gen_rtx_SET (target,
12855 gen_rtx_XOR (SImode, scratch,
12856 GEN_INT (1))));
12858 else
12860 /* Emit code to copy the 4-bit condition register field
12861 CR into the least significant end of register TARGET. */
12862 rtx scratch1 = gen_reg_rtx (SImode);
12863 rtx scratch2 = gen_reg_rtx (SImode);
12864 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
12865 emit_insn (gen_movcc (subreg, cr));
12866 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
12867 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
12871 if (nonvoid)
12872 return target;
12873 return const0_rtx;
12876 *expandedp = false;
12877 return NULL_RTX;
12878 }
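/* Expand a builtin that maps to a three-operand (ternary) instruction,
checking any operands that the instruction requires to be literal
constants. */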
12880 static rtx
12881 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
12882 {
12883 rtx pat;
12884 tree arg0 = CALL_EXPR_ARG (exp, 0);
12885 tree arg1 = CALL_EXPR_ARG (exp, 1);
12886 tree arg2 = CALL_EXPR_ARG (exp, 2);
12887 rtx op0 = expand_normal (arg0);
12888 rtx op1 = expand_normal (arg1);
12889 rtx op2 = expand_normal (arg2);
12890 machine_mode tmode = insn_data[icode].operand[0].mode;
12891 machine_mode mode0 = insn_data[icode].operand[1].mode;
12892 machine_mode mode1 = insn_data[icode].operand[2].mode;
12893 machine_mode mode2 = insn_data[icode].operand[3].mode;
12895 if (icode == CODE_FOR_nothing)
12896 /* Builtin not supported on this processor. */
12897 return 0;
12899 /* If we got invalid arguments bail out before generating bad rtl. */
12900 if (arg0 == error_mark_node
12901 || arg1 == error_mark_node
12902 || arg2 == error_mark_node)
12903 return const0_rtx;
12905 /* Check and prepare argument depending on the instruction code.
12907 Note that a switch statement instead of the sequence of tests
12908 would be incorrect as many of the CODE_FOR values could be
12909 CODE_FOR_nothing and that would yield multiple alternatives
12910 with identical values. We'd never reach here at runtime in
12911 this case. */
12912 if (icode == CODE_FOR_altivec_vsldoi_v4sf
12913 || icode == CODE_FOR_altivec_vsldoi_v4si
12914 || icode == CODE_FOR_altivec_vsldoi_v8hi
12915 || icode == CODE_FOR_altivec_vsldoi_v16qi)
12917 /* Only allow 4-bit unsigned literals. */
12918 STRIP_NOPS (arg2);
12919 if (TREE_CODE (arg2) != INTEGER_CST
12920 || TREE_INT_CST_LOW (arg2) & ~0xf)
12922 error ("argument 3 must be a 4-bit unsigned literal");
12923 return const0_rtx;
12926 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
12927 || icode == CODE_FOR_vsx_xxpermdi_v2di
12928 || icode == CODE_FOR_vsx_xxsldwi_v16qi
12929 || icode == CODE_FOR_vsx_xxsldwi_v8hi
12930 || icode == CODE_FOR_vsx_xxsldwi_v4si
12931 || icode == CODE_FOR_vsx_xxsldwi_v4sf
12932 || icode == CODE_FOR_vsx_xxsldwi_v2di
12933 || icode == CODE_FOR_vsx_xxsldwi_v2df)
12935 /* Only allow 2-bit unsigned literals. */
12936 STRIP_NOPS (arg2);
12937 if (TREE_CODE (arg2) != INTEGER_CST
12938 || TREE_INT_CST_LOW (arg2) & ~0x3)
12940 error ("argument 3 must be a 2-bit unsigned literal");
12941 return const0_rtx;
12944 else if (icode == CODE_FOR_vsx_set_v2df
12945 || icode == CODE_FOR_vsx_set_v2di
12946 || icode == CODE_FOR_bcdadd
12947 || icode == CODE_FOR_bcdadd_lt
12948 || icode == CODE_FOR_bcdadd_eq
12949 || icode == CODE_FOR_bcdadd_gt
12950 || icode == CODE_FOR_bcdsub
12951 || icode == CODE_FOR_bcdsub_lt
12952 || icode == CODE_FOR_bcdsub_eq
12953 || icode == CODE_FOR_bcdsub_gt)
12955 /* Only allow 1-bit unsigned literals. */
12956 STRIP_NOPS (arg2);
12957 if (TREE_CODE (arg2) != INTEGER_CST
12958 || TREE_INT_CST_LOW (arg2) & ~0x1)
12960 error ("argument 3 must be a 1-bit unsigned literal");
12961 return const0_rtx;
12964 else if (icode == CODE_FOR_dfp_ddedpd_dd
12965 || icode == CODE_FOR_dfp_ddedpd_td)
12967 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
12968 STRIP_NOPS (arg0);
12969 if (TREE_CODE (arg0) != INTEGER_CST
12970 || TREE_INT_CST_LOW (arg0) & ~0x3)
12972 error ("argument 1 must be 0 or 2");
12973 return const0_rtx;
12976 else if (icode == CODE_FOR_dfp_denbcd_dd
12977 || icode == CODE_FOR_dfp_denbcd_td)
12979 /* Only allow 1-bit unsigned literals. */
12980 STRIP_NOPS (arg0);
12981 if (TREE_CODE (arg0) != INTEGER_CST
12982 || TREE_INT_CST_LOW (arg0) & ~0x1)
12984 error ("argument 1 must be a 1-bit unsigned literal");
12985 return const0_rtx;
12988 else if (icode == CODE_FOR_dfp_dscli_dd
12989 || icode == CODE_FOR_dfp_dscli_td
12990 || icode == CODE_FOR_dfp_dscri_dd
12991 || icode == CODE_FOR_dfp_dscri_td)
12993 /* Only allow 6-bit unsigned literals. */
12994 STRIP_NOPS (arg1);
12995 if (TREE_CODE (arg1) != INTEGER_CST
12996 || TREE_INT_CST_LOW (arg1) & ~0x3f)
12998 error ("argument 2 must be a 6-bit unsigned literal");
12999 return const0_rtx;
13002 else if (icode == CODE_FOR_crypto_vshasigmaw
13003 || icode == CODE_FOR_crypto_vshasigmad)
13005 /* Check whether the 2nd and 3rd arguments are integer constants and in
13006 range and prepare arguments. */
13007 STRIP_NOPS (arg1);
13008 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
13010 error ("argument 2 must be 0 or 1");
13011 return const0_rtx;
13014 STRIP_NOPS (arg2);
13015 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
13017 error ("argument 3 must be in the range 0..15");
13018 return const0_rtx;
13022 if (target == 0
13023 || GET_MODE (target) != tmode
13024 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13025 target = gen_reg_rtx (tmode);
13027 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13028 op0 = copy_to_mode_reg (mode0, op0);
13029 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13030 op1 = copy_to_mode_reg (mode1, op1);
13031 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13032 op2 = copy_to_mode_reg (mode2, op2);
13034 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
13035 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
13036 else
13037 pat = GEN_FCN (icode) (target, op0, op1, op2);
13038 if (! pat)
13039 return 0;
13040 emit_insn (pat);
13042 return target;
13045 /* Expand the lvx builtins. */
13046 static rtx
13047 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
13049 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13050 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13051 tree arg0;
13052 machine_mode tmode, mode0;
13053 rtx pat, op0;
13054 enum insn_code icode;
13056 switch (fcode)
13058 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
13059 icode = CODE_FOR_vector_altivec_load_v16qi;
13060 break;
13061 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
13062 icode = CODE_FOR_vector_altivec_load_v8hi;
13063 break;
13064 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
13065 icode = CODE_FOR_vector_altivec_load_v4si;
13066 break;
13067 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
13068 icode = CODE_FOR_vector_altivec_load_v4sf;
13069 break;
13070 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
13071 icode = CODE_FOR_vector_altivec_load_v2df;
13072 break;
13073 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
13074 icode = CODE_FOR_vector_altivec_load_v2di;
break;
13075 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
13076 icode = CODE_FOR_vector_altivec_load_v1ti;
13077 break;
13078 default:
13079 *expandedp = false;
13080 return NULL_RTX;
13083 *expandedp = true;
13085 arg0 = CALL_EXPR_ARG (exp, 0);
13086 op0 = expand_normal (arg0);
13087 tmode = insn_data[icode].operand[0].mode;
13088 mode0 = insn_data[icode].operand[1].mode;
13090 if (target == 0
13091 || GET_MODE (target) != tmode
13092 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13093 target = gen_reg_rtx (tmode);
13095 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13096 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13098 pat = GEN_FCN (icode) (target, op0);
13099 if (! pat)
13100 return 0;
13101 emit_insn (pat);
13102 return target;
13105 /* Expand the stvx builtins. */
13106 static rtx
13107 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13108 bool *expandedp)
13110 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13111 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13112 tree arg0, arg1;
13113 machine_mode mode0, mode1;
13114 rtx pat, op0, op1;
13115 enum insn_code icode;
13117 switch (fcode)
13119 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
13120 icode = CODE_FOR_vector_altivec_store_v16qi;
13121 break;
13122 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
13123 icode = CODE_FOR_vector_altivec_store_v8hi;
13124 break;
13125 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
13126 icode = CODE_FOR_vector_altivec_store_v4si;
13127 break;
13128 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
13129 icode = CODE_FOR_vector_altivec_store_v4sf;
13130 break;
13131 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
13132 icode = CODE_FOR_vector_altivec_store_v2df;
13133 break;
13134 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
13135 icode = CODE_FOR_vector_altivec_store_v2di;
break;
13136 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
13137 icode = CODE_FOR_vector_altivec_store_v1ti;
13138 break;
13139 default:
13140 *expandedp = false;
13141 return NULL_RTX;
13144 arg0 = CALL_EXPR_ARG (exp, 0);
13145 arg1 = CALL_EXPR_ARG (exp, 1);
13146 op0 = expand_normal (arg0);
13147 op1 = expand_normal (arg1);
13148 mode0 = insn_data[icode].operand[0].mode;
13149 mode1 = insn_data[icode].operand[1].mode;
13151 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13152 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13153 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13154 op1 = copy_to_mode_reg (mode1, op1);
13156 pat = GEN_FCN (icode) (op0, op1);
13157 if (pat)
13158 emit_insn (pat);
13160 *expandedp = true;
13161 return NULL_RTX;
13164 /* Expand the dst builtins. */
13165 static rtx
13166 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13167 bool *expandedp)
13169 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13170 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13171 tree arg0, arg1, arg2;
13172 machine_mode mode0, mode1;
13173 rtx pat, op0, op1, op2;
13174 const struct builtin_description *d;
13175 size_t i;
13177 *expandedp = false;
13179 /* Handle DST variants. */
13180 d = bdesc_dst;
13181 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
13182 if (d->code == fcode)
13184 arg0 = CALL_EXPR_ARG (exp, 0);
13185 arg1 = CALL_EXPR_ARG (exp, 1);
13186 arg2 = CALL_EXPR_ARG (exp, 2);
13187 op0 = expand_normal (arg0);
13188 op1 = expand_normal (arg1);
13189 op2 = expand_normal (arg2);
13190 mode0 = insn_data[d->icode].operand[0].mode;
13191 mode1 = insn_data[d->icode].operand[1].mode;
13193 /* Invalid arguments, bail out before generating bad rtl. */
13194 if (arg0 == error_mark_node
13195 || arg1 == error_mark_node
13196 || arg2 == error_mark_node)
13197 return const0_rtx;
13199 *expandedp = true;
13200 STRIP_NOPS (arg2);
13201 if (TREE_CODE (arg2) != INTEGER_CST
13202 || TREE_INT_CST_LOW (arg2) & ~0x3)
13204 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
13205 return const0_rtx;
13208 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13209 op0 = copy_to_mode_reg (Pmode, op0);
13210 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13211 op1 = copy_to_mode_reg (mode1, op1);
13213 pat = GEN_FCN (d->icode) (op0, op1, op2);
13214 if (pat != 0)
13215 emit_insn (pat);
13217 return NULL_RTX;
13220 return NULL_RTX;
13223 /* Expand vec_init builtin. */
13224 static rtx
13225 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
13227 machine_mode tmode = TYPE_MODE (type);
13228 machine_mode inner_mode = GET_MODE_INNER (tmode);
13229 int i, n_elt = GET_MODE_NUNITS (tmode);
13231 gcc_assert (VECTOR_MODE_P (tmode));
13232 gcc_assert (n_elt == call_expr_nargs (exp));
13234 if (!target || !register_operand (target, tmode))
13235 target = gen_reg_rtx (tmode);
13237 /* If we have a vector comprised of a single element, such as V1TImode, do
13238 the initialization directly. */
13239 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
13241 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
13242 emit_move_insn (target, gen_lowpart (tmode, x));
13244 else
13246 rtvec v = rtvec_alloc (n_elt);
13248 for (i = 0; i < n_elt; ++i)
13250 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
13251 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13254 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
13257 return target;
13260 /* Return the integer constant in ARG. Constrain it to be in the range
13261 of the subparts of VEC_TYPE; issue an error if not. */
13263 static int
13264 get_element_number (tree vec_type, tree arg)
13266 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13268 if (!tree_fits_uhwi_p (arg)
13269 || (elt = tree_to_uhwi (arg), elt > max))
13271 error ("selector must be an integer constant in the range 0..%wi", max);
13272 return 0;
13275 return elt;
13278 /* Expand vec_set builtin. */
13279 static rtx
13280 altivec_expand_vec_set_builtin (tree exp)
13282 machine_mode tmode, mode1;
13283 tree arg0, arg1, arg2;
13284 int elt;
13285 rtx op0, op1;
13287 arg0 = CALL_EXPR_ARG (exp, 0);
13288 arg1 = CALL_EXPR_ARG (exp, 1);
13289 arg2 = CALL_EXPR_ARG (exp, 2);
13291 tmode = TYPE_MODE (TREE_TYPE (arg0));
13292 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13293 gcc_assert (VECTOR_MODE_P (tmode));
13295 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
13296 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
13297 elt = get_element_number (TREE_TYPE (arg0), arg2);
13299 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
13300 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
13302 op0 = force_reg (tmode, op0);
13303 op1 = force_reg (mode1, op1);
13305 rs6000_expand_vector_set (op0, op1, elt);
13307 return op0;
13310 /* Expand vec_ext builtin. */
13311 static rtx
13312 altivec_expand_vec_ext_builtin (tree exp, rtx target)
13314 machine_mode tmode, mode0;
13315 tree arg0, arg1;
13316 int elt;
13317 rtx op0;
13319 arg0 = CALL_EXPR_ARG (exp, 0);
13320 arg1 = CALL_EXPR_ARG (exp, 1);
13322 op0 = expand_normal (arg0);
13323 elt = get_element_number (TREE_TYPE (arg0), arg1);
13325 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13326 mode0 = TYPE_MODE (TREE_TYPE (arg0));
13327 gcc_assert (VECTOR_MODE_P (mode0));
13329 op0 = force_reg (mode0, op0);
13331 if (optimize || !target || !register_operand (target, tmode))
13332 target = gen_reg_rtx (tmode);
13334 rs6000_expand_vector_extract (target, op0, elt);
13336 return target;
13339 /* Expand the builtin in EXP and store the result in TARGET. Store
13340 true in *EXPANDEDP if we found a builtin to expand. */
13341 static rtx
13342 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
13344 const struct builtin_description *d;
13345 size_t i;
13346 enum insn_code icode;
13347 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13348 tree arg0;
13349 rtx op0, pat;
13350 machine_mode tmode, mode0;
13351 enum rs6000_builtins fcode
13352 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13354 if (rs6000_overloaded_builtin_p (fcode))
13356 *expandedp = true;
13357 error ("unresolved overload for Altivec builtin %qF", fndecl);
13359 /* Given it is invalid, just generate a normal call. */
13360 return expand_call (exp, target, false);
13363 target = altivec_expand_ld_builtin (exp, target, expandedp);
13364 if (*expandedp)
13365 return target;
13367 target = altivec_expand_st_builtin (exp, target, expandedp);
13368 if (*expandedp)
13369 return target;
13371 target = altivec_expand_dst_builtin (exp, target, expandedp);
13372 if (*expandedp)
13373 return target;
13375 *expandedp = true;
13377 switch (fcode)
13379 case ALTIVEC_BUILTIN_STVX_V2DF:
13380 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13381 case ALTIVEC_BUILTIN_STVX_V2DI:
13382 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13383 case ALTIVEC_BUILTIN_STVX_V4SF:
13384 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13385 case ALTIVEC_BUILTIN_STVX:
13386 case ALTIVEC_BUILTIN_STVX_V4SI:
13387 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13388 case ALTIVEC_BUILTIN_STVX_V8HI:
13389 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13390 case ALTIVEC_BUILTIN_STVX_V16QI:
13391 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13392 case ALTIVEC_BUILTIN_STVEBX:
13393 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13394 case ALTIVEC_BUILTIN_STVEHX:
13395 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13396 case ALTIVEC_BUILTIN_STVEWX:
13397 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13398 case ALTIVEC_BUILTIN_STVXL_V2DF:
13399 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13400 case ALTIVEC_BUILTIN_STVXL_V2DI:
13401 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13402 case ALTIVEC_BUILTIN_STVXL_V4SF:
13403 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13404 case ALTIVEC_BUILTIN_STVXL:
13405 case ALTIVEC_BUILTIN_STVXL_V4SI:
13406 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13407 case ALTIVEC_BUILTIN_STVXL_V8HI:
13408 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13409 case ALTIVEC_BUILTIN_STVXL_V16QI:
13410 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13412 case ALTIVEC_BUILTIN_STVLX:
13413 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13414 case ALTIVEC_BUILTIN_STVLXL:
13415 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13416 case ALTIVEC_BUILTIN_STVRX:
13417 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13418 case ALTIVEC_BUILTIN_STVRXL:
13419 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13421 case VSX_BUILTIN_STXVD2X_V1TI:
13422 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13423 case VSX_BUILTIN_STXVD2X_V2DF:
13424 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13425 case VSX_BUILTIN_STXVD2X_V2DI:
13426 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13427 case VSX_BUILTIN_STXVW4X_V4SF:
13428 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13429 case VSX_BUILTIN_STXVW4X_V4SI:
13430 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13431 case VSX_BUILTIN_STXVW4X_V8HI:
13432 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13433 case VSX_BUILTIN_STXVW4X_V16QI:
13434 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13436 case ALTIVEC_BUILTIN_MFVSCR:
13437 icode = CODE_FOR_altivec_mfvscr;
13438 tmode = insn_data[icode].operand[0].mode;
13440 if (target == 0
13441 || GET_MODE (target) != tmode
13442 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13443 target = gen_reg_rtx (tmode);
13445 pat = GEN_FCN (icode) (target);
13446 if (! pat)
13447 return 0;
13448 emit_insn (pat);
13449 return target;
13451 case ALTIVEC_BUILTIN_MTVSCR:
13452 icode = CODE_FOR_altivec_mtvscr;
13453 arg0 = CALL_EXPR_ARG (exp, 0);
13454 op0 = expand_normal (arg0);
13455 mode0 = insn_data[icode].operand[0].mode;
13457 /* If we got invalid arguments bail out before generating bad rtl. */
13458 if (arg0 == error_mark_node)
13459 return const0_rtx;
13461 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13462 op0 = copy_to_mode_reg (mode0, op0);
13464 pat = GEN_FCN (icode) (op0);
13465 if (pat)
13466 emit_insn (pat);
13467 return NULL_RTX;
13469 case ALTIVEC_BUILTIN_DSSALL:
13470 emit_insn (gen_altivec_dssall ());
13471 return NULL_RTX;
13473 case ALTIVEC_BUILTIN_DSS:
13474 icode = CODE_FOR_altivec_dss;
13475 arg0 = CALL_EXPR_ARG (exp, 0);
13476 STRIP_NOPS (arg0);
13477 op0 = expand_normal (arg0);
13478 mode0 = insn_data[icode].operand[0].mode;
13480 /* If we got invalid arguments bail out before generating bad rtl. */
13481 if (arg0 == error_mark_node)
13482 return const0_rtx;
13484 if (TREE_CODE (arg0) != INTEGER_CST
13485 || TREE_INT_CST_LOW (arg0) & ~0x3)
13487 error ("argument to dss must be a 2-bit unsigned literal");
13488 return const0_rtx;
13491 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13492 op0 = copy_to_mode_reg (mode0, op0);
13494 emit_insn (gen_altivec_dss (op0));
13495 return NULL_RTX;
13497 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
13498 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
13499 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
13500 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
13501 case VSX_BUILTIN_VEC_INIT_V2DF:
13502 case VSX_BUILTIN_VEC_INIT_V2DI:
13503 case VSX_BUILTIN_VEC_INIT_V1TI:
13504 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
13506 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
13507 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
13508 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
13509 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
13510 case VSX_BUILTIN_VEC_SET_V2DF:
13511 case VSX_BUILTIN_VEC_SET_V2DI:
13512 case VSX_BUILTIN_VEC_SET_V1TI:
13513 return altivec_expand_vec_set_builtin (exp);
13515 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
13516 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
13517 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
13518 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
13519 case VSX_BUILTIN_VEC_EXT_V2DF:
13520 case VSX_BUILTIN_VEC_EXT_V2DI:
13521 case VSX_BUILTIN_VEC_EXT_V1TI:
13522 return altivec_expand_vec_ext_builtin (exp, target);
13524 default:
13525 break;
13526 /* Fall through. */
13529 /* Expand abs* operations. */
13530 d = bdesc_abs;
13531 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
13532 if (d->code == fcode)
13533 return altivec_expand_abs_builtin (d->icode, exp, target);
13535 /* Expand the AltiVec predicates. */
13536 d = bdesc_altivec_preds;
13537 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
13538 if (d->code == fcode)
13539 return altivec_expand_predicate_builtin (d->icode, exp, target);
13541 /* LV* are funky. We initialize them differently. */
13542 switch (fcode)
13544 case ALTIVEC_BUILTIN_LVSL:
13545 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
13546 exp, target, false);
13547 case ALTIVEC_BUILTIN_LVSR:
13548 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
13549 exp, target, false);
13550 case ALTIVEC_BUILTIN_LVEBX:
13551 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
13552 exp, target, false);
13553 case ALTIVEC_BUILTIN_LVEHX:
13554 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
13555 exp, target, false);
13556 case ALTIVEC_BUILTIN_LVEWX:
13557 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
13558 exp, target, false);
13559 case ALTIVEC_BUILTIN_LVXL_V2DF:
13560 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
13561 exp, target, false);
13562 case ALTIVEC_BUILTIN_LVXL_V2DI:
13563 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
13564 exp, target, false);
13565 case ALTIVEC_BUILTIN_LVXL_V4SF:
13566 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
13567 exp, target, false);
13568 case ALTIVEC_BUILTIN_LVXL:
13569 case ALTIVEC_BUILTIN_LVXL_V4SI:
13570 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
13571 exp, target, false);
13572 case ALTIVEC_BUILTIN_LVXL_V8HI:
13573 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
13574 exp, target, false);
13575 case ALTIVEC_BUILTIN_LVXL_V16QI:
13576 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
13577 exp, target, false);
13578 case ALTIVEC_BUILTIN_LVX_V2DF:
13579 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
13580 exp, target, false);
13581 case ALTIVEC_BUILTIN_LVX_V2DI:
13582 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
13583 exp, target, false);
13584 case ALTIVEC_BUILTIN_LVX_V4SF:
13585 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
13586 exp, target, false);
13587 case ALTIVEC_BUILTIN_LVX:
13588 case ALTIVEC_BUILTIN_LVX_V4SI:
13589 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
13590 exp, target, false);
13591 case ALTIVEC_BUILTIN_LVX_V8HI:
13592 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
13593 exp, target, false);
13594 case ALTIVEC_BUILTIN_LVX_V16QI:
13595 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
13596 exp, target, false);
13597 case ALTIVEC_BUILTIN_LVLX:
13598 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
13599 exp, target, true);
13600 case ALTIVEC_BUILTIN_LVLXL:
13601 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
13602 exp, target, true);
13603 case ALTIVEC_BUILTIN_LVRX:
13604 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
13605 exp, target, true);
13606 case ALTIVEC_BUILTIN_LVRXL:
13607 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
13608 exp, target, true);
13609 case VSX_BUILTIN_LXVD2X_V1TI:
13610 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
13611 exp, target, false);
13612 case VSX_BUILTIN_LXVD2X_V2DF:
13613 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
13614 exp, target, false);
13615 case VSX_BUILTIN_LXVD2X_V2DI:
13616 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
13617 exp, target, false);
13618 case VSX_BUILTIN_LXVW4X_V4SF:
13619 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
13620 exp, target, false);
13621 case VSX_BUILTIN_LXVW4X_V4SI:
13622 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
13623 exp, target, false);
13624 case VSX_BUILTIN_LXVW4X_V8HI:
13625 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
13626 exp, target, false);
13627 case VSX_BUILTIN_LXVW4X_V16QI:
13628 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
13629 exp, target, false);
13630 break;
13631 default:
13632 break;
13633 /* Fall through. */
13636 *expandedp = false;
13637 return NULL_RTX;
13640 /* Expand the builtin in EXP and store the result in TARGET. Store
13641 true in *EXPANDEDP if we found a builtin to expand. */
13642 static rtx
13643 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
13645 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13646 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13647 const struct builtin_description *d;
13648 size_t i;
13650 *expandedp = true;
13652 switch (fcode)
13654 case PAIRED_BUILTIN_STX:
13655 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
13656 case PAIRED_BUILTIN_LX:
13657 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
13658 default:
13659 break;
13660 /* Fall through. */
13663 /* Expand the paired predicates. */
13664 d = bdesc_paired_preds;
13665 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
13666 if (d->code == fcode)
13667 return paired_expand_predicate_builtin (d->icode, exp, target);
13669 *expandedp = false;
13670 return NULL_RTX;
13673 /* Binops that need to be initialized manually, but can be expanded
13674 automagically by rs6000_expand_binop_builtin. */
13675 static const struct builtin_description bdesc_2arg_spe[] =
13676 {
13677 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
13678 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
13679 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
13680 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
13681 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
13682 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
13683 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
13684 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
13685 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
13686 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
13687 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
13688 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
13689 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
13690 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
13691 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
13692 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
13693 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
13694 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
13695 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
13696 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
13697 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
13698 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
13699 };
13701 /* Expand the builtin in EXP and store the result in TARGET. Store
13702 true in *EXPANDEDP if we found a builtin to expand.
13704 This expands the SPE builtins that are not simple unary and binary
13705 operations. */
13706 static rtx
13707 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
13709 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13710 tree arg1, arg0;
13711 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13712 enum insn_code icode;
13713 machine_mode tmode, mode0;
13714 rtx pat, op0;
13715 const struct builtin_description *d;
13716 size_t i;
13718 *expandedp = true;
13720 /* Syntax check for a 5-bit unsigned immediate. */
13721 switch (fcode)
13723 case SPE_BUILTIN_EVSTDD:
13724 case SPE_BUILTIN_EVSTDH:
13725 case SPE_BUILTIN_EVSTDW:
13726 case SPE_BUILTIN_EVSTWHE:
13727 case SPE_BUILTIN_EVSTWHO:
13728 case SPE_BUILTIN_EVSTWWE:
13729 case SPE_BUILTIN_EVSTWWO:
13730 arg1 = CALL_EXPR_ARG (exp, 2);
13731 if (TREE_CODE (arg1) != INTEGER_CST
13732 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13734 error ("argument 2 must be a 5-bit unsigned literal");
13735 return const0_rtx;
13737 break;
13738 default:
13739 break;
13742 /* The evsplat*i instructions are not quite generic. */
13743 switch (fcode)
13745 case SPE_BUILTIN_EVSPLATFI:
13746 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
13747 exp, target);
13748 case SPE_BUILTIN_EVSPLATI:
13749 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
13750 exp, target);
13751 default:
13752 break;
13755 d = bdesc_2arg_spe;
13756 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
13757 if (d->code == fcode)
13758 return rs6000_expand_binop_builtin (d->icode, exp, target);
13760 d = bdesc_spe_predicates;
13761 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
13762 if (d->code == fcode)
13763 return spe_expand_predicate_builtin (d->icode, exp, target);
13765 d = bdesc_spe_evsel;
13766 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
13767 if (d->code == fcode)
13768 return spe_expand_evsel_builtin (d->icode, exp, target);
13770 switch (fcode)
13772 case SPE_BUILTIN_EVSTDDX:
13773 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
13774 case SPE_BUILTIN_EVSTDHX:
13775 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
13776 case SPE_BUILTIN_EVSTDWX:
13777 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
13778 case SPE_BUILTIN_EVSTWHEX:
13779 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
13780 case SPE_BUILTIN_EVSTWHOX:
13781 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
13782 case SPE_BUILTIN_EVSTWWEX:
13783 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
13784 case SPE_BUILTIN_EVSTWWOX:
13785 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
13786 case SPE_BUILTIN_EVSTDD:
13787 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
13788 case SPE_BUILTIN_EVSTDH:
13789 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
13790 case SPE_BUILTIN_EVSTDW:
13791 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
13792 case SPE_BUILTIN_EVSTWHE:
13793 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
13794 case SPE_BUILTIN_EVSTWHO:
13795 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
13796 case SPE_BUILTIN_EVSTWWE:
13797 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
13798 case SPE_BUILTIN_EVSTWWO:
13799 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
13800 case SPE_BUILTIN_MFSPEFSCR:
13801 icode = CODE_FOR_spe_mfspefscr;
13802 tmode = insn_data[icode].operand[0].mode;
13804 if (target == 0
13805 || GET_MODE (target) != tmode
13806 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13807 target = gen_reg_rtx (tmode);
13809 pat = GEN_FCN (icode) (target);
13810 if (! pat)
13811 return 0;
13812 emit_insn (pat);
13813 return target;
13814 case SPE_BUILTIN_MTSPEFSCR:
13815 icode = CODE_FOR_spe_mtspefscr;
13816 arg0 = CALL_EXPR_ARG (exp, 0);
13817 op0 = expand_normal (arg0);
13818 mode0 = insn_data[icode].operand[0].mode;
13820 if (arg0 == error_mark_node)
13821 return const0_rtx;
13823 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13824 op0 = copy_to_mode_reg (mode0, op0);
13826 pat = GEN_FCN (icode) (op0);
13827 if (pat)
13828 emit_insn (pat);
13829 return NULL_RTX;
13830 default:
13831 break;
13834 *expandedp = false;
13835 return NULL_RTX;
13836 }
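/* Expand a paired-single predicate builtin. The first call argument
selects which CR bit (LT, GT, EQ or UN) of the comparison of the other
two arguments is placed in TARGET. */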
13838 static rtx
13839 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13840 {
13841 rtx pat, scratch, tmp;
13842 tree form = CALL_EXPR_ARG (exp, 0);
13843 tree arg0 = CALL_EXPR_ARG (exp, 1);
13844 tree arg1 = CALL_EXPR_ARG (exp, 2);
13845 rtx op0 = expand_normal (arg0);
13846 rtx op1 = expand_normal (arg1);
13847 machine_mode mode0 = insn_data[icode].operand[1].mode;
13848 machine_mode mode1 = insn_data[icode].operand[2].mode;
13849 int form_int;
13850 enum rtx_code code;
13852 if (TREE_CODE (form) != INTEGER_CST)
13854 error ("argument 1 of __builtin_paired_predicate must be a constant");
13855 return const0_rtx;
13857 else
13858 form_int = TREE_INT_CST_LOW (form);
13860 gcc_assert (mode0 == mode1);
13862 if (arg0 == error_mark_node || arg1 == error_mark_node)
13863 return const0_rtx;
13865 if (target == 0
13866 || GET_MODE (target) != SImode
13867 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
13868 target = gen_reg_rtx (SImode);
13869 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13870 op0 = copy_to_mode_reg (mode0, op0);
13871 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13872 op1 = copy_to_mode_reg (mode1, op1);
13874 scratch = gen_reg_rtx (CCFPmode);
13876 pat = GEN_FCN (icode) (scratch, op0, op1);
13877 if (!pat)
13878 return const0_rtx;
13880 emit_insn (pat);
13882 switch (form_int)
13884 /* LT bit. */
13885 case 0:
13886 code = LT;
13887 break;
13888 /* GT bit. */
13889 case 1:
13890 code = GT;
13891 break;
13892 /* EQ bit. */
13893 case 2:
13894 code = EQ;
13895 break;
13896 /* UN bit. */
13897 case 3:
13898 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13899 return target;
13900 default:
13901 error ("argument 1 of __builtin_paired_predicate is out of range");
13902 return const0_rtx;
13905 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13906 emit_move_insn (target, tmp);
13907 return target;
13908 }
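/* Expand an SPE predicate builtin. The first call argument selects the
all/any/upper/lower form (see the CR bit mapping in the comment below);
the other two arguments are the vectors being compared. */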
13910 static rtx
13911 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13912 {
13913 rtx pat, scratch, tmp;
13914 tree form = CALL_EXPR_ARG (exp, 0);
13915 tree arg0 = CALL_EXPR_ARG (exp, 1);
13916 tree arg1 = CALL_EXPR_ARG (exp, 2);
13917 rtx op0 = expand_normal (arg0);
13918 rtx op1 = expand_normal (arg1);
13919 machine_mode mode0 = insn_data[icode].operand[1].mode;
13920 machine_mode mode1 = insn_data[icode].operand[2].mode;
13921 int form_int;
13922 enum rtx_code code;
13924 if (TREE_CODE (form) != INTEGER_CST)
13926 error ("argument 1 of __builtin_spe_predicate must be a constant");
13927 return const0_rtx;
13929 else
13930 form_int = TREE_INT_CST_LOW (form);
13932 gcc_assert (mode0 == mode1);
13934 if (arg0 == error_mark_node || arg1 == error_mark_node)
13935 return const0_rtx;
13937 if (target == 0
13938 || GET_MODE (target) != SImode
13939 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
13940 target = gen_reg_rtx (SImode);
13942 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13943 op0 = copy_to_mode_reg (mode0, op0);
13944 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13945 op1 = copy_to_mode_reg (mode1, op1);
13947 scratch = gen_reg_rtx (CCmode);
13949 pat = GEN_FCN (icode) (scratch, op0, op1);
13950 if (! pat)
13951 return const0_rtx;
13952 emit_insn (pat);
13954 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
13955 _lower_. We use one compare, but look in different bits of the
13956 CR for each variant.
13958 There are 2 elements in each SPE simd type (upper/lower). The CR
13959 bits are set as follows:
13961 BIT0 | BIT 1 | BIT 2 | BIT 3
13962 U | L | (U | L) | (U & L)
13964 So, for an "all" relationship, BIT 3 would be set.
13965 For an "any" relationship, BIT 2 would be set. Etc.
13967 Following traditional nomenclature, these bits map to:
13969 BIT0 | BIT 1 | BIT 2 | BIT 3
13970 LT | GT | EQ | OV
13972 Later, we will generate rtl to look in the OV/EQ/LT/GT bit appropriate for the form. */
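/* For example, an "upper" (form 2) predicate is realized, in effect, as
target = (scratch < 0), i.e. a test of the LT bit of the CR field
produced by the compare. */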
13975 switch (form_int)
13977 /* All variant. OV bit. */
13978 case 0:
13979 /* We need to get to the OV bit, which is the ORDERED bit. We
13980 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
13981 that's ugly and will make validate_condition_mode die.
13982 So let's just use another pattern. */
13983 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13984 return target;
13985 /* Any variant. EQ bit. */
13986 case 1:
13987 code = EQ;
13988 break;
13989 /* Upper variant. LT bit. */
13990 case 2:
13991 code = LT;
13992 break;
13993 /* Lower variant. GT bit. */
13994 case 3:
13995 code = GT;
13996 break;
13997 default:
13998 error ("argument 1 of __builtin_spe_predicate is out of range");
13999 return const0_rtx;
14002 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
14003 emit_move_insn (target, tmp);
14005 return target;
14008 /* The evsel builtins look like this:
14010 e = __builtin_spe_evsel_OP (a, b, c, d);
14012 and work like this:
14014 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
14015 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower]; */
14018 static rtx
14019 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
14021 rtx pat, scratch;
14022 tree arg0 = CALL_EXPR_ARG (exp, 0);
14023 tree arg1 = CALL_EXPR_ARG (exp, 1);
14024 tree arg2 = CALL_EXPR_ARG (exp, 2);
14025 tree arg3 = CALL_EXPR_ARG (exp, 3);
14026 rtx op0 = expand_normal (arg0);
14027 rtx op1 = expand_normal (arg1);
14028 rtx op2 = expand_normal (arg2);
14029 rtx op3 = expand_normal (arg3);
14030 machine_mode mode0 = insn_data[icode].operand[1].mode;
14031 machine_mode mode1 = insn_data[icode].operand[2].mode;
14033 gcc_assert (mode0 == mode1);
14035 if (arg0 == error_mark_node || arg1 == error_mark_node
14036 || arg2 == error_mark_node || arg3 == error_mark_node)
14037 return const0_rtx;
14039 if (target == 0
14040 || GET_MODE (target) != mode0
14041 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
14042 target = gen_reg_rtx (mode0);
14044 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14045 op0 = copy_to_mode_reg (mode0, op0);
14046 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14047 op1 = copy_to_mode_reg (mode0, op1);
14048 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14049 op2 = copy_to_mode_reg (mode0, op2);
14050 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
14051 op3 = copy_to_mode_reg (mode0, op3);
14053 /* Generate the compare. */
14054 scratch = gen_reg_rtx (CCmode);
14055 pat = GEN_FCN (icode) (scratch, op0, op1);
14056 if (! pat)
14057 return const0_rtx;
14058 emit_insn (pat);
14060 if (mode0 == V2SImode)
14061 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
14062 else
14063 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
14065 return target;
14068 /* Raise an error message for a builtin function that is called without the
14069 appropriate target options being set. */
14071 static void
14072 rs6000_invalid_builtin (enum rs6000_builtins fncode)
14074 size_t uns_fncode = (size_t)fncode;
14075 const char *name = rs6000_builtin_info[uns_fncode].name;
14076 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
14078 gcc_assert (name != NULL);
14079 if ((fnmask & RS6000_BTM_CELL) != 0)
14080 error ("Builtin function %s is only valid for the cell processor", name);
14081 else if ((fnmask & RS6000_BTM_VSX) != 0)
14082 error ("Builtin function %s requires the -mvsx option", name);
14083 else if ((fnmask & RS6000_BTM_HTM) != 0)
14084 error ("Builtin function %s requires the -mhtm option", name);
14085 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
14086 error ("Builtin function %s requires the -maltivec option", name);
14087 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
14088 error ("Builtin function %s requires the -mpaired option", name);
14089 else if ((fnmask & RS6000_BTM_SPE) != 0)
14090 error ("Builtin function %s requires the -mspe option", name);
14091 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14092 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14093 error ("Builtin function %s requires the -mhard-dfp and"
14094 " -mpower8-vector options", name);
14095 else if ((fnmask & RS6000_BTM_DFP) != 0)
14096 error ("Builtin function %s requires the -mhard-dfp option", name);
14097 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
14098 error ("Builtin function %s requires the -mpower8-vector option", name);
14099 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14100 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14101 error ("Builtin function %s requires the -mhard-float and"
14102 " -mlong-double-128 options", name);
14103 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
14104 error ("Builtin function %s requires the -mhard-float option", name);
14105 else
14106 error ("Builtin function %s is not supported with the current options",
14107 name);
14110 /* Expand an expression EXP that calls a built-in function,
14111 with result going to TARGET if that's convenient
14112 (and in mode MODE if that's convenient).
14113 SUBTARGET may be used as the target for computing one of EXP's operands.
14114 IGNORE is nonzero if the value is to be ignored. */
14116 static rtx
14117 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14118 machine_mode mode ATTRIBUTE_UNUSED,
14119 int ignore ATTRIBUTE_UNUSED)
14121 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14122 enum rs6000_builtins fcode
14123 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
14124 size_t uns_fcode = (size_t)fcode;
14125 const struct builtin_description *d;
14126 size_t i;
14127 rtx ret;
14128 bool success;
14129 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
14130 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
14132 if (TARGET_DEBUG_BUILTIN)
14134 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
14135 const char *name1 = rs6000_builtin_info[uns_fcode].name;
14136 const char *name2 = ((icode != CODE_FOR_nothing)
14137 ? get_insn_name ((int)icode)
14138 : "nothing");
14139 const char *name3;
14141 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
14143 default: name3 = "unknown"; break;
14144 case RS6000_BTC_SPECIAL: name3 = "special"; break;
14145 case RS6000_BTC_UNARY: name3 = "unary"; break;
14146 case RS6000_BTC_BINARY: name3 = "binary"; break;
14147 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
14148 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
14149 case RS6000_BTC_ABS: name3 = "abs"; break;
14150 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
14151 case RS6000_BTC_DST: name3 = "dst"; break;
14155 fprintf (stderr,
14156 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
14157 (name1) ? name1 : "---", fcode,
14158 (name2) ? name2 : "---", (int)icode,
14159 name3,
14160 func_valid_p ? "" : ", not valid");
14163 if (!func_valid_p)
14165 rs6000_invalid_builtin (fcode);
14167 /* Given it is invalid, just generate a normal call. */
14168 return expand_call (exp, target, ignore);
14171 switch (fcode)
14173 case RS6000_BUILTIN_RECIP:
14174 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
14176 case RS6000_BUILTIN_RECIPF:
14177 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
14179 case RS6000_BUILTIN_RSQRTF:
14180 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
14182 case RS6000_BUILTIN_RSQRT:
14183 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
14185 case POWER7_BUILTIN_BPERMD:
14186 return rs6000_expand_binop_builtin (((TARGET_64BIT)
14187 ? CODE_FOR_bpermd_di
14188 : CODE_FOR_bpermd_si), exp, target);
14190 case RS6000_BUILTIN_GET_TB:
14191 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
14192 target);
14194 case RS6000_BUILTIN_MFTB:
14195 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
14196 ? CODE_FOR_rs6000_mftb_di
14197 : CODE_FOR_rs6000_mftb_si),
14198 target);
14200 case RS6000_BUILTIN_MFFS:
14201 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
14203 case RS6000_BUILTIN_MTFSF:
14204 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
14206 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
14207 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
14209 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
14210 : (int) CODE_FOR_altivec_lvsl_direct);
14211 machine_mode tmode = insn_data[icode].operand[0].mode;
14212 machine_mode mode = insn_data[icode].operand[1].mode;
14213 tree arg;
14214 rtx op, addr, pat;
14216 gcc_assert (TARGET_ALTIVEC);
14218 arg = CALL_EXPR_ARG (exp, 0);
14219 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
14220 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
14221 addr = memory_address (mode, op);
14222 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
14223 op = addr;
14224 else
14226 /* For the load case need to negate the address. */
14227 op = gen_reg_rtx (GET_MODE (addr));
14228 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
14230 op = gen_rtx_MEM (mode, op);
14232 if (target == 0
14233 || GET_MODE (target) != tmode
14234 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14235 target = gen_reg_rtx (tmode);
14237 pat = GEN_FCN (icode) (target, op);
14238 if (!pat)
14239 return 0;
14240 emit_insn (pat);
14242 return target;
14245 case ALTIVEC_BUILTIN_VCFUX:
14246 case ALTIVEC_BUILTIN_VCFSX:
14247 case ALTIVEC_BUILTIN_VCTUXS:
14248 case ALTIVEC_BUILTIN_VCTSXS:
14249 /* FIXME: There's got to be a nicer way to handle this case than
14250 constructing a new CALL_EXPR. */
14251 if (call_expr_nargs (exp) == 1)
14253 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
14254 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
14256 break;
14258 default:
14259 break;
14262 if (TARGET_ALTIVEC)
14264 ret = altivec_expand_builtin (exp, target, &success);
14266 if (success)
14267 return ret;
14269 if (TARGET_SPE)
14271 ret = spe_expand_builtin (exp, target, &success);
14273 if (success)
14274 return ret;
14276 if (TARGET_PAIRED_FLOAT)
14278 ret = paired_expand_builtin (exp, target, &success);
14280 if (success)
14281 return ret;
14283 if (TARGET_HTM)
14285 ret = htm_expand_builtin (exp, target, &success);
14287 if (success)
14288 return ret;
14291 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
14292 gcc_assert (attr == RS6000_BTC_UNARY
14293 || attr == RS6000_BTC_BINARY
14294 || attr == RS6000_BTC_TERNARY);
14296 /* Handle simple unary operations. */
14297 d = bdesc_1arg;
14298 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14299 if (d->code == fcode)
14300 return rs6000_expand_unop_builtin (d->icode, exp, target);
14302 /* Handle simple binary operations. */
14303 d = bdesc_2arg;
14304 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14305 if (d->code == fcode)
14306 return rs6000_expand_binop_builtin (d->icode, exp, target);
14308 /* Handle simple ternary operations. */
14309 d = bdesc_3arg;
14310 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14311 if (d->code == fcode)
14312 return rs6000_expand_ternop_builtin (d->icode, exp, target);
14314 gcc_unreachable ();
14315 }
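/* Implement TARGET_INIT_BUILTINS: create the type nodes and mode-to-type
mapping used by the rs6000 builtins and register the builtin functions. */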
14317 static void
14318 rs6000_init_builtins (void)
14319 {
14320 tree tdecl;
14321 tree ftype;
14322 machine_mode mode;
14324 if (TARGET_DEBUG_BUILTIN)
14325 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
14326 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
14327 (TARGET_SPE) ? ", spe" : "",
14328 (TARGET_ALTIVEC) ? ", altivec" : "",
14329 (TARGET_VSX) ? ", vsx" : "");
14331 V2SI_type_node = build_vector_type (intSI_type_node, 2);
14332 V2SF_type_node = build_vector_type (float_type_node, 2);
14333 V2DI_type_node = build_vector_type (intDI_type_node, 2);
14334 V2DF_type_node = build_vector_type (double_type_node, 2);
14335 V4HI_type_node = build_vector_type (intHI_type_node, 4);
14336 V4SI_type_node = build_vector_type (intSI_type_node, 4);
14337 V4SF_type_node = build_vector_type (float_type_node, 4);
14338 V8HI_type_node = build_vector_type (intHI_type_node, 8);
14339 V16QI_type_node = build_vector_type (intQI_type_node, 16);
14341 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
14342 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
14343 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
14344 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
14346 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
14347 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
14348 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
14349 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
14351 /* We use V1TI mode as a special container to hold __int128_t items that
14352 must live in VSX registers. */
14353 if (intTI_type_node)
14355 V1TI_type_node = build_vector_type (intTI_type_node, 1);
14356 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
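/* Illustrative effect, assuming VSX: a value declared as

     vector __int128 v;   /- V1TImode, a single 128-bit element -/

   can then live in a vector register instead of a GPR pair.  (User-level
   sketch; the "__vector __int128" type name is registered further below.)  */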
14359 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
14360 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
14361 'vector unsigned short'. */
14363 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14364 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14365 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14366 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14367 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
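/* Consequently, e.g., the following hypothetical C++ declarations are
   distinct overloads rather than redeclarations:

     void f (__vector __bool char);
     void f (__vector unsigned char);

   which is exactly what the distinct type copies above guarantee.  */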
14369 long_integer_type_internal_node = long_integer_type_node;
14370 long_unsigned_type_internal_node = long_unsigned_type_node;
14371 long_long_integer_type_internal_node = long_long_integer_type_node;
14372 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14373 intQI_type_internal_node = intQI_type_node;
14374 uintQI_type_internal_node = unsigned_intQI_type_node;
14375 intHI_type_internal_node = intHI_type_node;
14376 uintHI_type_internal_node = unsigned_intHI_type_node;
14377 intSI_type_internal_node = intSI_type_node;
14378 uintSI_type_internal_node = unsigned_intSI_type_node;
14379 intDI_type_internal_node = intDI_type_node;
14380 uintDI_type_internal_node = unsigned_intDI_type_node;
14381 intTI_type_internal_node = intTI_type_node;
14382 uintTI_type_internal_node = unsigned_intTI_type_node;
14383 float_type_internal_node = float_type_node;
14384 double_type_internal_node = double_type_node;
14385 long_double_type_internal_node = long_double_type_node;
14386 dfloat64_type_internal_node = dfloat64_type_node;
14387 dfloat128_type_internal_node = dfloat128_type_node;
14388 void_type_internal_node = void_type_node;
14390 /* Initialize the modes for builtin_function_type, mapping each machine
14391 mode to its tree type node. */
14392 builtin_mode_to_type[QImode][0] = integer_type_node;
14393 builtin_mode_to_type[HImode][0] = integer_type_node;
14394 builtin_mode_to_type[SImode][0] = intSI_type_node;
14395 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14396 builtin_mode_to_type[DImode][0] = intDI_type_node;
14397 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14398 builtin_mode_to_type[TImode][0] = intTI_type_node;
14399 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14400 builtin_mode_to_type[SFmode][0] = float_type_node;
14401 builtin_mode_to_type[DFmode][0] = double_type_node;
14402 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14403 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14404 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14405 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14406 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14407 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14408 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14409 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14410 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14411 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14412 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14413 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14414 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14415 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14416 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14417 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14418 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14419 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
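/* So, for instance, builtin_mode_to_type[V4SImode][1] is
   unsigned_V4SI_type_node while builtin_mode_to_type[V4SImode][0] is the
   signed V4SI_type_node; builtin_function_type below indexes this table
   with a (mode, unsigned-p) pair for each operand.  */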
14421 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
14422 TYPE_NAME (bool_char_type_node) = tdecl;
14424 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
14425 TYPE_NAME (bool_short_type_node) = tdecl;
14427 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
14428 TYPE_NAME (bool_int_type_node) = tdecl;
14430 tdecl = add_builtin_type ("__pixel", pixel_type_node);
14431 TYPE_NAME (pixel_type_node) = tdecl;
14433 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
14434 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
14435 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
14436 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
14437 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
14439 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
14440 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
14442 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
14443 TYPE_NAME (V16QI_type_node) = tdecl;
14445 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
14446 TYPE_NAME (bool_V16QI_type_node) = tdecl;
14448 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
14449 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
14451 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
14452 TYPE_NAME (V8HI_type_node) = tdecl;
14454 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
14455 TYPE_NAME (bool_V8HI_type_node) = tdecl;
14457 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
14458 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
14460 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
14461 TYPE_NAME (V4SI_type_node) = tdecl;
14463 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
14464 TYPE_NAME (bool_V4SI_type_node) = tdecl;
14466 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
14467 TYPE_NAME (V4SF_type_node) = tdecl;
14469 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
14470 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
14472 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
14473 TYPE_NAME (V2DF_type_node) = tdecl;
14475 if (TARGET_POWERPC64)
14477 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
14478 TYPE_NAME (V2DI_type_node) = tdecl;
14480 tdecl = add_builtin_type ("__vector unsigned long",
14481 unsigned_V2DI_type_node);
14482 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14484 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
14485 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14487 else
14489 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
14490 TYPE_NAME (V2DI_type_node) = tdecl;
14492 tdecl = add_builtin_type ("__vector unsigned long long",
14493 unsigned_V2DI_type_node);
14494 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14496 tdecl = add_builtin_type ("__vector __bool long long",
14497 bool_V2DI_type_node);
14498 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14501 if (V1TI_type_node)
14503 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
14504 TYPE_NAME (V1TI_type_node) = tdecl;
14506 tdecl = add_builtin_type ("__vector unsigned __int128",
14507 unsigned_V1TI_type_node);
14508 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
14511 /* Paired and SPE builtins are only available if you build a compiler with
14512 the appropriate options, so only create those builtins with the
14513 appropriate compiler option. Create Altivec and VSX builtins on machines
14514 with at least the general purpose extensions (970 and newer) to allow the
14515 use of the target attribute. */
14516 if (TARGET_PAIRED_FLOAT)
14517 paired_init_builtins ();
14518 if (TARGET_SPE)
14519 spe_init_builtins ();
14520 if (TARGET_EXTRA_BUILTINS)
14521 altivec_init_builtins ();
14522 if (TARGET_HTM)
14523 htm_init_builtins ();
14525 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
14526 rs6000_common_init_builtins ();
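/* A user-level sketch of why the Altivec/VSX builtins are created up
   front: with the target attribute, a translation unit compiled for a
   generic CPU may still contain, hypothetically (assuming <altivec.h>),

     __attribute__ ((target ("vsx")))
     vector double twice (vector double x) { return vec_add (x, x); }

   so the builtin decls must already exist when that function body is
   parsed.  */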
14528 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
14529 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
14530 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
14532 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
14533 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
14534 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
14536 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
14537 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
14538 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
14540 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
14541 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
14542 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
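/* Hypothetical uses of the four builtins just defined:

     double d = __builtin_recipdiv (a, b);   /- a/b via reciprocal estimate -/
     float  f = __builtin_recipdivf (c, e);  /- float variant -/
     double r = __builtin_rsqrt (x);         /- ~ 1.0 / sqrt (x) -/
     float  s = __builtin_rsqrtf (y);        /- ~ 1.0f / sqrtf (y) -/

   The types match the builtin_function_type calls above: binary
   (double, double) -> double and (float, float) -> float for the
   divides, unary for the rsqrts.  */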
14544 mode = (TARGET_64BIT) ? DImode : SImode;
14545 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
14546 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
14547 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
14549 ftype = build_function_type_list (unsigned_intDI_type_node,
14550 NULL_TREE);
14551 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
14553 if (TARGET_64BIT)
14554 ftype = build_function_type_list (unsigned_intDI_type_node,
14555 NULL_TREE);
14556 else
14557 ftype = build_function_type_list (unsigned_intSI_type_node,
14558 NULL_TREE);
14559 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
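/* Illustrative use, following the signatures built just above:

     unsigned long long tb = __builtin_ppc_get_timebase (); /- 64-bit always -/
     unsigned long      mt = __builtin_ppc_mftb ();         /- register width -/

   __builtin_ppc_mftb returns an SImode value on 32-bit targets and a
   DImode value on 64-bit targets, per the TARGET_64BIT test above.  */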
14561 ftype = build_function_type_list (double_type_node, NULL_TREE);
14562 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
14564 ftype = build_function_type_list (void_type_node,
14565 intSI_type_node, double_type_node,
14566 NULL_TREE);
14567 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
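/* E.g., code that temporarily changes the FPSCR might be written,
   hypothetically, as

     double save = __builtin_mffs ();   /- read FPSCR into a double -/
     ...
     __builtin_mtfsf (0xff, save);      /- restore all FPSCR fields -/

   matching the void (int, double) type constructed above.  */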
14569 #if TARGET_XCOFF
14570 /* AIX libm provides clog as __clog. */
14571 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
14572 set_user_assembler_name (tdecl, "__clog");
14573 #endif
14575 #ifdef SUBTARGET_INIT_BUILTINS
14576 SUBTARGET_INIT_BUILTINS;
14577 #endif
14580 /* Returns the rs6000 builtin decl for CODE. */
14582 static tree
14583 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
14585 HOST_WIDE_INT fnmask;
14587 if (code >= RS6000_BUILTIN_COUNT)
14588 return error_mark_node;
14590 fnmask = rs6000_builtin_info[code].mask;
14591 if ((fnmask & rs6000_builtin_mask) != fnmask)
14593 rs6000_invalid_builtin ((enum rs6000_builtins)code);
14594 return error_mark_node;
14597 return rs6000_builtin_decls[code];
14600 static void
14601 spe_init_builtins (void)
14603 tree puint_type_node = build_pointer_type (unsigned_type_node);
14604 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
14605 const struct builtin_description *d;
14606 size_t i;
14608 tree v2si_ftype_4_v2si
14609 = build_function_type_list (opaque_V2SI_type_node,
14610 opaque_V2SI_type_node,
14611 opaque_V2SI_type_node,
14612 opaque_V2SI_type_node,
14613 opaque_V2SI_type_node,
14614 NULL_TREE);
14616 tree v2sf_ftype_4_v2sf
14617 = build_function_type_list (opaque_V2SF_type_node,
14618 opaque_V2SF_type_node,
14619 opaque_V2SF_type_node,
14620 opaque_V2SF_type_node,
14621 opaque_V2SF_type_node,
14622 NULL_TREE);
14624 tree int_ftype_int_v2si_v2si
14625 = build_function_type_list (integer_type_node,
14626 integer_type_node,
14627 opaque_V2SI_type_node,
14628 opaque_V2SI_type_node,
14629 NULL_TREE);
14631 tree int_ftype_int_v2sf_v2sf
14632 = build_function_type_list (integer_type_node,
14633 integer_type_node,
14634 opaque_V2SF_type_node,
14635 opaque_V2SF_type_node,
14636 NULL_TREE);
14638 tree void_ftype_v2si_puint_int
14639 = build_function_type_list (void_type_node,
14640 opaque_V2SI_type_node,
14641 puint_type_node,
14642 integer_type_node,
14643 NULL_TREE);
14645 tree void_ftype_v2si_puint_char
14646 = build_function_type_list (void_type_node,
14647 opaque_V2SI_type_node,
14648 puint_type_node,
14649 char_type_node,
14650 NULL_TREE);
14652 tree void_ftype_v2si_pv2si_int
14653 = build_function_type_list (void_type_node,
14654 opaque_V2SI_type_node,
14655 opaque_p_V2SI_type_node,
14656 integer_type_node,
14657 NULL_TREE);
14659 tree void_ftype_v2si_pv2si_char
14660 = build_function_type_list (void_type_node,
14661 opaque_V2SI_type_node,
14662 opaque_p_V2SI_type_node,
14663 char_type_node,
14664 NULL_TREE);
14666 tree void_ftype_int
14667 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14669 tree int_ftype_void
14670 = build_function_type_list (integer_type_node, NULL_TREE);
14672 tree v2si_ftype_pv2si_int
14673 = build_function_type_list (opaque_V2SI_type_node,
14674 opaque_p_V2SI_type_node,
14675 integer_type_node,
14676 NULL_TREE);
14678 tree v2si_ftype_puint_int
14679 = build_function_type_list (opaque_V2SI_type_node,
14680 puint_type_node,
14681 integer_type_node,
14682 NULL_TREE);
14684 tree v2si_ftype_pushort_int
14685 = build_function_type_list (opaque_V2SI_type_node,
14686 pushort_type_node,
14687 integer_type_node,
14688 NULL_TREE);
14690 tree v2si_ftype_signed_char
14691 = build_function_type_list (opaque_V2SI_type_node,
14692 signed_char_type_node,
14693 NULL_TREE);
14695 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
14697 /* Initialize irregular SPE builtins. */
14699 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
14700 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
14701 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
14702 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
14703 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
14704 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
14705 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
14706 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
14707 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
14708 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
14709 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
14710 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
14711 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
14712 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
14713 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
14714 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
14715 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
14716 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
14718 /* Loads. */
14719 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
14720 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
14721 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
14722 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
14723 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
14724 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
14725 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
14726 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
14727 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
14728 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
14729 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
14730 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
14731 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
14732 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
14733 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
14734 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
14735 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
14736 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
14737 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
14738 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
14739 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
14740 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
14742 /* Predicates. */
14743 d = bdesc_spe_predicates;
14744 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
14746 tree type;
14748 switch (insn_data[d->icode].operand[1].mode)
14750 case V2SImode:
14751 type = int_ftype_int_v2si_v2si;
14752 break;
14753 case V2SFmode:
14754 type = int_ftype_int_v2sf_v2sf;
14755 break;
14756 default:
14757 gcc_unreachable ();
14760 def_builtin (d->name, type, d->code);
14763 /* Evsel predicates. */
14764 d = bdesc_spe_evsel;
14765 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
14767 tree type;
14769 switch (insn_data[d->icode].operand[1].mode)
14771 case V2SImode:
14772 type = v2si_ftype_4_v2si;
14773 break;
14774 case V2SFmode:
14775 type = v2sf_ftype_4_v2sf;
14776 break;
14777 default:
14778 gcc_unreachable ();
14781 def_builtin (d->name, type, d->code);
14785 static void
14786 paired_init_builtins (void)
14788 const struct builtin_description *d;
14789 size_t i;
14791 tree int_ftype_int_v2sf_v2sf
14792 = build_function_type_list (integer_type_node,
14793 integer_type_node,
14794 V2SF_type_node,
14795 V2SF_type_node,
14796 NULL_TREE);
14797 tree pcfloat_type_node =
14798 build_pointer_type (build_qualified_type
14799 (float_type_node, TYPE_QUAL_CONST));
14801 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
14802 long_integer_type_node,
14803 pcfloat_type_node,
14804 NULL_TREE);
14805 tree void_ftype_v2sf_long_pcfloat =
14806 build_function_type_list (void_type_node,
14807 V2SF_type_node,
14808 long_integer_type_node,
14809 pcfloat_type_node,
14810 NULL_TREE);
14813 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
14814 PAIRED_BUILTIN_LX);
14817 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
14818 PAIRED_BUILTIN_STX);
14820 /* Predicates. */
14821 d = bdesc_paired_preds;
14822 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
14824 tree type;
14826 if (TARGET_DEBUG_BUILTIN)
14827 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
14828 (int)i, get_insn_name (d->icode), (int)d->icode,
14829 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
14831 switch (insn_data[d->icode].operand[1].mode)
14833 case V2SFmode:
14834 type = int_ftype_int_v2sf_v2sf;
14835 break;
14836 default:
14837 gcc_unreachable ();
14840 def_builtin (d->name, type, d->code);
14844 static void
14845 altivec_init_builtins (void)
14847 const struct builtin_description *d;
14848 size_t i;
14849 tree ftype;
14850 tree decl;
14852 tree pvoid_type_node = build_pointer_type (void_type_node);
14854 tree pcvoid_type_node
14855 = build_pointer_type (build_qualified_type (void_type_node,
14856 TYPE_QUAL_CONST));
14858 tree int_ftype_opaque
14859 = build_function_type_list (integer_type_node,
14860 opaque_V4SI_type_node, NULL_TREE);
14861 tree opaque_ftype_opaque
14862 = build_function_type_list (integer_type_node, NULL_TREE);
14863 tree opaque_ftype_opaque_int
14864 = build_function_type_list (opaque_V4SI_type_node,
14865 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
14866 tree opaque_ftype_opaque_opaque_int
14867 = build_function_type_list (opaque_V4SI_type_node,
14868 opaque_V4SI_type_node, opaque_V4SI_type_node,
14869 integer_type_node, NULL_TREE);
14870 tree int_ftype_int_opaque_opaque
14871 = build_function_type_list (integer_type_node,
14872 integer_type_node, opaque_V4SI_type_node,
14873 opaque_V4SI_type_node, NULL_TREE);
14874 tree int_ftype_int_v4si_v4si
14875 = build_function_type_list (integer_type_node,
14876 integer_type_node, V4SI_type_node,
14877 V4SI_type_node, NULL_TREE);
14878 tree int_ftype_int_v2di_v2di
14879 = build_function_type_list (integer_type_node,
14880 integer_type_node, V2DI_type_node,
14881 V2DI_type_node, NULL_TREE);
14882 tree void_ftype_v4si
14883 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
14884 tree v8hi_ftype_void
14885 = build_function_type_list (V8HI_type_node, NULL_TREE);
14886 tree void_ftype_void
14887 = build_function_type_list (void_type_node, NULL_TREE);
14888 tree void_ftype_int
14889 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14891 tree opaque_ftype_long_pcvoid
14892 = build_function_type_list (opaque_V4SI_type_node,
14893 long_integer_type_node, pcvoid_type_node,
14894 NULL_TREE);
14895 tree v16qi_ftype_long_pcvoid
14896 = build_function_type_list (V16QI_type_node,
14897 long_integer_type_node, pcvoid_type_node,
14898 NULL_TREE);
14899 tree v8hi_ftype_long_pcvoid
14900 = build_function_type_list (V8HI_type_node,
14901 long_integer_type_node, pcvoid_type_node,
14902 NULL_TREE);
14903 tree v4si_ftype_long_pcvoid
14904 = build_function_type_list (V4SI_type_node,
14905 long_integer_type_node, pcvoid_type_node,
14906 NULL_TREE);
14907 tree v4sf_ftype_long_pcvoid
14908 = build_function_type_list (V4SF_type_node,
14909 long_integer_type_node, pcvoid_type_node,
14910 NULL_TREE);
14911 tree v2df_ftype_long_pcvoid
14912 = build_function_type_list (V2DF_type_node,
14913 long_integer_type_node, pcvoid_type_node,
14914 NULL_TREE);
14915 tree v2di_ftype_long_pcvoid
14916 = build_function_type_list (V2DI_type_node,
14917 long_integer_type_node, pcvoid_type_node,
14918 NULL_TREE);
14920 tree void_ftype_opaque_long_pvoid
14921 = build_function_type_list (void_type_node,
14922 opaque_V4SI_type_node, long_integer_type_node,
14923 pvoid_type_node, NULL_TREE);
14924 tree void_ftype_v4si_long_pvoid
14925 = build_function_type_list (void_type_node,
14926 V4SI_type_node, long_integer_type_node,
14927 pvoid_type_node, NULL_TREE);
14928 tree void_ftype_v16qi_long_pvoid
14929 = build_function_type_list (void_type_node,
14930 V16QI_type_node, long_integer_type_node,
14931 pvoid_type_node, NULL_TREE);
14932 tree void_ftype_v8hi_long_pvoid
14933 = build_function_type_list (void_type_node,
14934 V8HI_type_node, long_integer_type_node,
14935 pvoid_type_node, NULL_TREE);
14936 tree void_ftype_v4sf_long_pvoid
14937 = build_function_type_list (void_type_node,
14938 V4SF_type_node, long_integer_type_node,
14939 pvoid_type_node, NULL_TREE);
14940 tree void_ftype_v2df_long_pvoid
14941 = build_function_type_list (void_type_node,
14942 V2DF_type_node, long_integer_type_node,
14943 pvoid_type_node, NULL_TREE);
14944 tree void_ftype_v2di_long_pvoid
14945 = build_function_type_list (void_type_node,
14946 V2DI_type_node, long_integer_type_node,
14947 pvoid_type_node, NULL_TREE);
14948 tree int_ftype_int_v8hi_v8hi
14949 = build_function_type_list (integer_type_node,
14950 integer_type_node, V8HI_type_node,
14951 V8HI_type_node, NULL_TREE);
14952 tree int_ftype_int_v16qi_v16qi
14953 = build_function_type_list (integer_type_node,
14954 integer_type_node, V16QI_type_node,
14955 V16QI_type_node, NULL_TREE);
14956 tree int_ftype_int_v4sf_v4sf
14957 = build_function_type_list (integer_type_node,
14958 integer_type_node, V4SF_type_node,
14959 V4SF_type_node, NULL_TREE);
14960 tree int_ftype_int_v2df_v2df
14961 = build_function_type_list (integer_type_node,
14962 integer_type_node, V2DF_type_node,
14963 V2DF_type_node, NULL_TREE);
14964 tree v2di_ftype_v2di
14965 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
14966 tree v4si_ftype_v4si
14967 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
14968 tree v8hi_ftype_v8hi
14969 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
14970 tree v16qi_ftype_v16qi
14971 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
14972 tree v4sf_ftype_v4sf
14973 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14974 tree v2df_ftype_v2df
14975 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14976 tree void_ftype_pcvoid_int_int
14977 = build_function_type_list (void_type_node,
14978 pcvoid_type_node, integer_type_node,
14979 integer_type_node, NULL_TREE);
14981 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
14982 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
14983 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
14984 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
14985 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
14986 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
14987 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
14988 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
14989 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
14990 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
14991 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
14992 ALTIVEC_BUILTIN_LVXL_V2DF);
14993 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
14994 ALTIVEC_BUILTIN_LVXL_V2DI);
14995 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
14996 ALTIVEC_BUILTIN_LVXL_V4SF);
14997 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
14998 ALTIVEC_BUILTIN_LVXL_V4SI);
14999 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
15000 ALTIVEC_BUILTIN_LVXL_V8HI);
15001 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
15002 ALTIVEC_BUILTIN_LVXL_V16QI);
15003 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
15004 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
15005 ALTIVEC_BUILTIN_LVX_V2DF);
15006 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
15007 ALTIVEC_BUILTIN_LVX_V2DI);
15008 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
15009 ALTIVEC_BUILTIN_LVX_V4SF);
15010 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
15011 ALTIVEC_BUILTIN_LVX_V4SI);
15012 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
15013 ALTIVEC_BUILTIN_LVX_V8HI);
15014 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
15015 ALTIVEC_BUILTIN_LVX_V16QI);
15016 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
15017 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
15018 ALTIVEC_BUILTIN_STVX_V2DF);
15019 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
15020 ALTIVEC_BUILTIN_STVX_V2DI);
15021 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
15022 ALTIVEC_BUILTIN_STVX_V4SF);
15023 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
15024 ALTIVEC_BUILTIN_STVX_V4SI);
15025 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
15026 ALTIVEC_BUILTIN_STVX_V8HI);
15027 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
15028 ALTIVEC_BUILTIN_STVX_V16QI);
15029 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
15030 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
15031 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
15032 ALTIVEC_BUILTIN_STVXL_V2DF);
15033 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
15034 ALTIVEC_BUILTIN_STVXL_V2DI);
15035 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
15036 ALTIVEC_BUILTIN_STVXL_V4SF);
15037 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
15038 ALTIVEC_BUILTIN_STVXL_V4SI);
15039 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
15040 ALTIVEC_BUILTIN_STVXL_V8HI);
15041 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
15042 ALTIVEC_BUILTIN_STVXL_V16QI);
15043 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
15044 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
15045 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
15046 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
15047 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
15048 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
15049 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
15050 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
15051 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
15052 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
15053 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
15054 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
15055 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
15056 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
15057 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
15058 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
15060 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
15061 VSX_BUILTIN_LXVD2X_V2DF);
15062 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
15063 VSX_BUILTIN_LXVD2X_V2DI);
15064 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
15065 VSX_BUILTIN_LXVW4X_V4SF);
15066 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
15067 VSX_BUILTIN_LXVW4X_V4SI);
15068 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
15069 VSX_BUILTIN_LXVW4X_V8HI);
15070 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
15071 VSX_BUILTIN_LXVW4X_V16QI);
15072 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
15073 VSX_BUILTIN_STXVD2X_V2DF);
15074 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
15075 VSX_BUILTIN_STXVD2X_V2DI);
15076 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
15077 VSX_BUILTIN_STXVW4X_V4SF);
15078 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
15079 VSX_BUILTIN_STXVW4X_V4SI);
15080 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
15081 VSX_BUILTIN_STXVW4X_V8HI);
15082 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
15083 VSX_BUILTIN_STXVW4X_V16QI);
15084 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
15085 VSX_BUILTIN_VEC_LD);
15086 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
15087 VSX_BUILTIN_VEC_ST);
15089 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
15090 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
15091 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
15093 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
15094 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
15095 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
15096 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
15097 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
15098 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
15099 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
15100 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
15101 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
15102 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
15103 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
15104 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
15106 /* Cell builtins. */
15107 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
15108 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
15109 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
15110 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
15112 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
15113 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
15114 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
15115 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
15117 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
15118 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
15119 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
15120 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
15122 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
15123 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
15124 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
15125 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
15127 /* Add the DST variants. */
15128 d = bdesc_dst;
15129 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15130 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
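/* Each dst variant thus gets the type void (*) (const void *, int, int),
   matching the vec_dst data-stream-touch family; a hypothetical direct
   call would look like __builtin_altivec_dst (p, ctl, 0) for stream 0.  */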
15132 /* Initialize the predicates. */
15133 d = bdesc_altivec_preds;
15134 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15136 machine_mode mode1;
15137 tree type;
15139 if (rs6000_overloaded_builtin_p (d->code))
15140 mode1 = VOIDmode;
15141 else
15142 mode1 = insn_data[d->icode].operand[1].mode;
15144 switch (mode1)
15146 case VOIDmode:
15147 type = int_ftype_int_opaque_opaque;
15148 break;
15149 case V2DImode:
15150 type = int_ftype_int_v2di_v2di;
15151 break;
15152 case V4SImode:
15153 type = int_ftype_int_v4si_v4si;
15154 break;
15155 case V8HImode:
15156 type = int_ftype_int_v8hi_v8hi;
15157 break;
15158 case V16QImode:
15159 type = int_ftype_int_v16qi_v16qi;
15160 break;
15161 case V4SFmode:
15162 type = int_ftype_int_v4sf_v4sf;
15163 break;
15164 case V2DFmode:
15165 type = int_ftype_int_v2df_v2df;
15166 break;
15167 default:
15168 gcc_unreachable ();
15171 def_builtin (d->name, type, d->code);
15174 /* Initialize the abs* operators. */
15175 d = bdesc_abs;
15176 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15178 machine_mode mode0;
15179 tree type;
15181 mode0 = insn_data[d->icode].operand[0].mode;
15183 switch (mode0)
15185 case V2DImode:
15186 type = v2di_ftype_v2di;
15187 break;
15188 case V4SImode:
15189 type = v4si_ftype_v4si;
15190 break;
15191 case V8HImode:
15192 type = v8hi_ftype_v8hi;
15193 break;
15194 case V16QImode:
15195 type = v16qi_ftype_v16qi;
15196 break;
15197 case V4SFmode:
15198 type = v4sf_ftype_v4sf;
15199 break;
15200 case V2DFmode:
15201 type = v2df_ftype_v2df;
15202 break;
15203 default:
15204 gcc_unreachable ();
15207 def_builtin (d->name, type, d->code);
15210 /* Initialize the target builtin that implements
15211 targetm.vectorize.builtin_mask_for_load. */
15213 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
15214 v16qi_ftype_long_pcvoid,
15215 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
15216 BUILT_IN_MD, NULL, NULL_TREE);
15217 TREE_READONLY (decl) = 1;
15218 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
15219 altivec_builtin_mask_for_load = decl;
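/* Conceptually, when the autovectorizer must realign a misaligned load it
   emits something like the hypothetical sequence

     mask = __builtin_altivec_mask_for_load (0, addr);
     ... vperm of the two covering aligned loads using mask ...

   i.e. an lvsl/lvsr-style permute mask; user code never calls this
   builtin directly, which is why only the decl is recorded here.  */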
15221 /* Access to the vec_init patterns. */
15222 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
15223 integer_type_node, integer_type_node,
15224 integer_type_node, NULL_TREE);
15225 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
15227 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
15228 short_integer_type_node,
15229 short_integer_type_node,
15230 short_integer_type_node,
15231 short_integer_type_node,
15232 short_integer_type_node,
15233 short_integer_type_node,
15234 short_integer_type_node, NULL_TREE);
15235 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
15237 ftype = build_function_type_list (V16QI_type_node, char_type_node,
15238 char_type_node, char_type_node,
15239 char_type_node, char_type_node,
15240 char_type_node, char_type_node,
15241 char_type_node, char_type_node,
15242 char_type_node, char_type_node,
15243 char_type_node, char_type_node,
15244 char_type_node, char_type_node,
15245 char_type_node, NULL_TREE);
15246 def_builtin ("__builtin_vec_init_v16qi", ftype,
15247 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
15249 ftype = build_function_type_list (V4SF_type_node, float_type_node,
15250 float_type_node, float_type_node,
15251 float_type_node, NULL_TREE);
15252 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
15254 /* VSX builtins. */
15255 ftype = build_function_type_list (V2DF_type_node, double_type_node,
15256 double_type_node, NULL_TREE);
15257 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
15259 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
15260 intDI_type_node, NULL_TREE);
15261 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
15263 /* Access to the vec_set patterns. */
15264 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
15265 intSI_type_node,
15266 integer_type_node, NULL_TREE);
15267 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
15269 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15270 intHI_type_node,
15271 integer_type_node, NULL_TREE);
15272 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
15274 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
15275 intQI_type_node,
15276 integer_type_node, NULL_TREE);
15277 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
15279 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
15280 float_type_node,
15281 integer_type_node, NULL_TREE);
15282 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
15284 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
15285 double_type_node,
15286 integer_type_node, NULL_TREE);
15287 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
15289 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
15290 intDI_type_node,
15291 integer_type_node, NULL_TREE);
15292 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
15294 /* Access to the vec_extract patterns. */
15295 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15296 integer_type_node, NULL_TREE);
15297 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
15299 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15300 integer_type_node, NULL_TREE);
15301 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
15303 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15304 integer_type_node, NULL_TREE);
15305 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
15307 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15308 integer_type_node, NULL_TREE);
15309 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
15311 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15312 integer_type_node, NULL_TREE);
15313 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
15315 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
15316 integer_type_node, NULL_TREE);
15317 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
15320 if (V1TI_type_node)
15322 tree v1ti_ftype_long_pcvoid
15323 = build_function_type_list (V1TI_type_node,
15324 long_integer_type_node, pcvoid_type_node,
15325 NULL_TREE);
15326 tree void_ftype_v1ti_long_pvoid
15327 = build_function_type_list (void_type_node,
15328 V1TI_type_node, long_integer_type_node,
15329 pvoid_type_node, NULL_TREE);
15330 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
15331 VSX_BUILTIN_LXVD2X_V1TI);
15332 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
15333 VSX_BUILTIN_STXVD2X_V1TI);
15334 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
15335 NULL_TREE);
15336 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
15337 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
15338 intTI_type_node,
15339 integer_type_node, NULL_TREE);
15340 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
15341 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
15342 integer_type_node, NULL_TREE);
15343 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
15348 static void
15349 htm_init_builtins (void)
15351 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15352 const struct builtin_description *d;
15353 size_t i;
15355 d = bdesc_htm;
15356 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15358 tree op[MAX_HTM_OPERANDS], type;
15359 HOST_WIDE_INT mask = d->mask;
15360 unsigned attr = rs6000_builtin_info[d->code].attr;
15361 bool void_func = (attr & RS6000_BTC_VOID);
15362 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
15363 int nopnds = 0;
15364 tree gpr_type_node;
15365 tree rettype;
15366 tree argtype;
15368 if (TARGET_32BIT && TARGET_POWERPC64)
15369 gpr_type_node = long_long_unsigned_type_node;
15370 else
15371 gpr_type_node = long_unsigned_type_node;
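/* A 32-bit ABI with 64-bit instructions (-m32 -mpowerpc64) still has
   64-bit registers, so the SPR image handed to these builtins must be
   long long there; otherwise plain long is already register-sized.  */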
15373 if (attr & RS6000_BTC_SPR)
15375 rettype = gpr_type_node;
15376 argtype = gpr_type_node;
15378 else if (d->code == HTM_BUILTIN_TABORTDC
15379 || d->code == HTM_BUILTIN_TABORTDCI)
15381 rettype = unsigned_type_node;
15382 argtype = gpr_type_node;
15384 else
15386 rettype = unsigned_type_node;
15387 argtype = unsigned_type_node;
15390 if ((mask & builtin_mask) != mask)
15392 if (TARGET_DEBUG_BUILTIN)
15393 fprintf (stderr, "htm_builtin, skip %s\n", d->name);
15394 continue;
15397 if (d->name == 0)
15399 if (TARGET_DEBUG_BUILTIN)
15400 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
15401 (long unsigned) i);
15402 continue;
15405 op[nopnds++] = (void_func) ? void_type_node : rettype;
15407 if (attr_args == RS6000_BTC_UNARY)
15408 op[nopnds++] = argtype;
15409 else if (attr_args == RS6000_BTC_BINARY)
15411 op[nopnds++] = argtype;
15412 op[nopnds++] = argtype;
15414 else if (attr_args == RS6000_BTC_TERNARY)
15416 op[nopnds++] = argtype;
15417 op[nopnds++] = argtype;
15418 op[nopnds++] = argtype;
15421 switch (nopnds)
15423 case 1:
15424 type = build_function_type_list (op[0], NULL_TREE);
15425 break;
15426 case 2:
15427 type = build_function_type_list (op[0], op[1], NULL_TREE);
15428 break;
15429 case 3:
15430 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
15431 break;
15432 case 4:
15433 type = build_function_type_list (op[0], op[1], op[2], op[3],
15434 NULL_TREE);
15435 break;
15436 default:
15437 gcc_unreachable ();
15440 def_builtin (d->name, type, d->code);
15444 /* Hash function for builtin functions with up to 3 arguments and a return
15445 type. */
15446 hashval_t
15447 builtin_hasher::hash (builtin_hash_struct *bh)
15449 unsigned ret = 0;
15450 int i;
15452 for (i = 0; i < 4; i++)
15454 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
15455 ret = (ret * 2) + bh->uns_p[i];
15458 return ret;
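/* Worked example of the mixing above: starting from ret = 0, each of the
   four iterations does

     ret = ret * MAX_MACHINE_MODE + mode[i];
     ret = ret * 2 + uns_p[i];

   so the (mode, signedness) pairs are folded in positionally; equal
   signatures always hash alike, and builtin_hasher::equal below resolves
   any residual collisions field by field.  */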
15461 /* Compare builtin hash entries H1 and H2 for equivalence. */
15462 bool
15463 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
15465 return ((p1->mode[0] == p2->mode[0])
15466 && (p1->mode[1] == p2->mode[1])
15467 && (p1->mode[2] == p2->mode[2])
15468 && (p1->mode[3] == p2->mode[3])
15469 && (p1->uns_p[0] == p2->uns_p[0])
15470 && (p1->uns_p[1] == p2->uns_p[1])
15471 && (p1->uns_p[2] == p2->uns_p[2])
15472 && (p1->uns_p[3] == p2->uns_p[3]));
15475 /* Map types for builtin functions with an explicit return type and up to 3
15476 arguments. Functions with fewer than 3 arguments use VOIDmode as the mode
15477 of the unused arguments. */
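/* For instance, the hypothetical call

     builtin_function_type (V4SImode, V4SImode, V4SImode, VOIDmode,
                            ALTIVEC_BUILTIN_VADDUWM,
                            "__builtin_altivec_vadduwm");

   trims num_args to 2 (mode_arg2 is VOIDmode) and returns the cached
   tree for 'V4SI (*) (V4SI, V4SI)'.  */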
15478 static tree
15479 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
15480 machine_mode mode_arg1, machine_mode mode_arg2,
15481 enum rs6000_builtins builtin, const char *name)
15483 struct builtin_hash_struct h;
15484 struct builtin_hash_struct *h2;
15485 int num_args = 3;
15486 int i;
15487 tree ret_type = NULL_TREE;
15488 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
15490 /* Create builtin_hash_table. */
15491 if (builtin_hash_table == NULL)
15492 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
15494 h.type = NULL_TREE;
15495 h.mode[0] = mode_ret;
15496 h.mode[1] = mode_arg0;
15497 h.mode[2] = mode_arg1;
15498 h.mode[3] = mode_arg2;
15499 h.uns_p[0] = 0;
15500 h.uns_p[1] = 0;
15501 h.uns_p[2] = 0;
15502 h.uns_p[3] = 0;
15504 /* If the builtin produces unsigned results or takes unsigned arguments,
15505 and it is returned as a decl for the vectorizer (such as the widening
15506 multiplies and permutes), make sure the arguments and return value
15507 are type correct. */
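/* E.g., CRYPTO_BUILTIN_VCIPHER below sets uns_p[0..2], so its decl uses
   unsigned vector types throughout rather than the all-signed default
   that would otherwise be derived from the insn's V2DImode operands.  */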
15508 switch (builtin)
15510 /* unsigned 1 argument functions. */
15511 case CRYPTO_BUILTIN_VSBOX:
15512 case P8V_BUILTIN_VGBBD:
15513 case MISC_BUILTIN_CDTBCD:
15514 case MISC_BUILTIN_CBCDTD:
15515 h.uns_p[0] = 1;
15516 h.uns_p[1] = 1;
15517 break;
15519 /* unsigned 2 argument functions. */
15520 case ALTIVEC_BUILTIN_VMULEUB_UNS:
15521 case ALTIVEC_BUILTIN_VMULEUH_UNS:
15522 case ALTIVEC_BUILTIN_VMULOUB_UNS:
15523 case ALTIVEC_BUILTIN_VMULOUH_UNS:
15524 case CRYPTO_BUILTIN_VCIPHER:
15525 case CRYPTO_BUILTIN_VCIPHERLAST:
15526 case CRYPTO_BUILTIN_VNCIPHER:
15527 case CRYPTO_BUILTIN_VNCIPHERLAST:
15528 case CRYPTO_BUILTIN_VPMSUMB:
15529 case CRYPTO_BUILTIN_VPMSUMH:
15530 case CRYPTO_BUILTIN_VPMSUMW:
15531 case CRYPTO_BUILTIN_VPMSUMD:
15532 case CRYPTO_BUILTIN_VPMSUM:
15533 case MISC_BUILTIN_ADDG6S:
15534 case MISC_BUILTIN_DIVWEU:
15535 case MISC_BUILTIN_DIVWEUO:
15536 case MISC_BUILTIN_DIVDEU:
15537 case MISC_BUILTIN_DIVDEUO:
15538 h.uns_p[0] = 1;
15539 h.uns_p[1] = 1;
15540 h.uns_p[2] = 1;
15541 break;
15543 /* unsigned 3 argument functions. */
15544 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
15545 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
15546 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
15547 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
15548 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
15549 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
15550 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
15551 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
15552 case VSX_BUILTIN_VPERM_16QI_UNS:
15553 case VSX_BUILTIN_VPERM_8HI_UNS:
15554 case VSX_BUILTIN_VPERM_4SI_UNS:
15555 case VSX_BUILTIN_VPERM_2DI_UNS:
15556 case VSX_BUILTIN_XXSEL_16QI_UNS:
15557 case VSX_BUILTIN_XXSEL_8HI_UNS:
15558 case VSX_BUILTIN_XXSEL_4SI_UNS:
15559 case VSX_BUILTIN_XXSEL_2DI_UNS:
15560 case CRYPTO_BUILTIN_VPERMXOR:
15561 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
15562 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
15563 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
15564 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
15565 case CRYPTO_BUILTIN_VSHASIGMAW:
15566 case CRYPTO_BUILTIN_VSHASIGMAD:
15567 case CRYPTO_BUILTIN_VSHASIGMA:
15568 h.uns_p[0] = 1;
15569 h.uns_p[1] = 1;
15570 h.uns_p[2] = 1;
15571 h.uns_p[3] = 1;
15572 break;
15574 /* signed permute functions with unsigned char mask. */
15575 case ALTIVEC_BUILTIN_VPERM_16QI:
15576 case ALTIVEC_BUILTIN_VPERM_8HI:
15577 case ALTIVEC_BUILTIN_VPERM_4SI:
15578 case ALTIVEC_BUILTIN_VPERM_4SF:
15579 case ALTIVEC_BUILTIN_VPERM_2DI:
15580 case ALTIVEC_BUILTIN_VPERM_2DF:
15581 case VSX_BUILTIN_VPERM_16QI:
15582 case VSX_BUILTIN_VPERM_8HI:
15583 case VSX_BUILTIN_VPERM_4SI:
15584 case VSX_BUILTIN_VPERM_4SF:
15585 case VSX_BUILTIN_VPERM_2DI:
15586 case VSX_BUILTIN_VPERM_2DF:
15587 h.uns_p[3] = 1;
15588 break;
15590 /* unsigned args, signed return. */
15591 case VSX_BUILTIN_XVCVUXDDP_UNS:
15592 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
15593 h.uns_p[1] = 1;
15594 break;
15596 /* signed args, unsigned return. */
15597 case VSX_BUILTIN_XVCVDPUXDS_UNS:
15598 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
15599 case MISC_BUILTIN_UNPACK_TD:
15600 case MISC_BUILTIN_UNPACK_V1TI:
15601 h.uns_p[0] = 1;
15602 break;
15604 /* unsigned arguments for 128-bit pack instructions. */
15605 case MISC_BUILTIN_PACK_TD:
15606 case MISC_BUILTIN_PACK_V1TI:
15607 h.uns_p[1] = 1;
15608 h.uns_p[2] = 1;
15609 break;
15611 default:
15612 break;
15615 /* Figure out how many args are present. */
15616 while (num_args > 0 && h.mode[num_args] == VOIDmode)
15617 num_args--;
15619 if (num_args == 0)
15620 fatal_error (input_location,
15621 "internal error: builtin function %s had no type", name);
15623 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
15624 if (!ret_type && h.uns_p[0])
15625 ret_type = builtin_mode_to_type[h.mode[0]][0];
15627 if (!ret_type)
15628 fatal_error (input_location,
15629 "internal error: builtin function %s had an unexpected "
15630 "return type %s", name, GET_MODE_NAME (h.mode[0]));
15632 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
15633 arg_type[i] = NULL_TREE;
15635 for (i = 0; i < num_args; i++)
15637 int m = (int) h.mode[i+1];
15638 int uns_p = h.uns_p[i+1];
15640 arg_type[i] = builtin_mode_to_type[m][uns_p];
15641 if (!arg_type[i] && uns_p)
15642 arg_type[i] = builtin_mode_to_type[m][0];
15644 if (!arg_type[i])
15645 fatal_error (input_location,
15646 "internal error: builtin function %s, argument %d "
15647 "had unexpected argument type %s", name, i,
15648 GET_MODE_NAME (m));
15651 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
15652 if (*found == NULL)
15654 h2 = ggc_alloc<builtin_hash_struct> ();
15655 *h2 = h;
15656 *found = h2;
15658 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
15659 arg_type[2], NULL_TREE);
15662 return (*found)->type;
15665 static void
15666 rs6000_common_init_builtins (void)
15668 const struct builtin_description *d;
15669 size_t i;
15671 tree opaque_ftype_opaque = NULL_TREE;
15672 tree opaque_ftype_opaque_opaque = NULL_TREE;
15673 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
15674 tree v2si_ftype_qi = NULL_TREE;
15675 tree v2si_ftype_v2si_qi = NULL_TREE;
15676 tree v2si_ftype_int_qi = NULL_TREE;
15677 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15679 if (!TARGET_PAIRED_FLOAT)
15681 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
15682 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
15685 /* Paired and SPE builtins are only available if you build a compiler with
15686 the appropriate options, so only create those builtins with the
15687 appropriate compiler option. Create Altivec and VSX builtins on machines
15688 with at least the general purpose extensions (970 and newer) to allow the
15689 use of the target attribute. */
15691 if (TARGET_EXTRA_BUILTINS)
15692 builtin_mask |= RS6000_BTM_COMMON;
15694 /* Add the ternary operators. */
15695 d = bdesc_3arg;
15696 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15698 tree type;
15699 HOST_WIDE_INT mask = d->mask;
15701 if ((mask & builtin_mask) != mask)
15703 if (TARGET_DEBUG_BUILTIN)
15704 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
15705 continue;
15708 if (rs6000_overloaded_builtin_p (d->code))
15710 if (! (type = opaque_ftype_opaque_opaque_opaque))
15711 type = opaque_ftype_opaque_opaque_opaque
15712 = build_function_type_list (opaque_V4SI_type_node,
15713 opaque_V4SI_type_node,
15714 opaque_V4SI_type_node,
15715 opaque_V4SI_type_node,
15716 NULL_TREE);
15718 else
15720 enum insn_code icode = d->icode;
15721 if (d->name == 0)
15723 if (TARGET_DEBUG_BUILTIN)
15724 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
15725 (long unsigned)i);
15727 continue;
15730 if (icode == CODE_FOR_nothing)
15732 if (TARGET_DEBUG_BUILTIN)
15733 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
15734 d->name);
15736 continue;
15739 type = builtin_function_type (insn_data[icode].operand[0].mode,
15740 insn_data[icode].operand[1].mode,
15741 insn_data[icode].operand[2].mode,
15742 insn_data[icode].operand[3].mode,
15743 d->code, d->name);
15746 def_builtin (d->name, type, d->code);
15749 /* Add the binary operators. */
15750 d = bdesc_2arg;
15751 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15753 machine_mode mode0, mode1, mode2;
15754 tree type;
15755 HOST_WIDE_INT mask = d->mask;
15757 if ((mask & builtin_mask) != mask)
15759 if (TARGET_DEBUG_BUILTIN)
15760 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
15761 continue;
15764 if (rs6000_overloaded_builtin_p (d->code))
15766 if (! (type = opaque_ftype_opaque_opaque))
15767 type = opaque_ftype_opaque_opaque
15768 = build_function_type_list (opaque_V4SI_type_node,
15769 opaque_V4SI_type_node,
15770 opaque_V4SI_type_node,
15771 NULL_TREE);
15773 else
15775 enum insn_code icode = d->icode;
15776 if (d->name == 0)
15778 if (TARGET_DEBUG_BUILTIN)
15779 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
15780 (long unsigned)i);
15782 continue;
15785 if (icode == CODE_FOR_nothing)
15787 if (TARGET_DEBUG_BUILTIN)
15788 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
15789 d->name);
15791 continue;
15794 mode0 = insn_data[icode].operand[0].mode;
15795 mode1 = insn_data[icode].operand[1].mode;
15796 mode2 = insn_data[icode].operand[2].mode;
15798 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
15800 if (! (type = v2si_ftype_v2si_qi))
15801 type = v2si_ftype_v2si_qi
15802 = build_function_type_list (opaque_V2SI_type_node,
15803 opaque_V2SI_type_node,
15804 char_type_node,
15805 NULL_TREE);
15808 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
15809 && mode2 == QImode)
15811 if (! (type = v2si_ftype_int_qi))
15812 type = v2si_ftype_int_qi
15813 = build_function_type_list (opaque_V2SI_type_node,
15814 integer_type_node,
15815 char_type_node,
15816 NULL_TREE);
15819 else
15820 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
15821 d->code, d->name);
15824 def_builtin (d->name, type, d->code);
15827 /* Add the simple unary operators. */
15828 d = bdesc_1arg;
15829 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15831 machine_mode mode0, mode1;
15832 tree type;
15833 HOST_WIDE_INT mask = d->mask;
15835 if ((mask & builtin_mask) != mask)
15837 if (TARGET_DEBUG_BUILTIN)
15838 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
15839 continue;
15842 if (rs6000_overloaded_builtin_p (d->code))
15844 if (! (type = opaque_ftype_opaque))
15845 type = opaque_ftype_opaque
15846 = build_function_type_list (opaque_V4SI_type_node,
15847 opaque_V4SI_type_node,
15848 NULL_TREE);
15850 else
15852 enum insn_code icode = d->icode;
15853 if (d->name == 0)
15855 if (TARGET_DEBUG_BUILTIN)
15856 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
15857 (long unsigned)i);
15859 continue;
15862 if (icode == CODE_FOR_nothing)
15864 if (TARGET_DEBUG_BUILTIN)
15865 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
15866 d->name);
15868 continue;
15871 mode0 = insn_data[icode].operand[0].mode;
15872 mode1 = insn_data[icode].operand[1].mode;
15874 if (mode0 == V2SImode && mode1 == QImode)
15876 if (! (type = v2si_ftype_qi))
15877 type = v2si_ftype_qi
15878 = build_function_type_list (opaque_V2SI_type_node,
15879 char_type_node,
15880 NULL_TREE);
15883 else
15884 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
15885 d->code, d->name);
15888 def_builtin (d->name, type, d->code);
15892 static void
15893 rs6000_init_libfuncs (void)
15895 if (!TARGET_IEEEQUAD)
15896 /* AIX/Darwin/64-bit Linux quad floating point routines. */
15897 if (!TARGET_XL_COMPAT)
15899 set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
15900 set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
15901 set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
15902 set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
15904 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
15906 set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
15907 set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
15908 set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
15909 set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
15910 set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
15911 set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
15912 set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
15914 set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
15915 set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
15916 set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
15917 set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
15918 set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
15919 set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
15920 set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
15921 set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
15924 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
15925 set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
15927 else
15929 set_optab_libfunc (add_optab, TFmode, "_xlqadd");
15930 set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
15931 set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
15932 set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
15934 else
15936 /* 32-bit SVR4 quad floating point routines. */
15938 set_optab_libfunc (add_optab, TFmode, "_q_add");
15939 set_optab_libfunc (sub_optab, TFmode, "_q_sub");
15940 set_optab_libfunc (neg_optab, TFmode, "_q_neg");
15941 set_optab_libfunc (smul_optab, TFmode, "_q_mul");
15942 set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
15943 if (TARGET_PPC_GPOPT)
15944 set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");
15946 set_optab_libfunc (eq_optab, TFmode, "_q_feq");
15947 set_optab_libfunc (ne_optab, TFmode, "_q_fne");
15948 set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
15949 set_optab_libfunc (ge_optab, TFmode, "_q_fge");
15950 set_optab_libfunc (lt_optab, TFmode, "_q_flt");
15951 set_optab_libfunc (le_optab, TFmode, "_q_fle");
15953 set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
15954 set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
15955 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
15956 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
15957 set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
15958 set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
15959 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
15960 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
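/* For example, with the mappings above a TFmode addition that is not
   expanded inline becomes a call to "__gcc_qadd" on AIX/Darwin/64-bit
   Linux ("_xlqadd" with -mxl-compat), and a call to "_q_add" with the
   32-bit SVR4 routines.  */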
15965 /* Expand a block clear operation, and return 1 if successful. Return 0
15966 if we should let the compiler generate normal code.
15968 operands[0] is the destination
15969 operands[1] is the length
15970 operands[3] is the alignment */
15973 expand_block_clear (rtx operands[])
15975 rtx orig_dest = operands[0];
15976 rtx bytes_rtx = operands[1];
15977 rtx align_rtx = operands[3];
15978 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
15979 HOST_WIDE_INT align;
15980 HOST_WIDE_INT bytes;
15981 int offset;
15982 int clear_bytes;
15983 int clear_step;
15985 /* If this is not a fixed size clear, just call memset */
15986 if (! constp)
15987 return 0;
15989 /* The alignment must be a compile-time constant */
15990 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15991 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15993 /* Anything to clear? */
15994 bytes = INTVAL (bytes_rtx);
15995 if (bytes <= 0)
15996 return 1;
15998 /* Use the builtin memset after a point, to avoid huge code bloat.
15999 When optimize_size, avoid any significant code bloat; calling
16000 memset is about 4 instructions, so allow for one instruction to
16001 load zero and three to do clearing. */
16002 if (TARGET_ALTIVEC && align >= 128)
16003 clear_step = 16;
16004 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
16005 clear_step = 8;
16006 else if (TARGET_SPE && align >= 64)
16007 clear_step = 8;
16008 else
16009 clear_step = 4;
16011 if (optimize_size && bytes > 3 * clear_step)
16012 return 0;
16013 if (! optimize_size && bytes > 8 * clear_step)
16014 return 0;
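/* Worked example of the thresholds above: with Altivec and 128-bit
   alignment, clear_step is 16, so clears larger than 128 bytes (8 * 16)
   fall back to memset, and with -Os anything over 48 bytes (3 * 16)
   does.  */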
16016 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
16018 machine_mode mode = BLKmode;
16019 rtx dest;
16021 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
16023 clear_bytes = 16;
16024 mode = V4SImode;
16026 else if (bytes >= 8 && TARGET_SPE && align >= 64)
16028 clear_bytes = 8;
16029 mode = V2SImode;
16031 else if (bytes >= 8 && TARGET_POWERPC64
16032 && (align >= 64 || !STRICT_ALIGNMENT))
16034 clear_bytes = 8;
16035 mode = DImode;
16036 if (offset == 0 && align < 64)
16038 rtx addr;
16040 /* If the address form is reg+offset with offset not a
16041 multiple of four, reload into reg indirect form here
16042 rather than waiting for reload. This way we get one
16043 reload, not one per store. */
16044 addr = XEXP (orig_dest, 0);
16045 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16046 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16047 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16049 addr = copy_addr_to_reg (addr);
16050 orig_dest = replace_equiv_address (orig_dest, addr);
16054 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16055 { /* clear 4 bytes */
16056 clear_bytes = 4;
16057 mode = SImode;
16059 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16060 { /* clear 2 bytes */
16061 clear_bytes = 2;
16062 mode = HImode;
16064 else /* clear 1 byte at a time */
16066 clear_bytes = 1;
16067 mode = QImode;
16070 dest = adjust_address (orig_dest, mode, offset);
16072 emit_move_insn (dest, CONST0_RTX (mode));
16075 return 1;
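/* Example of the loop above: clearing 20 bytes that are 16-byte
   aligned with Altivec enabled emits one V4SImode store of zero for
   the first 16 bytes and one SImode store for the remaining 4,
   instead of five word stores.  */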
16079 /* Expand a block move operation, and return 1 if successful. Return 0
16080 if we should let the compiler generate normal code.
16082 operands[0] is the destination
16083 operands[1] is the source
16084 operands[2] is the length
16085 operands[3] is the alignment */
16087 #define MAX_MOVE_REG 4
16090 expand_block_move (rtx operands[])
16092 rtx orig_dest = operands[0];
16093 rtx orig_src = operands[1];
16094 rtx bytes_rtx = operands[2];
16095 rtx align_rtx = operands[3];
16096 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
16097 int align;
16098 int bytes;
16099 int offset;
16100 int move_bytes;
16101 rtx stores[MAX_MOVE_REG];
16102 int num_reg = 0;
16104 /* If this is not a fixed size move, just call memcpy */
16105 if (! constp)
16106 return 0;
16108 /* The alignment must be a compile-time constant */
16109 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
16110 align = INTVAL (align_rtx) * BITS_PER_UNIT;
16112 /* Anything to move? */
16113 bytes = INTVAL (bytes_rtx);
16114 if (bytes <= 0)
16115 return 1;
16117 if (bytes > rs6000_block_move_inline_limit)
16118 return 0;
16120 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
16122 union {
16123 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
16124 rtx (*mov) (rtx, rtx);
16125 } gen_func;
16126 machine_mode mode = BLKmode;
16127 rtx src, dest;
16129 /* Altivec first, since it will be faster than a string move
16130 when it applies, and usually not significantly larger. */
16131 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
16133 move_bytes = 16;
16134 mode = V4SImode;
16135 gen_func.mov = gen_movv4si;
16137 else if (TARGET_SPE && bytes >= 8 && align >= 64)
16139 move_bytes = 8;
16140 mode = V2SImode;
16141 gen_func.mov = gen_movv2si;
16143 else if (TARGET_STRING
16144 && bytes > 24 /* move up to 32 bytes at a time */
16145 && ! fixed_regs[5]
16146 && ! fixed_regs[6]
16147 && ! fixed_regs[7]
16148 && ! fixed_regs[8]
16149 && ! fixed_regs[9]
16150 && ! fixed_regs[10]
16151 && ! fixed_regs[11]
16152 && ! fixed_regs[12])
16154 move_bytes = (bytes > 32) ? 32 : bytes;
16155 gen_func.movmemsi = gen_movmemsi_8reg;
16157 else if (TARGET_STRING
16158 && bytes > 16 /* move up to 24 bytes at a time */
16159 && ! fixed_regs[5]
16160 && ! fixed_regs[6]
16161 && ! fixed_regs[7]
16162 && ! fixed_regs[8]
16163 && ! fixed_regs[9]
16164 && ! fixed_regs[10])
16166 move_bytes = (bytes > 24) ? 24 : bytes;
16167 gen_func.movmemsi = gen_movmemsi_6reg;
16169 else if (TARGET_STRING
16170 && bytes > 8 /* move up to 16 bytes at a time */
16171 && ! fixed_regs[5]
16172 && ! fixed_regs[6]
16173 && ! fixed_regs[7]
16174 && ! fixed_regs[8])
16176 move_bytes = (bytes > 16) ? 16 : bytes;
16177 gen_func.movmemsi = gen_movmemsi_4reg;
16179 else if (bytes >= 8 && TARGET_POWERPC64
16180 && (align >= 64 || !STRICT_ALIGNMENT))
16182 move_bytes = 8;
16183 mode = DImode;
16184 gen_func.mov = gen_movdi;
16185 if (offset == 0 && align < 64)
16187 rtx addr;
16189 /* If the address form is reg+offset with offset not a
16190 multiple of four, reload into reg indirect form here
16191 rather than waiting for reload. This way we get one
16192 reload, not one per load and/or store. */
16193 addr = XEXP (orig_dest, 0);
16194 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16195 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16196 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16198 addr = copy_addr_to_reg (addr);
16199 orig_dest = replace_equiv_address (orig_dest, addr);
16201 addr = XEXP (orig_src, 0);
16202 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16203 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16204 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16206 addr = copy_addr_to_reg (addr);
16207 orig_src = replace_equiv_address (orig_src, addr);
16211 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
16212 { /* move up to 8 bytes at a time */
16213 move_bytes = (bytes > 8) ? 8 : bytes;
16214 gen_func.movmemsi = gen_movmemsi_2reg;
16216 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16217 { /* move 4 bytes */
16218 move_bytes = 4;
16219 mode = SImode;
16220 gen_func.mov = gen_movsi;
16222 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16223 { /* move 2 bytes */
16224 move_bytes = 2;
16225 mode = HImode;
16226 gen_func.mov = gen_movhi;
16228 else if (TARGET_STRING && bytes > 1)
16229 { /* move up to 4 bytes at a time */
16230 move_bytes = (bytes > 4) ? 4 : bytes;
16231 gen_func.movmemsi = gen_movmemsi_1reg;
16233 else /* move 1 byte at a time */
16235 move_bytes = 1;
16236 mode = QImode;
16237 gen_func.mov = gen_movqi;
16240 src = adjust_address (orig_src, mode, offset);
16241 dest = adjust_address (orig_dest, mode, offset);
16243 if (mode != BLKmode)
16245 rtx tmp_reg = gen_reg_rtx (mode);
16247 emit_insn ((*gen_func.mov) (tmp_reg, src));
16248 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
16251 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
16253 int i;
16254 for (i = 0; i < num_reg; i++)
16255 emit_insn (stores[i]);
16256 num_reg = 0;
16259 if (mode == BLKmode)
16261 /* Move the address into scratch registers. The movmemsi
16262 patterns require zero offset. */
16263 if (!REG_P (XEXP (src, 0)))
16265 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
16266 src = replace_equiv_address (src, src_reg);
16268 set_mem_size (src, move_bytes);
16270 if (!REG_P (XEXP (dest, 0)))
16272 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
16273 dest = replace_equiv_address (dest, dest_reg);
16275 set_mem_size (dest, move_bytes);
16277 emit_insn ((*gen_func.movmemsi) (dest, src,
16278 GEN_INT (move_bytes & 31),
16279 align_rtx));
16283 return 1;
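/* Example of the loop above: on a 32-bit target without string
   instructions, a 10-byte copy with 32-bit alignment becomes two
   SImode load/store pairs plus one HImode pair; each load is emitted
   immediately while its store is queued in STORES, so up to
   MAX_MOVE_REG loads can be scheduled ahead of the stores.  */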
16287 /* Return a string to perform a load_multiple operation.
16288 operands[0] is the vector.
16289 operands[1] is the source address.
16290 operands[2] is the first destination register. */
16292 const char *
16293 rs6000_output_load_multiple (rtx operands[3])
16295 /* We have to handle the case where the pseudo used to contain the address
16296 is assigned to one of the output registers. */
16297 int i, j;
16298 int words = XVECLEN (operands[0], 0);
16299 rtx xop[10];
16301 if (XVECLEN (operands[0], 0) == 1)
16302 return "lwz %2,0(%1)";
16304 for (i = 0; i < words; i++)
16305 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
16307 if (i == words-1)
16309 xop[0] = GEN_INT (4 * (words-1));
16310 xop[1] = operands[1];
16311 xop[2] = operands[2];
16312 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
16313 return "";
16315 else if (i == 0)
16317 xop[0] = GEN_INT (4 * (words-1));
16318 xop[1] = operands[1];
16319 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16320 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
16321 return "";
16323 else
16325 for (j = 0; j < words; j++)
16326 if (j != i)
16328 xop[0] = GEN_INT (j * 4);
16329 xop[1] = operands[1];
16330 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
16331 output_asm_insn ("lwz %2,%0(%1)", xop);
16333 xop[0] = GEN_INT (i * 4);
16334 xop[1] = operands[1];
16335 output_asm_insn ("lwz %1,%0(%1)", xop);
16336 return "";
16340 return "lswi %2,%1,%N0";
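/* Example of the overlap handling above: loading three words at (r5)
   into r5..r7, where the address register is also the first
   destination, emits
       addi r5,r5,4
       lswi r6,r5,8
       lwz r5,-4(r5)
   so r5 is only overwritten once the other words have been loaded.  */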
16344 /* A validation routine: say whether CODE, a condition code, and MODE
16345 match. The other alternatives either don't make sense or should
16346 never be generated. */
16348 void
16349 validate_condition_mode (enum rtx_code code, machine_mode mode)
16351 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
16352 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
16353 && GET_MODE_CLASS (mode) == MODE_CC);
16355 /* These don't make sense. */
16356 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
16357 || mode != CCUNSmode);
16359 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
16360 || mode == CCUNSmode);
16362 gcc_assert (mode == CCFPmode
16363 || (code != ORDERED && code != UNORDERED
16364 && code != UNEQ && code != LTGT
16365 && code != UNGT && code != UNLT
16366 && code != UNGE && code != UNLE));
16368 /* These should never be generated except for
16369 flag_finite_math_only. */
16370 gcc_assert (mode != CCFPmode
16371 || flag_finite_math_only
16372 || (code != LE && code != GE
16373 && code != UNEQ && code != LTGT
16374 && code != UNGT && code != UNLT));
16376 /* These are invalid; the information is not there. */
16377 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
16381 /* Return 1 if ANDOP is a mask with no set bits outside of the
16382 mask required to convert the result of a rotate insn into a shift
16383 left insn of SHIFTOP bits. Both are known to be SImode CONST_INT. */
16386 includes_lshift_p (rtx shiftop, rtx andop)
16388 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16390 shift_mask <<= INTVAL (shiftop);
16392 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
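/* E.g. SHIFTOP = 4 gives shift_mask = 0xfffffff0 in the low 32 bits;
   ANDOP = 0x0000fff0 is accepted (a rotate left by 4 under this mask
   is exactly a shift left by 4), while ANDOP = 0x0000fff8 is rejected
   because bit 3 could survive the rotate.  */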
16395 /* Similar, but for right shift. */
16398 includes_rshift_p (rtx shiftop, rtx andop)
16400 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16402 shift_mask >>= INTVAL (shiftop);
16404 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
16407 /* Return 1 if ANDOP is a mask suitable for use with an rldic insn
16408 to perform a left shift. It must have exactly SHIFTOP least
16409 significant 0's, then one or more 1's, then zero or more 0's. */
16412 includes_rldic_lshift_p (rtx shiftop, rtx andop)
16414 if (GET_CODE (andop) == CONST_INT)
16416 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16418 c = INTVAL (andop);
16419 if (c == 0 || c == HOST_WIDE_INT_M1U)
16420 return 0;
16422 shift_mask = HOST_WIDE_INT_M1U;
16423 shift_mask <<= INTVAL (shiftop);
16425 /* Find the least significant one bit. */
16426 lsb = c & -c;
16428 /* It must coincide with the LSB of the shift mask. */
16429 if (-lsb != shift_mask)
16430 return 0;
16432 /* Invert to look for the next transition (if any). */
16433 c = ~c;
16435 /* Remove the low group of ones (originally low group of zeros). */
16436 c &= -lsb;
16438 /* Again find the lsb, and check we have all 1's above. */
16439 lsb = c & -c;
16440 return c == -lsb;
16442 else
16443 return 0;
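/* Worked example for includes_rldic_lshift_p: with SHIFTOP = 4 and
   ANDOP = 0x0ff0, lsb is 0x10 and -lsb equals the shift mask; after
   inverting c and masking off the bits below the lsb, what remains is
   a single run of 1's extending to the top, so ANDOP has the required
   shape of 4 low 0's, a run of 1's, then 0's, and the function
   returns 1.  */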
16446 /* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
16447 to perform a left shift. It must have SHIFTOP or more least
16448 significant 0's, with the remainder of the word 1's. */
16451 includes_rldicr_lshift_p (rtx shiftop, rtx andop)
16453 if (GET_CODE (andop) == CONST_INT)
16455 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16457 shift_mask = HOST_WIDE_INT_M1U;
16458 shift_mask <<= INTVAL (shiftop);
16459 c = INTVAL (andop);
16461 /* Find the least significant one bit. */
16462 lsb = c & -c;
16464 /* It must be covered by the shift mask.
16465 This test also rejects c == 0. */
16466 if ((lsb & shift_mask) == 0)
16467 return 0;
16469 /* Check we have all 1's above the transition, and reject all 1's. */
16470 return c == -lsb && lsb != 1;
16472 else
16473 return 0;
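/* Worked example for includes_rldicr_lshift_p: with SHIFTOP = 4,
   ANDOP = ~(unsigned HOST_WIDE_INT) 0xff (eight low 0's, all 1's
   above) is accepted: its lsb of 0x100 lies inside the shift mask and
   c == -lsb.  ANDOP = 0x0ff0 is rejected because the bits above its
   run of 1's are 0's, so c != -lsb.  */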
16476 /* Return 1 if the operands will generate valid arguments to the rlwimi
16477 instruction for an insert with right shift in 64-bit mode. The mask may
16478 not start on the first bit or stop on the last bit because the wrap-around
16479 effects of the instruction do not correspond to the semantics of the RTL insn. */
16482 insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
16484 if (INTVAL (startop) > 32
16485 && INTVAL (startop) < 64
16486 && INTVAL (sizeop) > 1
16487 && INTVAL (sizeop) + INTVAL (startop) < 64
16488 && INTVAL (shiftop) > 0
16489 && INTVAL (sizeop) + INTVAL (shiftop) < 32
16490 && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
16491 return 1;
16493 return 0;
16496 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
16497 for the lfq and stfq insns, iff the registers are hard registers. */
16500 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
16502 /* We might have been passed a SUBREG. */
16503 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
16504 return 0;
16506 /* We might have been passed non-floating-point registers. */
16507 if (!FP_REGNO_P (REGNO (reg1))
16508 || !FP_REGNO_P (REGNO (reg2)))
16509 return 0;
16511 return (REGNO (reg1) == REGNO (reg2) - 1);
16514 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
16515 addr1 and addr2 must be in consecutive memory locations
16516 (addr2 == addr1 + 8). */
16519 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
16521 rtx addr1, addr2;
16522 unsigned int reg1, reg2;
16523 int offset1, offset2;
16525 /* The mems cannot be volatile. */
16526 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
16527 return 0;
16529 addr1 = XEXP (mem1, 0);
16530 addr2 = XEXP (mem2, 0);
16532 /* Extract an offset (if used) from the first addr. */
16533 if (GET_CODE (addr1) == PLUS)
16535 /* If not a REG, return zero. */
16536 if (GET_CODE (XEXP (addr1, 0)) != REG)
16537 return 0;
16538 else
16540 reg1 = REGNO (XEXP (addr1, 0));
16541 /* The offset must be constant! */
16542 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
16543 return 0;
16544 offset1 = INTVAL (XEXP (addr1, 1));
16547 else if (GET_CODE (addr1) != REG)
16548 return 0;
16549 else
16551 reg1 = REGNO (addr1);
16552 /* This was a simple (mem (reg)) expression. Offset is 0. */
16553 offset1 = 0;
16556 /* And now for the second addr. */
16557 if (GET_CODE (addr2) == PLUS)
16559 /* If not a REG, return zero. */
16560 if (GET_CODE (XEXP (addr2, 0)) != REG)
16561 return 0;
16562 else
16564 reg2 = REGNO (XEXP (addr2, 0));
16565 /* The offset must be constant. */
16566 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
16567 return 0;
16568 offset2 = INTVAL (XEXP (addr2, 1));
16571 else if (GET_CODE (addr2) != REG)
16572 return 0;
16573 else
16575 reg2 = REGNO (addr2);
16576 /* This was a simple (mem (reg)) expression. Offset is 0. */
16577 offset2 = 0;
16580 /* Both of these must have the same base register. */
16581 if (reg1 != reg2)
16582 return 0;
16584 /* The offset for the second addr must be 8 more than the first addr. */
16585 if (offset2 != offset1 + 8)
16586 return 0;
16588 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
16589 instructions. */
16590 return 1;
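/* For instance, the address pair 16(r9) and 24(r9) qualifies for
   lfq/stfq, while 16(r9) with 20(r9), or 16(r9) with 24(r10), does
   not: both addresses must use the same base register and the second
   offset must be exactly the first plus 8.  */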
16595 rs6000_secondary_memory_needed_rtx (machine_mode mode)
16597 static bool eliminated = false;
16598 rtx ret;
16600 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
16601 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16602 else
16604 rtx mem = cfun->machine->sdmode_stack_slot;
16605 gcc_assert (mem != NULL_RTX);
16607 if (!eliminated)
16609 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
16610 cfun->machine->sdmode_stack_slot = mem;
16611 eliminated = true;
16613 ret = mem;
16616 if (TARGET_DEBUG_ADDR)
16618 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
16619 GET_MODE_NAME (mode));
16620 if (!ret)
16621 fprintf (stderr, "\tNULL_RTX\n");
16622 else
16623 debug_rtx (ret);
16626 return ret;
16629 /* Return the mode to be used for memory when a secondary memory
16630 location is needed. For SDmode values we need to use DDmode; in
16631 all other cases we can use the same mode. */
16632 machine_mode
16633 rs6000_secondary_memory_needed_mode (machine_mode mode)
16635 if (lra_in_progress && mode == SDmode)
16636 return DDmode;
16637 return mode;
16640 static tree
16641 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
16643 /* Don't walk into types. */
16644 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
16646 *walk_subtrees = 0;
16647 return NULL_TREE;
16650 switch (TREE_CODE (*tp))
16652 case VAR_DECL:
16653 case PARM_DECL:
16654 case FIELD_DECL:
16655 case RESULT_DECL:
16656 case SSA_NAME:
16657 case REAL_CST:
16658 case MEM_REF:
16659 case VIEW_CONVERT_EXPR:
16660 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
16661 return *tp;
16662 break;
16663 default:
16664 break;
16667 return NULL_TREE;
16670 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
16671 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
16672 only work on the traditional altivec registers, note if an altivec register
16673 was chosen. */
16675 static enum rs6000_reg_type
16676 register_to_reg_type (rtx reg, bool *is_altivec)
16678 HOST_WIDE_INT regno;
16679 enum reg_class rclass;
16681 if (GET_CODE (reg) == SUBREG)
16682 reg = SUBREG_REG (reg);
16684 if (!REG_P (reg))
16685 return NO_REG_TYPE;
16687 regno = REGNO (reg);
16688 if (regno >= FIRST_PSEUDO_REGISTER)
16690 if (!lra_in_progress && !reload_in_progress && !reload_completed)
16691 return PSEUDO_REG_TYPE;
16693 regno = true_regnum (reg);
16694 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16695 return PSEUDO_REG_TYPE;
16698 gcc_assert (regno >= 0);
16700 if (is_altivec && ALTIVEC_REGNO_P (regno))
16701 *is_altivec = true;
16703 rclass = rs6000_regno_regclass[regno];
16704 return reg_class_to_reg_type[(int)rclass];
16707 /* Helper function to return the cost of adding a TOC entry address. */
16709 static inline int
16710 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
16712 int ret;
16714 if (TARGET_CMODEL != CMODEL_SMALL)
16715 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
16717 else
16718 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
16720 return ret;
16723 /* Helper function for rs6000_secondary_reload to determine whether the memory
16724 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
16725 needs reloading. Return negative if the memory is not handled by the memory
16726 helper functions and a different reload method should be tried, 0 if no
16727 additional instructions are needed, and positive to give the extra cost of
16728 accessing the memory. */
16730 static int
16731 rs6000_secondary_reload_memory (rtx addr,
16732 enum reg_class rclass,
16733 enum machine_mode mode)
16735 int extra_cost = 0;
16736 rtx reg, and_arg, plus_arg0, plus_arg1;
16737 addr_mask_type addr_mask;
16738 const char *type = NULL;
16739 const char *fail_msg = NULL;
16741 if (GPR_REG_CLASS_P (rclass))
16742 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
16744 else if (rclass == FLOAT_REGS)
16745 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
16747 else if (rclass == ALTIVEC_REGS)
16748 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
16750 /* For the combined VSX_REGS, turn off Altivec AND -16. */
16751 else if (rclass == VSX_REGS)
16752 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
16753 & ~RELOAD_REG_AND_M16);
16755 else
16757 if (TARGET_DEBUG_ADDR)
16758 fprintf (stderr,
16759 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16760 "class is not GPR, FPR, VMX\n",
16761 GET_MODE_NAME (mode), reg_class_names[rclass]);
16763 return -1;
16766 /* If the register isn't valid in this register class, just return now. */
16767 if ((addr_mask & RELOAD_REG_VALID) == 0)
16769 if (TARGET_DEBUG_ADDR)
16770 fprintf (stderr,
16771 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16772 "not valid in class\n",
16773 GET_MODE_NAME (mode), reg_class_names[rclass]);
16775 return -1;
16778 switch (GET_CODE (addr))
16780 /* Does the register class support auto update forms for this mode? We
16781 don't need a scratch register, since the powerpc only supports
16782 PRE_INC, PRE_DEC, and PRE_MODIFY. */
16783 case PRE_INC:
16784 case PRE_DEC:
16785 reg = XEXP (addr, 0);
16786 if (!base_reg_operand (addr, GET_MODE (reg)))
16788 fail_msg = "no base register #1";
16789 extra_cost = -1;
16792 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
16794 extra_cost = 1;
16795 type = "update";
16797 break;
16799 case PRE_MODIFY:
16800 reg = XEXP (addr, 0);
16801 plus_arg1 = XEXP (addr, 1);
16802 if (!base_reg_operand (reg, GET_MODE (reg))
16803 || GET_CODE (plus_arg1) != PLUS
16804 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
16806 fail_msg = "bad PRE_MODIFY";
16807 extra_cost = -1;
16810 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
16812 extra_cost = 1;
16813 type = "update";
16815 break;
16817 /* Do we need to simulate AND -16 to clear the bottom address bits used
16818 in VMX load/stores? Only allow the AND for vector sizes. */
16819 case AND:
16820 and_arg = XEXP (addr, 0);
16821 if (GET_MODE_SIZE (mode) != 16
16822 || GET_CODE (XEXP (addr, 1)) != CONST_INT
16823 || INTVAL (XEXP (addr, 1)) != -16)
16825 fail_msg = "bad Altivec AND #1";
16826 extra_cost = -1;
16829 if (rclass != ALTIVEC_REGS)
16831 if (legitimate_indirect_address_p (and_arg, false))
16832 extra_cost = 1;
16834 else if (legitimate_indexed_address_p (and_arg, false))
16835 extra_cost = 2;
16837 else
16839 fail_msg = "bad Altivec AND #2";
16840 extra_cost = -1;
16843 type = "and";
16845 break;
16847 /* If this is an indirect address, make sure it is a base register. */
16848 case REG:
16849 case SUBREG:
16850 if (!legitimate_indirect_address_p (addr, false))
16852 extra_cost = 1;
16853 type = "move";
16855 break;
16857 /* If this is an indexed address, make sure the register class can handle
16858 indexed addresses for this mode. */
16859 case PLUS:
16860 plus_arg0 = XEXP (addr, 0);
16861 plus_arg1 = XEXP (addr, 1);
16863 /* (plus (plus (reg) (constant)) (constant)) is generated during
16864 push_reload processing, so handle it now. */
16865 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
16867 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16869 extra_cost = 1;
16870 type = "offset";
16874 /* (plus (plus (reg) (constant)) (reg)) is also generated during
16875 push_reload processing, so handle it now. */
16876 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
16878 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
16880 extra_cost = 1;
16881 type = "indexed #2";
16885 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
16887 fail_msg = "no base register #2";
16888 extra_cost = -1;
16891 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
16893 if ((addr_mask & RELOAD_REG_INDEXED) == 0
16894 || !legitimate_indexed_address_p (addr, false))
16896 extra_cost = 1;
16897 type = "indexed";
16901 /* Make sure the register class can handle offset addresses. */
16902 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
16904 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16906 extra_cost = 1;
16907 type = "offset";
16911 else
16913 fail_msg = "bad PLUS";
16914 extra_cost = -1;
16917 break;
16919 case LO_SUM:
16920 if (!legitimate_lo_sum_address_p (mode, addr, false))
16922 fail_msg = "bad LO_SUM";
16923 extra_cost = -1;
16926 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16928 extra_cost = 1;
16929 type = "lo_sum";
16931 break;
16933 /* Static addresses need to create a TOC entry. */
16934 case CONST:
16935 case SYMBOL_REF:
16936 case LABEL_REF:
16937 type = "address";
16938 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
16939 break;
16941 /* TOC references look like offsettable memory. */
16942 case UNSPEC:
16943 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
16945 fail_msg = "bad UNSPEC";
16946 extra_cost = -1;
16949 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16951 extra_cost = 1;
16952 type = "toc reference";
16954 break;
16956 default:
16958 fail_msg = "bad address";
16959 extra_cost = -1;
16963 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
16965 if (extra_cost < 0)
16966 fprintf (stderr,
16967 "rs6000_secondary_reload_memory error: mode = %s, "
16968 "class = %s, addr_mask = '%s', %s\n",
16969 GET_MODE_NAME (mode),
16970 reg_class_names[rclass],
16971 rs6000_debug_addr_mask (addr_mask, false),
16972 (fail_msg != NULL) ? fail_msg : "<bad address>");
16974 else
16975 fprintf (stderr,
16976 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16977 "addr_mask = '%s', extra cost = %d, %s\n",
16978 GET_MODE_NAME (mode),
16979 reg_class_names[rclass],
16980 rs6000_debug_addr_mask (addr_mask, false),
16981 extra_cost,
16982 (type) ? type : "<none>");
16984 debug_rtx (addr);
16987 return extra_cost;
16990 /* Helper function for rs6000_secondary_reload to return true if a move to a
16991 different register class is really a simple move. */
16993 static bool
16994 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
16995 enum rs6000_reg_type from_type,
16996 machine_mode mode)
16998 int size;
17000 /* Add support for various direct moves available. In this function, we only
17001 look at cases where we don't need any extra registers, and one or more
17002 simple move insns are issued. At present, 32-bit integers are not allowed
17003 in FPR/VSX registers. Single precision binary floating point is not a simple
17004 move because we need to convert to the single precision memory layout.
17005 The 4-byte SDmode can be moved. */
17006 size = GET_MODE_SIZE (mode);
17007 if (TARGET_DIRECT_MOVE
17008 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
17009 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
17010 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
17011 return true;
17013 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
17014 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
17015 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
17016 return true;
17018 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
17019 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
17020 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
17021 return true;
17023 return false;
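/* Illustrative note: on a 64-bit power8 (TARGET_DIRECT_MOVE), a DImode
   copy between a GPR and a VSX register counts as simple here because
   it maps to a single mtvsrd or mfvsrd instruction, with no scratch
   register or trip through memory.  */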
17026 /* Power8 helper function for rs6000_secondary_reload; handle all of the
17027 special direct moves that involve allocating an extra register. Return
17028 true if the move is handled, recording the insn code of the helper
17029 function and the extra cost in SRI when it is non-null. */
17031 static bool
17032 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
17033 enum rs6000_reg_type from_type,
17034 machine_mode mode,
17035 secondary_reload_info *sri,
17036 bool altivec_p)
17038 bool ret = false;
17039 enum insn_code icode = CODE_FOR_nothing;
17040 int cost = 0;
17041 int size = GET_MODE_SIZE (mode);
17043 if (TARGET_POWERPC64)
17045 if (size == 16)
17047 /* Handle moving 128-bit values from GPRs to VSX registers on
17048 power8 when running in 64-bit mode using XXPERMDI to glue the two
17049 64-bit values back together. */
17050 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
17052 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
17053 icode = reg_addr[mode].reload_vsx_gpr;
17056 /* Handle moving 128-bit values from VSX registers to GPRs on
17057 power8 when running in 64-bit mode using XXPERMDI to get access to the
17058 bottom 64-bit value. */
17059 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
17061 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
17062 icode = reg_addr[mode].reload_gpr_vsx;
17066 else if (mode == SFmode)
17068 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
17070 cost = 3; /* xscvdpspn, mfvsrd, and. */
17071 icode = reg_addr[mode].reload_gpr_vsx;
17074 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
17076 cost = 2; /* mtvsrz, xscvspdpn. */
17077 icode = reg_addr[mode].reload_vsx_gpr;
17082 if (TARGET_POWERPC64 && size == 16)
17084 /* Handle moving 128-bit values from GPRs to VSX registers on
17085 power8 when running in 64-bit mode using XXPERMDI to glue the two
17086 64-bit values back together. */
17087 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
17089 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
17090 icode = reg_addr[mode].reload_vsx_gpr;
17093 /* Handle moving 128-bit values from VSX registers to GPRs on
17094 power8 when running in 64-bit mode using XXPERMDI to get access to the
17095 bottom 64-bit value. */
17096 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
17098 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
17099 icode = reg_addr[mode].reload_gpr_vsx;
17103 else if (!TARGET_POWERPC64 && size == 8)
17105 /* Handle moving 64-bit values from GPRs to floating point registers on
17106 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
17107 values back together. Altivec register classes must be handled
17108 specially since a different instruction is used, and the secondary
17109 reload support requires a single register class in the scratch
17110 register constraint. However, right now TFmode is not allowed in
17111 Altivec registers, so the pattern will never match. */
17112 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
17114 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
17115 icode = reg_addr[mode].reload_fpr_gpr;
17119 if (icode != CODE_FOR_nothing)
17121 ret = true;
17122 if (sri)
17124 sri->icode = icode;
17125 sri->extra_cost = cost;
17129 return ret;
17132 /* Return whether a move between two register classes can be done either
17133 directly (simple move) or via a pattern that uses a single extra temporary
17134 (using power8's direct move in this case). */
17136 static bool
17137 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
17138 enum rs6000_reg_type from_type,
17139 machine_mode mode,
17140 secondary_reload_info *sri,
17141 bool altivec_p)
17143 /* Fall back to load/store reloads if either type is not a register. */
17144 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
17145 return false;
17147 /* If we haven't allocated registers yet, assume the move can be done for the
17148 standard register types. */
17149 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
17150 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
17151 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
17152 return true;
17154 /* A move within the same set of registers is a simple move for
17155 non-specialized registers. */
17156 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
17157 return true;
17159 /* Check whether a simple move can be done directly. */
17160 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
17162 if (sri)
17164 sri->icode = CODE_FOR_nothing;
17165 sri->extra_cost = 0;
17167 return true;
17170 /* Now check if we can do it in a few steps. */
17171 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
17172 altivec_p);
17175 /* Inform reload about cases where moving X with a mode MODE to a register in
17176 RCLASS requires an extra scratch or immediate register. Return the class
17177 needed for the immediate register.
17179 For VSX and Altivec, we may need a register to convert sp+offset into
17180 reg+sp.
17182 For misaligned 64-bit gpr loads and stores we need a register to
17183 convert an offset address to indirect. */
17185 static reg_class_t
17186 rs6000_secondary_reload (bool in_p,
17187 rtx x,
17188 reg_class_t rclass_i,
17189 machine_mode mode,
17190 secondary_reload_info *sri)
17192 enum reg_class rclass = (enum reg_class) rclass_i;
17193 reg_class_t ret = ALL_REGS;
17194 enum insn_code icode;
17195 bool default_p = false;
17196 bool done_p = false;
17198 /* Allow subreg of memory before/during reload. */
17199 bool memory_p = (MEM_P (x)
17200 || (!reload_completed && GET_CODE (x) == SUBREG
17201 && MEM_P (SUBREG_REG (x))));
17203 sri->icode = CODE_FOR_nothing;
17204 sri->extra_cost = 0;
17205 icode = ((in_p)
17206 ? reg_addr[mode].reload_load
17207 : reg_addr[mode].reload_store);
17209 if (REG_P (x) || register_operand (x, mode))
17211 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
17212 bool altivec_p = (rclass == ALTIVEC_REGS);
17213 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
17215 if (!in_p)
17217 enum rs6000_reg_type exchange = to_type;
17218 to_type = from_type;
17219 from_type = exchange;
17222 /* Can we do a direct move of some sort? */
17223 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
17224 altivec_p))
17226 icode = (enum insn_code)sri->icode;
17227 default_p = false;
17228 done_p = true;
17229 ret = NO_REGS;
17233 /* Make sure 0.0 is not reloaded or forced into memory. */
17234 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
17236 ret = NO_REGS;
17237 default_p = false;
17238 done_p = true;
17241 /* If this is a scalar floating point value and we want to load it into the
17242 traditional Altivec registers, do it via a traditional floating
17243 point register. Also make sure that non-zero constants use an FPR. */
17244 if (!done_p && reg_addr[mode].scalar_in_vmx_p
17245 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17246 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
17248 ret = FLOAT_REGS;
17249 default_p = false;
17250 done_p = true;
17253 /* Handle reload of load/stores if we have reload helper functions. */
17254 if (!done_p && icode != CODE_FOR_nothing && memory_p)
17256 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
17257 mode);
17259 if (extra_cost >= 0)
17261 done_p = true;
17262 ret = NO_REGS;
17263 if (extra_cost > 0)
17265 sri->extra_cost = extra_cost;
17266 sri->icode = icode;
17271 /* Handle unaligned loads and stores of integer registers. */
17272 if (!done_p && TARGET_POWERPC64
17273 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17274 && memory_p
17275 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
17277 rtx addr = XEXP (x, 0);
17278 rtx off = address_offset (addr);
17280 if (off != NULL_RTX)
17282 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17283 unsigned HOST_WIDE_INT offset = INTVAL (off);
17285 /* We need a secondary reload when our legitimate_address_p
17286 says the address is good (as otherwise the entire address
17287 will be reloaded), and the offset is not a multiple of
17288 four or we have an address wrap. Address wrap will only
17289 occur for LO_SUMs since legitimate_offset_address_p
17290 rejects addresses for 16-byte mems that will wrap. */
17291 if (GET_CODE (addr) == LO_SUM
17292 ? (1 /* legitimate_address_p allows any offset for lo_sum */
17293 && ((offset & 3) != 0
17294 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
17295 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
17296 && (offset & 3) != 0))
17298 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
17299 if (in_p)
17300 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
17301 : CODE_FOR_reload_di_load);
17302 else
17303 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
17304 : CODE_FOR_reload_di_store);
17305 sri->extra_cost = 2;
17306 ret = NO_REGS;
17307 done_p = true;
17309 else
17310 default_p = true;
17312 else
17313 default_p = true;
17316 if (!done_p && !TARGET_POWERPC64
17317 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17318 && memory_p
17319 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
17321 rtx addr = XEXP (x, 0);
17322 rtx off = address_offset (addr);
17324 if (off != NULL_RTX)
17326 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17327 unsigned HOST_WIDE_INT offset = INTVAL (off);
17329 /* We need a secondary reload when our legitimate_address_p
17330 says the address is good (as otherwise the entire address
17331 will be reloaded), and we have a wrap.
17333 legitimate_lo_sum_address_p allows LO_SUM addresses to
17334 have any offset so test for wrap in the low 16 bits.
17336 legitimate_offset_address_p checks for the range
17337 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
17338 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
17339 [0x7ff4,0x7fff] respectively, so test for the
17340 intersection of these ranges, [0x7ffc,0x7fff] and
17341 [0x7ff4,0x7ff7] respectively.
17343 Note that the address we see here may have been
17344 manipulated by legitimize_reload_address. */
17345 if (GET_CODE (addr) == LO_SUM
17346 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
17347 : offset - (0x8000 - extra) < UNITS_PER_WORD)
17349 if (in_p)
17350 sri->icode = CODE_FOR_reload_si_load;
17351 else
17352 sri->icode = CODE_FOR_reload_si_store;
17353 sri->extra_cost = 2;
17354 ret = NO_REGS;
17355 done_p = true;
17357 else
17358 default_p = true;
17360 else
17361 default_p = true;
17364 if (!done_p)
17365 default_p = true;
17367 if (default_p)
17368 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
17370 gcc_assert (ret != ALL_REGS);
17372 if (TARGET_DEBUG_ADDR)
17374 fprintf (stderr,
17375 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
17376 "mode = %s",
17377 reg_class_names[ret],
17378 in_p ? "true" : "false",
17379 reg_class_names[rclass],
17380 GET_MODE_NAME (mode));
17382 if (reload_completed)
17383 fputs (", after reload", stderr);
17385 if (!done_p)
17386 fputs (", done_p not set", stderr);
17388 if (default_p)
17389 fputs (", default secondary reload", stderr);
17391 if (sri->icode != CODE_FOR_nothing)
17392 fprintf (stderr, ", reload func = %s, extra cost = %d",
17393 insn_data[sri->icode].name, sri->extra_cost);
17395 fputs ("\n", stderr);
17396 debug_rtx (x);
17399 return ret;
17402 /* Better tracing for rs6000_secondary_reload_inner. */
17404 static void
17405 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
17406 bool store_p)
17408 rtx set, clobber;
17410 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
17412 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
17413 store_p ? "store" : "load");
17415 if (store_p)
17416 set = gen_rtx_SET (mem, reg);
17417 else
17418 set = gen_rtx_SET (reg, mem);
17420 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
17421 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
17424 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
17425 ATTRIBUTE_NORETURN;
17427 static void
17428 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
17429 bool store_p)
17431 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
17432 gcc_unreachable ();
17435 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
17436 reload helper functions. These were identified in
17437 rs6000_secondary_reload_memory, and if reload decided to use the secondary
17438 reload, it calls the insns:
17439 reload_<RELOAD:mode>_<P:mptrsize>_store
17440 reload_<RELOAD:mode>_<P:mptrsize>_load
17442 which in turn calls this function, to do whatever is necessary to create
17443 valid addresses. */
17445 void
17446 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
17448 int regno = true_regnum (reg);
17449 machine_mode mode = GET_MODE (reg);
17450 addr_mask_type addr_mask;
17451 rtx addr;
17452 rtx new_addr;
17453 rtx op_reg, op0, op1;
17454 rtx and_op;
17455 rtx cc_clobber;
17456 rtvec rv;
17458 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
17459 || !base_reg_operand (scratch, GET_MODE (scratch)))
17460 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17462 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
17463 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
17465 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
17466 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
17468 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
17469 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
17471 else
17472 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17474 /* Make sure the mode is valid in this register class. */
17475 if ((addr_mask & RELOAD_REG_VALID) == 0)
17476 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17478 if (TARGET_DEBUG_ADDR)
17479 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
17481 new_addr = addr = XEXP (mem, 0);
17482 switch (GET_CODE (addr))
17484 /* Does the register class support auto update forms for this mode? If
17485 not, do the update now. We don't need a scratch register, since the
17486 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
17487 case PRE_INC:
17488 case PRE_DEC:
17489 op_reg = XEXP (addr, 0);
17490 if (!base_reg_operand (op_reg, Pmode))
17491 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17493 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
17495 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
17496 new_addr = op_reg;
17498 break;
17500 case PRE_MODIFY:
17501 op0 = XEXP (addr, 0);
17502 op1 = XEXP (addr, 1);
17503 if (!base_reg_operand (op0, Pmode)
17504 || GET_CODE (op1) != PLUS
17505 || !rtx_equal_p (op0, XEXP (op1, 0)))
17506 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17508 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
17510 emit_insn (gen_rtx_SET (op0, op1));
17511 new_addr = reg;
17513 break;
17515 /* Do we need to simulate AND -16 to clear the bottom address bits used
17516 in VMX load/stores? */
17517 case AND:
17518 op0 = XEXP (addr, 0);
17519 op1 = XEXP (addr, 1);
17520 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
17522 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
17523 op_reg = op0;
17525 else if (GET_CODE (op1) == PLUS)
17527 emit_insn (gen_rtx_SET (scratch, op1));
17528 op_reg = scratch;
17531 else
17532 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17534 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
17535 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
17536 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
17537 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
17538 new_addr = scratch;
17540 break;
17542 /* If this is an indirect address, make sure it is a base register. */
17543 case REG:
17544 case SUBREG:
17545 if (!base_reg_operand (addr, GET_MODE (addr)))
17547 emit_insn (gen_rtx_SET (scratch, addr));
17548 new_addr = scratch;
17550 break;
17552 /* If this is an indexed address, make sure the register class can handle
17553 indexed addresses for this mode. */
17554 case PLUS:
17555 op0 = XEXP (addr, 0);
17556 op1 = XEXP (addr, 1);
17557 if (!base_reg_operand (op0, Pmode))
17558 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17560 else if (int_reg_operand (op1, Pmode))
17562 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17564 emit_insn (gen_rtx_SET (scratch, addr));
17565 new_addr = scratch;
17569 /* Make sure the register class can handle offset addresses. */
17570 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
17572 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17574 emit_insn (gen_rtx_SET (scratch, addr));
17575 new_addr = scratch;
17579 else
17580 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17582 break;
17584 case LO_SUM:
17585 op0 = XEXP (addr, 0);
17586 op1 = XEXP (addr, 1);
17587 if (!base_reg_operand (op0, Pmode))
17588 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17590 else if (int_reg_operand (op1, Pmode))
17592 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17594 emit_insn (gen_rtx_SET (scratch, addr));
17595 new_addr = scratch;
17599 /* Make sure the register class can handle offset addresses. */
17600 else if (legitimate_lo_sum_address_p (mode, addr, false))
17602 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17604 emit_insn (gen_rtx_SET (scratch, addr));
17605 new_addr = scratch;
17609 else
17610 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17612 break;
17614 case SYMBOL_REF:
17615 case CONST:
17616 case LABEL_REF:
17617 rs6000_emit_move (scratch, addr, Pmode);
17618 new_addr = scratch;
17619 break;
17621 default:
17622 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17625 /* Adjust the address if it changed. */
17626 if (addr != new_addr)
17628 mem = replace_equiv_address_nv (mem, new_addr);
17629 if (TARGET_DEBUG_ADDR)
17630 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
17633 /* Now create the move. */
17634 if (store_p)
17635 emit_insn (gen_rtx_SET (mem, reg));
17636 else
17637 emit_insn (gen_rtx_SET (reg, mem));
17639 return;
17642 /* Convert reloads involving 64-bit gprs and misaligned offset
17643 addressing, or multiple 32-bit gprs and offsets that are too large,
17644 to use indirect addressing. */
17646 void
17647 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
17649 int regno = true_regnum (reg);
17650 enum reg_class rclass;
17651 rtx addr;
17652 rtx scratch_or_premodify = scratch;
17654 if (TARGET_DEBUG_ADDR)
17656 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
17657 store_p ? "store" : "load");
17658 fprintf (stderr, "reg:\n");
17659 debug_rtx (reg);
17660 fprintf (stderr, "mem:\n");
17661 debug_rtx (mem);
17662 fprintf (stderr, "scratch:\n");
17663 debug_rtx (scratch);
17666 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
17667 gcc_assert (GET_CODE (mem) == MEM);
17668 rclass = REGNO_REG_CLASS (regno);
17669 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
17670 addr = XEXP (mem, 0);
17672 if (GET_CODE (addr) == PRE_MODIFY)
17674 scratch_or_premodify = XEXP (addr, 0);
17675 gcc_assert (REG_P (scratch_or_premodify));
17676 addr = XEXP (addr, 1);
17678 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
17680 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
17682 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
17684 /* Now create the move. */
17685 if (store_p)
17686 emit_insn (gen_rtx_SET (mem, reg));
17687 else
17688 emit_insn (gen_rtx_SET (reg, mem));
17690 return;
17693 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
17694 this function has any SDmode references. If we are on a power7 or later, we
17695 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
17696 can load/store the value. */
17698 static void
17699 rs6000_alloc_sdmode_stack_slot (void)
17701 tree t;
17702 basic_block bb;
17703 gimple_stmt_iterator gsi;
17705 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
17706 /* We use a different approach for dealing with the secondary
17707 memory in LRA. */
17708 if (ira_use_lra_p)
17709 return;
17711 if (TARGET_NO_SDMODE_STACK)
17712 return;
17714 FOR_EACH_BB_FN (bb, cfun)
17715 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
17717 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
17718 if (ret)
17720 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17721 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17722 SDmode, 0);
17723 return;
17727 /* Check for any SDmode parameters of the function. */
17728 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
17730 if (TREE_TYPE (t) == error_mark_node)
17731 continue;
17733 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
17734 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
17736 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17737 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17738 SDmode, 0);
17739 return;
17744 static void
17745 rs6000_instantiate_decls (void)
17747 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
17748 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
17751 /* Given an rtx X being reloaded into a reg required to be
17752 in class CLASS, return the class of reg to actually use.
17753 In general this is just CLASS; but on some machines
17754 in some cases it is preferable to use a more restrictive class.
17756 On the RS/6000, we have to return NO_REGS when we want to reload a
17757 floating-point CONST_DOUBLE to force it to be copied to memory.
17759 We also don't want to reload integer values into floating-point
17760 registers if we can at all help it. In fact, this can
17761 cause reload to die, if it tries to generate a reload of CTR
17762 into a FP register and discovers it doesn't have the memory location
17763 required.
17765 ??? Would it be a good idea to have reload do the converse, that is
17766 try to reload floating modes into FP registers if possible?
17769 static enum reg_class
17770 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
17772 machine_mode mode = GET_MODE (x);
17773 bool is_constant = CONSTANT_P (x);
17775 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
17776 the reloading of address expressions using PLUS into floating point
17777 registers. */
17778 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
17780 if (is_constant)
17782 /* Zero is always allowed in all VSX registers. */
17783 if (x == CONST0_RTX (mode))
17784 return rclass;
17786 /* If this is a vector constant that can be formed with a few Altivec
17787 instructions, we want altivec registers. */
17788 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
17789 return ALTIVEC_REGS;
17791 /* Force constant to memory. */
17792 return NO_REGS;
17795 /* If this is a scalar floating point value, prefer the traditional
17796 floating point registers so that we can use D-form (register+offset)
17797 addressing. */
17798 if (GET_MODE_SIZE (mode) < 16)
17799 return FLOAT_REGS;
17801 /* Prefer the Altivec registers if Altivec is handling the vector
17802 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
17803 loads. */
17804 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
17805 || mode == V1TImode)
17806 return ALTIVEC_REGS;
17808 return rclass;
17811 if (is_constant || GET_CODE (x) == PLUS)
17813 if (reg_class_subset_p (GENERAL_REGS, rclass))
17814 return GENERAL_REGS;
17815 if (reg_class_subset_p (BASE_REGS, rclass))
17816 return BASE_REGS;
17817 return NO_REGS;
17820 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
17821 return GENERAL_REGS;
17823 return rclass;
17826 /* Debug version of rs6000_preferred_reload_class. */
17827 static enum reg_class
17828 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
17830 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
17832 fprintf (stderr,
17833 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
17834 "mode = %s, x:\n",
17835 reg_class_names[ret], reg_class_names[rclass],
17836 GET_MODE_NAME (GET_MODE (x)));
17837 debug_rtx (x);
17839 return ret;
17842 /* If we are copying between FP or AltiVec registers and anything else, we need
17843 a memory location. The exception is when we are targeting ppc64 and the
17844 fpr to/from gpr move instructions are available. Also, under VSX, you
17845 can copy vector registers from the FP register set to the Altivec register
17846 set and vice versa. */
17848 static bool
17849 rs6000_secondary_memory_needed (enum reg_class from_class,
17850 enum reg_class to_class,
17851 machine_mode mode)
17853 enum rs6000_reg_type from_type, to_type;
17854 bool altivec_p = ((from_class == ALTIVEC_REGS)
17855 || (to_class == ALTIVEC_REGS));
17857 /* If a simple/direct move is available, we don't need secondary memory */
17858 from_type = reg_class_to_reg_type[(int)from_class];
17859 to_type = reg_class_to_reg_type[(int)to_class];
17861 if (rs6000_secondary_reload_move (to_type, from_type, mode,
17862 (secondary_reload_info *)0, altivec_p))
17863 return false;
17865 /* If we have a floating point or vector register class, we need to use
17866 memory to transfer the data. */
17867 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
17868 return true;
17870 return false;
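/* For example, a DImode move between a GPR and an FPR needs a stack slot
   unless the ISA 2.07 direct-move instructions (mtvsrd/mfvsrd) let
   rs6000_secondary_reload_move succeed; a GPR-to-GPR copy never involves
   an FP/vector register type and so never needs memory here.  */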
17873 /* Debug version of rs6000_secondary_memory_needed. */
17874 static bool
17875 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
17876 enum reg_class to_class,
17877 machine_mode mode)
17879 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
17881 fprintf (stderr,
17882 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
17883 "to_class = %s, mode = %s\n",
17884 ret ? "true" : "false",
17885 reg_class_names[from_class],
17886 reg_class_names[to_class],
17887 GET_MODE_NAME (mode));
17889 return ret;
17892 /* Return the register class of a scratch register needed to copy IN into
17893 or out of a register in RCLASS in MODE. If it can be done directly,
17894 NO_REGS is returned. */
17896 static enum reg_class
17897 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
17898 rtx in)
17900 int regno;
17902 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
17903 #if TARGET_MACHO
17904 && MACHOPIC_INDIRECT
17905 #endif
17908 /* We cannot copy a symbolic operand directly into anything
17909 other than BASE_REGS for TARGET_ELF. So indicate that a
17910 register from BASE_REGS is needed as an intermediate
17911 register.
17913 On Darwin, pic addresses require a load from memory, which
17914 needs a base register. */
17915 if (rclass != BASE_REGS
17916 && (GET_CODE (in) == SYMBOL_REF
17917 || GET_CODE (in) == HIGH
17918 || GET_CODE (in) == LABEL_REF
17919 || GET_CODE (in) == CONST))
17920 return BASE_REGS;
17923 if (GET_CODE (in) == REG)
17925 regno = REGNO (in);
17926 if (regno >= FIRST_PSEUDO_REGISTER)
17928 regno = true_regnum (in);
17929 if (regno >= FIRST_PSEUDO_REGISTER)
17930 regno = -1;
17933 else if (GET_CODE (in) == SUBREG)
17935 regno = true_regnum (in);
17936 if (regno >= FIRST_PSEUDO_REGISTER)
17937 regno = -1;
17939 else
17940 regno = -1;
17942 /* If we have VSX register moves, prefer moving scalar values between
17943 Altivec registers and GPR by going via an FPR (and then via memory)
17944 instead of reloading the secondary memory address for Altivec moves. */
17945 if (TARGET_VSX
17946 && GET_MODE_SIZE (mode) < 16
17947 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
17948 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
17949 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17950 && (regno >= 0 && INT_REGNO_P (regno)))))
17951 return FLOAT_REGS;
17953 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
17954 into anything. */
17955 if (rclass == GENERAL_REGS || rclass == BASE_REGS
17956 || (regno >= 0 && INT_REGNO_P (regno)))
17957 return NO_REGS;
17959 /* Constants, memory, and VSX registers can go into VSX registers (both the
17960 traditional floating point and the altivec registers). */
17961 if (rclass == VSX_REGS
17962 && (regno == -1 || VSX_REGNO_P (regno)))
17963 return NO_REGS;
17965 /* Constants, memory, and FP registers can go into FP registers. */
17966 if ((regno == -1 || FP_REGNO_P (regno))
17967 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
17968 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
17970 /* Memory and AltiVec registers can go into AltiVec registers. */
17971 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
17972 && rclass == ALTIVEC_REGS)
17973 return NO_REGS;
17975 /* We can copy among the CR registers. */
17976 if ((rclass == CR_REGS || rclass == CR0_REGS)
17977 && regno >= 0 && CR_REGNO_P (regno))
17978 return NO_REGS;
17980 /* Otherwise, we need GENERAL_REGS. */
17981 return GENERAL_REGS;
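/* Two examples of the above: on ELF, copying a SYMBOL_REF into FLOAT_REGS
   reports BASE_REGS so an address register can serve as the intermediate;
   and under VSX, a scalar SFmode/DFmode copy between an Altivec register
   and a GPR reports FLOAT_REGS, going via an FPR (and memory if need be)
   rather than reloading an Altivec memory address.  */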
17984 /* Debug version of rs6000_secondary_reload_class. */
17985 static enum reg_class
17986 rs6000_debug_secondary_reload_class (enum reg_class rclass,
17987 machine_mode mode, rtx in)
17989 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
17990 fprintf (stderr,
17991 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
17992 "mode = %s, input rtx:\n",
17993 reg_class_names[ret], reg_class_names[rclass],
17994 GET_MODE_NAME (mode));
17995 debug_rtx (in);
17997 return ret;
18000 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
18002 static bool
18003 rs6000_cannot_change_mode_class (machine_mode from,
18004 machine_mode to,
18005 enum reg_class rclass)
18007 unsigned from_size = GET_MODE_SIZE (from);
18008 unsigned to_size = GET_MODE_SIZE (to);
18010 if (from_size != to_size)
18012 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
18014 if (reg_classes_intersect_p (xclass, rclass))
18016 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
18017 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
18019 /* Don't allow 64-bit types to overlap with 128-bit types that take a
18020 single register under VSX because the scalar part of the register
18021 is in the upper 64-bits, and not the lower 64-bits. Types like
18022 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
18023 IEEE floating point can't overlap, and neither can small
18024 values. */
18026 if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
18027 return true;
18029 /* TDmode in floating-mode registers must always go into a register
18030 pair with the most significant word in the even-numbered register
18031 to match ISA requirements. In little-endian mode, this does not
18032 match subreg numbering, so we cannot allow subregs. */
18033 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
18034 return true;
18036 if (from_size < 8 || to_size < 8)
18037 return true;
18039 if (from_size == 8 && (8 * to_nregs) != to_size)
18040 return true;
18042 if (to_size == 8 && (8 * from_nregs) != from_size)
18043 return true;
18045 return false;
18047 else
18048 return false;
18051 if (TARGET_E500_DOUBLE
18052 && ((((to) == DFmode) + ((from) == DFmode)) == 1
18053 || (((to) == TFmode) + ((from) == TFmode)) == 1
18054 || (((to) == DDmode) + ((from) == DDmode)) == 1
18055 || (((to) == TDmode) + ((from) == TDmode)) == 1
18056 || (((to) == DImode) + ((from) == DImode)) == 1))
18057 return true;
18059 /* Since the VSX register set includes traditional floating point registers
18060 and altivec registers, just check for the size being different instead of
18061 trying to check whether the modes are vector modes. Otherwise it won't
18062 allow, say, DF and DI to change classes. For types like TFmode and TDmode
18063 that take 2 64-bit registers, rather than a single 128-bit register, don't
18064 allow subregs of those types to other 128-bit types. */
18065 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
18067 unsigned num_regs = (from_size + 15) / 16;
18068 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
18069 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
18070 return true;
18072 return (from_size != 8 && from_size != 16);
18075 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
18076 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
18077 return true;
18079 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
18080 && reg_classes_intersect_p (GENERAL_REGS, rclass))
18081 return true;
18083 return false;
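/* Concretely: (subreg:DI (reg:V2DI ...) 0) in a VSX register is rejected
   above because the 8-byte scalar lives in the upper half of the 16-byte
   register, while a same-size DImode/DFmode punning of an FPR is allowed,
   and any TDmode subreg of a different size is rejected on little-endian
   because of the even/odd register-pair requirement.  */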
18086 /* Debug version of rs6000_cannot_change_mode_class. */
18087 static bool
18088 rs6000_debug_cannot_change_mode_class (machine_mode from,
18089 machine_mode to,
18090 enum reg_class rclass)
18092 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
18094 fprintf (stderr,
18095 "rs6000_cannot_change_mode_class, return %s, from = %s, "
18096 "to = %s, rclass = %s\n",
18097 ret ? "true" : "false",
18098 GET_MODE_NAME (from), GET_MODE_NAME (to),
18099 reg_class_names[rclass]);
18101 return ret;
18104 /* Return the assembler template for a move of 128 bits of data, or "#"
if the move must be split. */
18106 const char *
18107 rs6000_output_move_128bit (rtx operands[])
18109 rtx dest = operands[0];
18110 rtx src = operands[1];
18111 machine_mode mode = GET_MODE (dest);
18112 int dest_regno;
18113 int src_regno;
18114 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
18115 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
18117 if (REG_P (dest))
18119 dest_regno = REGNO (dest);
18120 dest_gpr_p = INT_REGNO_P (dest_regno);
18121 dest_fp_p = FP_REGNO_P (dest_regno);
18122 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
18123 dest_vsx_p = dest_fp_p | dest_vmx_p;
18125 else
18127 dest_regno = -1;
18128 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
18131 if (REG_P (src))
18133 src_regno = REGNO (src);
18134 src_gpr_p = INT_REGNO_P (src_regno);
18135 src_fp_p = FP_REGNO_P (src_regno);
18136 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
18137 src_vsx_p = src_fp_p | src_vmx_p;
18139 else
18141 src_regno = -1;
18142 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
18145 /* Register moves. */
18146 if (dest_regno >= 0 && src_regno >= 0)
18148 if (dest_gpr_p)
18150 if (src_gpr_p)
18151 return "#";
18153 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
18154 return "#";
18157 else if (TARGET_VSX && dest_vsx_p)
18159 if (src_vsx_p)
18160 return "xxlor %x0,%x1,%x1";
18162 else if (TARGET_DIRECT_MOVE && src_gpr_p)
18163 return "#";
18166 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
18167 return "vor %0,%1,%1";
18169 else if (dest_fp_p && src_fp_p)
18170 return "#";
18173 /* Loads. */
18174 else if (dest_regno >= 0 && MEM_P (src))
18176 if (dest_gpr_p)
18178 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
18179 return "lq %0,%1";
18180 else
18181 return "#";
18184 else if (TARGET_ALTIVEC && dest_vmx_p
18185 && altivec_indexed_or_indirect_operand (src, mode))
18186 return "lvx %0,%y1";
18188 else if (TARGET_VSX && dest_vsx_p)
18190 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
18191 return "lxvw4x %x0,%y1";
18192 else
18193 return "lxvd2x %x0,%y1";
18196 else if (TARGET_ALTIVEC && dest_vmx_p)
18197 return "lvx %0,%y1";
18199 else if (dest_fp_p)
18200 return "#";
18203 /* Stores. */
18204 else if (src_regno >= 0 && MEM_P (dest))
18206 if (src_gpr_p)
18208 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
18209 return "stq %1,%0";
18210 else
18211 return "#";
18214 else if (TARGET_ALTIVEC && src_vmx_p
18215 && altivec_indexed_or_indirect_operand (dest, mode))
18216 return "stvx %1,%y0";
18218 else if (TARGET_VSX && src_vsx_p)
18220 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
18221 return "stxvw4x %x1,%y0";
18222 else
18223 return "stxvd2x %x1,%y0";
18226 else if (TARGET_ALTIVEC && src_vmx_p)
18227 return "stvx %1,%y0";
18229 else if (src_fp_p)
18230 return "#";
18233 /* Constants. */
18234 else if (dest_regno >= 0
18235 && (GET_CODE (src) == CONST_INT
18236 || GET_CODE (src) == CONST_WIDE_INT
18237 || GET_CODE (src) == CONST_DOUBLE
18238 || GET_CODE (src) == CONST_VECTOR))
18240 if (dest_gpr_p)
18241 return "#";
18243 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
18244 return "xxlxor %x0,%x0,%x0";
18246 else if (TARGET_ALTIVEC && dest_vmx_p)
18247 return output_vec_const_move (operands);
18250 if (TARGET_DEBUG_ADDR)
18252 fprintf (stderr, "\n===== Bad 128 bit move:\n");
18253 debug_rtx (gen_rtx_SET (dest, src));
18256 gcc_unreachable ();
18259 /* Validate a 128-bit move. */
18260 bool
18261 rs6000_move_128bit_ok_p (rtx operands[])
18263 machine_mode mode = GET_MODE (operands[0]);
18264 return (gpc_reg_operand (operands[0], mode)
18265 || gpc_reg_operand (operands[1], mode));
18268 /* Return true if a 128-bit move needs to be split. */
18269 bool
18270 rs6000_split_128bit_ok_p (rtx operands[])
18272 if (!reload_completed)
18273 return false;
18275 if (!gpr_or_gpr_p (operands[0], operands[1]))
18276 return false;
18278 if (quad_load_store_p (operands[0], operands[1]))
18279 return false;
18281 return true;
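/* Example of the split path: a TImode access involving GPRs (whether
   register-register or register-memory) is emitted as "#" by
   rs6000_output_move_128bit, and once reload has assigned hard registers
   this predicate lets the splitter break it into word-sized moves --
   unless it qualifies for a single lq/stq quad-memory access, which is
   kept whole.  */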
18285 /* Given a comparison operation, return the bit number in CCR to test.
18286 The operation is normally a valid comparison.
18288 SCC_P is 1 if this is for an scc. That means that %D will have been
18289 used instead of %C, so the bits will be in different places.
18291 Return -1 if OP isn't a valid comparison for some reason. */
18293 int
18294 ccr_bit (rtx op, int scc_p)
18296 enum rtx_code code = GET_CODE (op);
18297 machine_mode cc_mode;
18298 int cc_regnum;
18299 int base_bit;
18300 rtx reg;
18302 if (!COMPARISON_P (op))
18303 return -1;
18305 reg = XEXP (op, 0);
18307 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
18309 cc_mode = GET_MODE (reg);
18310 cc_regnum = REGNO (reg);
18311 base_bit = 4 * (cc_regnum - CR0_REGNO);
18313 validate_condition_mode (code, cc_mode);
18315 /* When generating a sCOND operation, only positive conditions are
18316 allowed. */
18317 gcc_assert (!scc_p
18318 || code == EQ || code == GT || code == LT || code == UNORDERED
18319 || code == GTU || code == LTU);
18321 switch (code)
18323 case NE:
18324 return scc_p ? base_bit + 3 : base_bit + 2;
18325 case EQ:
18326 return base_bit + 2;
18327 case GT: case GTU: case UNLE:
18328 return base_bit + 1;
18329 case LT: case LTU: case UNGE:
18330 return base_bit;
18331 case ORDERED: case UNORDERED:
18332 return base_bit + 3;
18334 case GE: case GEU:
18335 /* If scc, we will have done a cror to put the bit in the
18336 unordered position. So test that bit. For integer, this is ! LT
18337 unless this is an scc insn. */
18338 return scc_p ? base_bit + 3 : base_bit;
18340 case LE: case LEU:
18341 return scc_p ? base_bit + 3 : base_bit + 1;
18343 default:
18344 gcc_unreachable ();
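/* For example, an EQ test of a comparison in CR0 yields bit 2 (the EQ bit
   of that field), and the same test in CR7 yields 4*7 + 2 = 30.  With
   SCC_P set, GE instead yields bit 3, since the cror emitted for the scc
   parked the result in the field's unordered slot.  */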
18348 /* Return the GOT register. */
18350 rtx
18351 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
18353 /* The second flow pass currently (June 1999) can't update
18354 regs_ever_live without disturbing other parts of the compiler, so
18355 update it here to make the prolog/epilogue code happy. */
18356 if (!can_create_pseudo_p ()
18357 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
18358 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
18360 crtl->uses_pic_offset_table = 1;
18362 return pic_offset_table_rtx;
18365 static rs6000_stack_t stack_info;
18367 /* Function to init struct machine_function.
18368 This will be called, via a pointer variable,
18369 from push_function_context. */
18371 static struct machine_function *
18372 rs6000_init_machine_status (void)
18374 stack_info.reload_completed = 0;
18375 return ggc_cleared_alloc<machine_function> ();
18378 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
18380 int
18381 extract_MB (rtx op)
18383 int i;
18384 unsigned long val = INTVAL (op);
18386 /* If the high bit is zero, the value is the first 1 bit we find
18387 from the left. */
18388 if ((val & 0x80000000) == 0)
18390 gcc_assert (val & 0xffffffff);
18392 i = 1;
18393 while (((val <<= 1) & 0x80000000) == 0)
18394 ++i;
18395 return i;
18398 /* If the high bit is set and the low bit is not, or the mask is all
18399 1's, the value is zero. */
18400 if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
18401 return 0;
18403 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18404 from the right. */
18405 i = 31;
18406 while (((val >>= 1) & 1) != 0)
18407 --i;
18409 return i;
18412 int
18413 extract_ME (rtx op)
18415 int i;
18416 unsigned long val = INTVAL (op);
18418 /* If the low bit is zero, the value is the first 1 bit we find from
18419 the right. */
18420 if ((val & 1) == 0)
18422 gcc_assert (val & 0xffffffff);
18424 i = 30;
18425 while (((val >>= 1) & 1) == 0)
18426 --i;
18428 return i;
18431 /* If the low bit is set and the high bit is not, or the mask is all
18432 1's, the value is 31. */
18433 if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
18434 return 31;
18436 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18437 from the left. */
18438 i = 0;
18439 while (((val <<= 1) & 0x80000000) != 0)
18440 ++i;
18442 return i;
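/* Worked example: for the rlwinm mask 0x00ff0000, extract_MB returns 8 and
   extract_ME returns 15 (IBM numbering, bit 0 = most significant); the
   wrap-around mask 0xff0000ff instead gives MB = 24 and ME = 7.  */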
18445 /* Write out a function code label. */
18447 void
18448 rs6000_output_function_entry (FILE *file, const char *fname)
18450 if (fname[0] != '.')
18452 switch (DEFAULT_ABI)
18454 default:
18455 gcc_unreachable ();
18457 case ABI_AIX:
18458 if (DOT_SYMBOLS)
18459 putc ('.', file);
18460 else
18461 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
18462 break;
18464 case ABI_ELFv2:
18465 case ABI_V4:
18466 case ABI_DARWIN:
18467 break;
18471 RS6000_OUTPUT_BASENAME (file, fname);
18474 /* Print an operand. Recognize special options, documented below. */
18476 #if TARGET_ELF
18477 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
18478 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
18479 #else
18480 #define SMALL_DATA_RELOC "sda21"
18481 #define SMALL_DATA_REG 0
18482 #endif
18484 void
18485 print_operand (FILE *file, rtx x, int code)
18487 int i;
18488 unsigned HOST_WIDE_INT uval;
18490 switch (code)
18492 /* %a is output_address. */
18494 case 'b':
18495 /* If constant, low-order 16 bits of constant, unsigned.
18496 Otherwise, write normally. */
18497 if (INT_P (x))
18498 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
18499 else
18500 print_operand (file, x, 0);
18501 return;
18503 case 'B':
18504 /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
18505 for 64-bit mask direction. */
18506 putc (((INTVAL (x) & 1) == 0 ? 'r' : 'l'), file);
18507 return;
18509 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
18510 output_operand. */
18512 case 'D':
18513 /* Like 'J' but get to the GT bit only. */
18514 gcc_assert (REG_P (x));
18516 /* Bit 1 is GT bit. */
18517 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
18519 /* Add one for shift count in rlinm for scc. */
18520 fprintf (file, "%d", i + 1);
18521 return;
18523 case 'e':
18524 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
18525 if (! INT_P (x))
18527 output_operand_lossage ("invalid %%e value");
18528 return;
18531 uval = INTVAL (x);
18532 if ((uval & 0xffff) == 0 && uval != 0)
18533 putc ('s', file);
18534 return;
18536 case 'E':
18537 /* X is a CR register. Print the number of the EQ bit of the CR. */
18538 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18539 output_operand_lossage ("invalid %%E value");
18540 else
18541 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
18542 return;
18544 case 'f':
18545 /* X is a CR register. Print the shift count needed to move it
18546 to the high-order four bits. */
18547 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18548 output_operand_lossage ("invalid %%f value");
18549 else
18550 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
18551 return;
18553 case 'F':
18554 /* Similar, but print the count for the rotate in the opposite
18555 direction. */
18556 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18557 output_operand_lossage ("invalid %%F value");
18558 else
18559 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
18560 return;
18562 case 'G':
18563 /* X is a constant integer. If it is negative, print "m",
18564 otherwise print "z". This is to make an aze or ame insn. */
18565 if (GET_CODE (x) != CONST_INT)
18566 output_operand_lossage ("invalid %%G value");
18567 else if (INTVAL (x) >= 0)
18568 putc ('z', file);
18569 else
18570 putc ('m', file);
18571 return;
18573 case 'h':
18574 /* If constant, output low-order five bits. Otherwise, write
18575 normally. */
18576 if (INT_P (x))
18577 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
18578 else
18579 print_operand (file, x, 0);
18580 return;
18582 case 'H':
18583 /* If constant, output low-order six bits. Otherwise, write
18584 normally. */
18585 if (INT_P (x))
18586 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
18587 else
18588 print_operand (file, x, 0);
18589 return;
18591 case 'I':
18592 /* Print `i' if this is a constant, else nothing. */
18593 if (INT_P (x))
18594 putc ('i', file);
18595 return;
18597 case 'j':
18598 /* Write the bit number in CCR for jump. */
18599 i = ccr_bit (x, 0);
18600 if (i == -1)
18601 output_operand_lossage ("invalid %%j code");
18602 else
18603 fprintf (file, "%d", i);
18604 return;
18606 case 'J':
18607 /* Similar, but add one for shift count in rlinm for scc and pass
18608 scc flag to `ccr_bit'. */
18609 i = ccr_bit (x, 1);
18610 if (i == -1)
18611 output_operand_lossage ("invalid %%J code");
18612 else
18613 /* If we want bit 31, write a shift count of zero, not 32. */
18614 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18615 return;
18617 case 'k':
18618 /* X must be a constant. Write the 1's complement of the
18619 constant. */
18620 if (! INT_P (x))
18621 output_operand_lossage ("invalid %%k value");
18622 else
18623 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
18624 return;
18626 case 'K':
18627 /* X must be a symbolic constant on ELF. Write an
18628 expression suitable for an 'addi' that adds in the low 16
18629 bits of the MEM. */
18630 if (GET_CODE (x) == CONST)
18632 if (GET_CODE (XEXP (x, 0)) != PLUS
18633 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
18634 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
18635 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
18636 output_operand_lossage ("invalid %%K value");
18638 print_operand_address (file, x);
18639 fputs ("@l", file);
18640 return;
18642 /* %l is output_asm_label. */
18644 case 'L':
18645 /* Write second word of DImode or DFmode reference. Works on register
18646 or non-indexed memory only. */
18647 if (REG_P (x))
18648 fputs (reg_names[REGNO (x) + 1], file);
18649 else if (MEM_P (x))
18651 /* Handle possible auto-increment. Since it is pre-increment and
18652 we have already done it, we can just use an offset of one word. */
18653 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18654 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18655 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18656 UNITS_PER_WORD));
18657 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18658 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18659 UNITS_PER_WORD));
18660 else
18661 output_address (XEXP (adjust_address_nv (x, SImode,
18662 UNITS_PER_WORD),
18663 0));
18665 if (small_data_operand (x, GET_MODE (x)))
18666 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18667 reg_names[SMALL_DATA_REG]);
18669 return;
18671 case 'm':
18672 /* MB value for a mask operand. */
18673 if (! mask_operand (x, SImode))
18674 output_operand_lossage ("invalid %%m value");
18676 fprintf (file, "%d", extract_MB (x));
18677 return;
18679 case 'M':
18680 /* ME value for a mask operand. */
18681 if (! mask_operand (x, SImode))
18682 output_operand_lossage ("invalid %%M value");
18684 fprintf (file, "%d", extract_ME (x));
18685 return;
18687 /* %n outputs the negative of its operand. */
18689 case 'N':
18690 /* Write the number of elements in the vector times 4. */
18691 if (GET_CODE (x) != PARALLEL)
18692 output_operand_lossage ("invalid %%N value");
18693 else
18694 fprintf (file, "%d", XVECLEN (x, 0) * 4);
18695 return;
18697 case 'O':
18698 /* Similar, but subtract 1 first. */
18699 if (GET_CODE (x) != PARALLEL)
18700 output_operand_lossage ("invalid %%O value");
18701 else
18702 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
18703 return;
18705 case 'p':
18706 /* X is a CONST_INT that is a power of two. Output the logarithm. */
18707 if (! INT_P (x)
18708 || INTVAL (x) < 0
18709 || (i = exact_log2 (INTVAL (x))) < 0)
18710 output_operand_lossage ("invalid %%p value");
18711 else
18712 fprintf (file, "%d", i);
18713 return;
18715 case 'P':
18716 /* The operand must be an indirect memory reference. The result
18717 is the register name. */
18718 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
18719 || REGNO (XEXP (x, 0)) >= 32)
18720 output_operand_lossage ("invalid %%P value");
18721 else
18722 fputs (reg_names[REGNO (XEXP (x, 0))], file);
18723 return;
18725 case 'q':
18726 /* This outputs the logical code corresponding to a boolean
18727 expression. The expression may have one or both operands
18728 negated (if one, only the first one). For condition register
18729 logical operations, it will also treat the negated
18730 CR codes as NOTs, but not handle NOTs of them. */
18732 const char *const *t = 0;
18733 const char *s;
18734 enum rtx_code code = GET_CODE (x);
18735 static const char * const tbl[3][3] = {
18736 { "and", "andc", "nor" },
18737 { "or", "orc", "nand" },
18738 { "xor", "eqv", "xor" } };
18740 if (code == AND)
18741 t = tbl[0];
18742 else if (code == IOR)
18743 t = tbl[1];
18744 else if (code == XOR)
18745 t = tbl[2];
18746 else
18747 output_operand_lossage ("invalid %%q value");
18749 if (GET_CODE (XEXP (x, 0)) != NOT)
18750 s = t[0];
18751 else
18753 if (GET_CODE (XEXP (x, 1)) == NOT)
18754 s = t[2];
18755 else
18756 s = t[1];
18759 fputs (s, file);
18761 return;
18763 case 'Q':
18764 if (! TARGET_MFCRF)
18765 return;
18766 fputc (',', file);
18767 /* FALLTHRU */
18769 case 'R':
18770 /* X is a CR register. Print the mask for `mtcrf'. */
18771 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18772 output_operand_lossage ("invalid %%R value");
18773 else
18774 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
18775 return;
18777 case 's':
18778 /* Low 5 bits of 32 minus the value. */
18779 if (! INT_P (x))
18780 output_operand_lossage ("invalid %%s value");
18781 else
18782 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
18783 return;
18785 case 'S':
18786 /* PowerPC64 mask position. A mask of all zeros is excluded.
18787 A CONST_INT 32-bit mask is considered sign-extended, so any
18788 transition must occur within the CONST_INT, not on the boundary. */
18789 if (! mask64_operand (x, DImode))
18790 output_operand_lossage ("invalid %%S value");
18792 uval = INTVAL (x);
18794 if (uval & 1) /* Clear Left */
18796 #if HOST_BITS_PER_WIDE_INT > 64
18797 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18798 #endif
18799 i = 64;
18801 else /* Clear Right */
18803 uval = ~uval;
18804 #if HOST_BITS_PER_WIDE_INT > 64
18805 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18806 #endif
18807 i = 63;
18809 while (uval != 0)
18810 --i, uval >>= 1;
18811 gcc_assert (i >= 0);
18812 fprintf (file, "%d", i);
18813 return;
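/* Two examples of the shapes handled above: the "clear left" mask
   0x00000000ffffffff prints 32 (an rldicl MB value), while the
   "clear right" mask 0xffffffff00000000 prints 31 (an rldicr ME
   value).  */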
18815 case 't':
18816 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
18817 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
18819 /* Bit 3 is OV bit. */
18820 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
18822 /* If we want bit 31, write a shift count of zero, not 32. */
18823 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18824 return;
18826 case 'T':
18827 /* Print the symbolic name of a branch target register. */
18828 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
18829 && REGNO (x) != CTR_REGNO))
18830 output_operand_lossage ("invalid %%T value");
18831 else if (REGNO (x) == LR_REGNO)
18832 fputs ("lr", file);
18833 else
18834 fputs ("ctr", file);
18835 return;
18837 case 'u':
18838 /* High-order or low-order 16 bits of constant, whichever is non-zero,
18839 for use in unsigned operand. */
18840 if (! INT_P (x))
18842 output_operand_lossage ("invalid %%u value");
18843 return;
18846 uval = INTVAL (x);
18847 if ((uval & 0xffff) == 0)
18848 uval >>= 16;
18850 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
18851 return;
18853 case 'v':
18854 /* High-order 16 bits of constant for use in signed operand. */
18855 if (! INT_P (x))
18856 output_operand_lossage ("invalid %%v value");
18857 else
18858 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
18859 (INTVAL (x) >> 16) & 0xffff);
18860 return;
18862 case 'U':
18863 /* Print `u' if this has an auto-increment or auto-decrement. */
18864 if (MEM_P (x)
18865 && (GET_CODE (XEXP (x, 0)) == PRE_INC
18866 || GET_CODE (XEXP (x, 0)) == PRE_DEC
18867 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
18868 putc ('u', file);
18869 return;
18871 case 'V':
18872 /* Print the trap code for this operand. */
18873 switch (GET_CODE (x))
18875 case EQ:
18876 fputs ("eq", file); /* 4 */
18877 break;
18878 case NE:
18879 fputs ("ne", file); /* 24 */
18880 break;
18881 case LT:
18882 fputs ("lt", file); /* 16 */
18883 break;
18884 case LE:
18885 fputs ("le", file); /* 20 */
18886 break;
18887 case GT:
18888 fputs ("gt", file); /* 8 */
18889 break;
18890 case GE:
18891 fputs ("ge", file); /* 12 */
18892 break;
18893 case LTU:
18894 fputs ("llt", file); /* 2 */
18895 break;
18896 case LEU:
18897 fputs ("lle", file); /* 6 */
18898 break;
18899 case GTU:
18900 fputs ("lgt", file); /* 1 */
18901 break;
18902 case GEU:
18903 fputs ("lge", file); /* 5 */
18904 break;
18905 default:
18906 gcc_unreachable ();
18908 break;
18910 case 'w':
18911 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
18912 normally. */
18913 if (INT_P (x))
18914 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
18915 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
18916 else
18917 print_operand (file, x, 0);
18918 return;
18920 case 'W':
18921 /* MB value for a PowerPC64 rldic operand. */
18922 i = clz_hwi (INTVAL (x));
18924 fprintf (file, "%d", i);
18925 return;
18927 case 'x':
18928 /* X is a FPR or Altivec register used in a VSX context. */
18929 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
18930 output_operand_lossage ("invalid %%x value");
18931 else
18933 int reg = REGNO (x);
18934 int vsx_reg = (FP_REGNO_P (reg)
18935 ? reg - 32
18936 : reg - FIRST_ALTIVEC_REGNO + 32);
18938 #ifdef TARGET_REGNAMES
18939 if (TARGET_REGNAMES)
18940 fprintf (file, "%%vs%d", vsx_reg);
18941 else
18942 #endif
18943 fprintf (file, "%d", vsx_reg);
18945 return;
18947 case 'X':
18948 if (MEM_P (x)
18949 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
18950 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
18951 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
18952 putc ('x', file);
18953 return;
18955 case 'Y':
18956 /* Like 'L', for third word of TImode/PTImode */
18957 if (REG_P (x))
18958 fputs (reg_names[REGNO (x) + 2], file);
18959 else if (MEM_P (x))
18961 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18962 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18963 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18964 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18965 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18966 else
18967 output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
18968 if (small_data_operand (x, GET_MODE (x)))
18969 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18970 reg_names[SMALL_DATA_REG]);
18972 return;
18974 case 'z':
18975 /* X is a SYMBOL_REF. Write out the name preceded by a
18976 period and without any trailing data in brackets. Used for function
18977 names. If we are configured for System V (or the embedded ABI) on
18978 the PowerPC, do not emit the period, since those systems do not use
18979 TOCs and the like. */
18980 gcc_assert (GET_CODE (x) == SYMBOL_REF);
18982 /* For macho, check to see if we need a stub. */
18983 if (TARGET_MACHO)
18985 const char *name = XSTR (x, 0);
18986 #if TARGET_MACHO
18987 if (darwin_emit_branch_islands
18988 && MACHOPIC_INDIRECT
18989 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
18990 name = machopic_indirection_name (x, /*stub_p=*/true);
18991 #endif
18992 assemble_name (file, name);
18994 else if (!DOT_SYMBOLS)
18995 assemble_name (file, XSTR (x, 0));
18996 else
18997 rs6000_output_function_entry (file, XSTR (x, 0));
18998 return;
19000 case 'Z':
19001 /* Like 'L', for last word of TImode/PTImode. */
19002 if (REG_P (x))
19003 fputs (reg_names[REGNO (x) + 3], file);
19004 else if (MEM_P (x))
19006 if (GET_CODE (XEXP (x, 0)) == PRE_INC
19007 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
19008 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
19009 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
19010 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
19011 else
19012 output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
19013 if (small_data_operand (x, GET_MODE (x)))
19014 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19015 reg_names[SMALL_DATA_REG]);
19017 return;
19019 /* Print AltiVec or SPE memory operand. */
19020 case 'y':
19022 rtx tmp;
19024 gcc_assert (MEM_P (x));
19026 tmp = XEXP (x, 0);
19028 /* Ugly hack because %y is overloaded. */
19029 if ((TARGET_SPE || TARGET_E500_DOUBLE)
19030 && (GET_MODE_SIZE (GET_MODE (x)) == 8
19031 || GET_MODE (x) == TFmode
19032 || GET_MODE (x) == TImode
19033 || GET_MODE (x) == PTImode))
19035 /* Handle [reg]. */
19036 if (REG_P (tmp))
19038 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
19039 break;
19041 /* Handle [reg+UIMM]. */
19042 else if (GET_CODE (tmp) == PLUS &&
19043 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
19045 int off;
19047 gcc_assert (REG_P (XEXP (tmp, 0)));
19049 off = INTVAL (XEXP (tmp, 1));
19050 fprintf (file, "%d(%s)", off, reg_names[REGNO (XEXP (tmp, 0))]);
19051 break;
19054 /* Fall through. Must be [reg+reg]. */
19056 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
19057 && GET_CODE (tmp) == AND
19058 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
19059 && INTVAL (XEXP (tmp, 1)) == -16)
19060 tmp = XEXP (tmp, 0);
19061 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
19062 && GET_CODE (tmp) == PRE_MODIFY)
19063 tmp = XEXP (tmp, 1);
19064 if (REG_P (tmp))
19065 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
19066 else
19068 if (GET_CODE (tmp) != PLUS
19069 || !REG_P (XEXP (tmp, 0))
19070 || !REG_P (XEXP (tmp, 1)))
19072 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
19073 break;
19076 if (REGNO (XEXP (tmp, 0)) == 0)
19077 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
19078 reg_names[ REGNO (XEXP (tmp, 0)) ]);
19079 else
19080 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
19081 reg_names[ REGNO (XEXP (tmp, 1)) ]);
19083 break;
19086 case 0:
19087 if (REG_P (x))
19088 fprintf (file, "%s", reg_names[REGNO (x)]);
19089 else if (MEM_P (x))
19091 /* We need to handle PRE_INC and PRE_DEC here, since we need to
19092 know the width from the mode. */
19093 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
19094 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
19095 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
19096 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
19097 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
19098 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
19099 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
19100 output_address (XEXP (XEXP (x, 0), 1));
19101 else
19102 output_address (XEXP (x, 0));
19104 else
19106 if (toc_relative_expr_p (x, false))
19107 /* This hack along with a corresponding hack in
19108 rs6000_output_addr_const_extra arranges to output addends
19109 where the assembler expects to find them. eg.
19110 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
19111 without this hack would be output as "x@toc+4". We
19112 want "x+4@toc". */
19113 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
19114 else
19115 output_addr_const (file, x);
19117 return;
19119 case '&':
19120 if (const char *name = get_some_local_dynamic_name ())
19121 assemble_name (file, name);
19122 else
19123 output_operand_lossage ("'%%&' used without any "
19124 "local dynamic TLS references");
19125 return;
19127 default:
19128 output_operand_lossage ("invalid %%xn code");
19132 /* Print the address of an operand. */
19134 void
19135 print_operand_address (FILE *file, rtx x)
19137 if (REG_P (x))
19138 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
19139 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
19140 || GET_CODE (x) == LABEL_REF)
19142 output_addr_const (file, x);
19143 if (small_data_operand (x, GET_MODE (x)))
19144 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19145 reg_names[SMALL_DATA_REG]);
19146 else
19147 gcc_assert (!TARGET_TOC);
19149 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
19150 && REG_P (XEXP (x, 1)))
19152 if (REGNO (XEXP (x, 0)) == 0)
19153 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
19154 reg_names[ REGNO (XEXP (x, 0)) ]);
19155 else
19156 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
19157 reg_names[ REGNO (XEXP (x, 1)) ]);
19159 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
19160 && GET_CODE (XEXP (x, 1)) == CONST_INT)
19161 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
19162 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
19163 #if TARGET_MACHO
19164 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
19165 && CONSTANT_P (XEXP (x, 1)))
19167 fprintf (file, "lo16(");
19168 output_addr_const (file, XEXP (x, 1));
19169 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
19171 #endif
19172 #if TARGET_ELF
19173 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
19174 && CONSTANT_P (XEXP (x, 1)))
19176 output_addr_const (file, XEXP (x, 1));
19177 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
19179 #endif
19180 else if (toc_relative_expr_p (x, false))
19182 /* This hack along with a corresponding hack in
19183 rs6000_output_addr_const_extra arranges to output addends
19184 where the assembler expects to find them. eg.
19185 (lo_sum (reg 9)
19186 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
19187 without this hack would be output as "x@toc+8@l(9)". We
19188 want "x+8@toc@l(9)". */
19189 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
19190 if (GET_CODE (x) == LO_SUM)
19191 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
19192 else
19193 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
19195 else
19196 gcc_unreachable ();
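/* Examples of the address forms produced here (default register names):
   "0(9)" for a bare register, "9,10" for an indexed address, "-16(1)" for
   register plus offset, and "sym@l(9)" for an ELF LO_SUM.  */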
19199 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
19201 static bool
19202 rs6000_output_addr_const_extra (FILE *file, rtx x)
19204 if (GET_CODE (x) == UNSPEC)
19205 switch (XINT (x, 1))
19207 case UNSPEC_TOCREL:
19208 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
19209 && REG_P (XVECEXP (x, 0, 1))
19210 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
19211 output_addr_const (file, XVECEXP (x, 0, 0));
19212 if (x == tocrel_base && tocrel_offset != const0_rtx)
19214 if (INTVAL (tocrel_offset) >= 0)
19215 fprintf (file, "+");
19216 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
19218 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
19220 putc ('-', file);
19221 assemble_name (file, toc_label_name);
19223 else if (TARGET_ELF)
19224 fputs ("@toc", file);
19225 return true;
19227 #if TARGET_MACHO
19228 case UNSPEC_MACHOPIC_OFFSET:
19229 output_addr_const (file, XVECEXP (x, 0, 0));
19230 putc ('-', file);
19231 machopic_output_function_base_name (file);
19232 return true;
19233 #endif
19235 return false;
19238 /* Target hook for assembling integer objects. The PowerPC version has
19239 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
19240 is defined. It also needs to handle DI-mode objects on 64-bit
19241 targets. */
19243 static bool
19244 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
19246 #ifdef RELOCATABLE_NEEDS_FIXUP
19247 /* Special handling for SI values. */
19248 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
19250 static int recurse = 0;
19252 /* For -mrelocatable, we mark all addresses that need to be fixed up in
19253 the .fixup section. Since the TOC section is already relocated, we
19254 don't need to mark it here. We used to skip the text section, but it
19255 should never be valid for relocated addresses to be placed in the text
19256 section. */
19257 if (TARGET_RELOCATABLE
19258 && in_section != toc_section
19259 && !recurse
19260 && !CONST_SCALAR_INT_P (x)
19261 && CONSTANT_P (x))
19263 char buf[256];
19265 recurse = 1;
19266 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
19267 fixuplabelno++;
19268 ASM_OUTPUT_LABEL (asm_out_file, buf);
19269 fprintf (asm_out_file, "\t.long\t(");
19270 output_addr_const (asm_out_file, x);
19271 fprintf (asm_out_file, ")@fixup\n");
19272 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
19273 ASM_OUTPUT_ALIGN (asm_out_file, 2);
19274 fprintf (asm_out_file, "\t.long\t");
19275 assemble_name (asm_out_file, buf);
19276 fprintf (asm_out_file, "\n\t.previous\n");
19277 recurse = 0;
19278 return true;
19280 /* Remove initial .'s to turn a -mcall-aixdesc function
19281 address into the address of the descriptor, not the function
19282 itself. */
19283 else if (GET_CODE (x) == SYMBOL_REF
19284 && XSTR (x, 0)[0] == '.'
19285 && DEFAULT_ABI == ABI_AIX)
19287 const char *name = XSTR (x, 0);
19288 while (*name == '.')
19289 name++;
19291 fprintf (asm_out_file, "\t.long\t%s\n", name);
19292 return true;
19295 #endif /* RELOCATABLE_NEEDS_FIXUP */
19296 return default_assemble_integer (x, size, aligned_p);
19299 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
19300 /* Emit an assembler directive to set symbol visibility for DECL to
19301 VISIBILITY_TYPE. */
19303 static void
19304 rs6000_assemble_visibility (tree decl, int vis)
19306 if (TARGET_XCOFF)
19307 return;
19309 /* Functions need to have their entry point symbol visibility set as
19310 well as their descriptor symbol visibility. */
19311 if (DEFAULT_ABI == ABI_AIX
19312 && DOT_SYMBOLS
19313 && TREE_CODE (decl) == FUNCTION_DECL)
19315 static const char * const visibility_types[] = {
19316 NULL, "internal", "hidden", "protected"
19319 const char *name, *type;
19321 name = ((* targetm.strip_name_encoding)
19322 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
19323 type = visibility_types[vis];
19325 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
19326 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
19328 else
19329 default_assemble_visibility (decl, vis);
19331 #endif
19333 enum rtx_code
19334 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
19336 /* Reversal of FP compares needs care -- an ordered compare
19337 becomes an unordered compare and vice versa. */
19338 if (mode == CCFPmode
19339 && (!flag_finite_math_only
19340 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
19341 || code == UNEQ || code == LTGT))
19342 return reverse_condition_maybe_unordered (code);
19343 else
19344 return reverse_condition (code);
19347 /* Generate a compare for CODE. Return a brand-new rtx that
19348 represents the result of the compare. */
19350 static rtx
19351 rs6000_generate_compare (rtx cmp, machine_mode mode)
19353 machine_mode comp_mode;
19354 rtx compare_result;
19355 enum rtx_code code = GET_CODE (cmp);
19356 rtx op0 = XEXP (cmp, 0);
19357 rtx op1 = XEXP (cmp, 1);
19359 if (FLOAT_MODE_P (mode))
19360 comp_mode = CCFPmode;
19361 else if (code == GTU || code == LTU
19362 || code == GEU || code == LEU)
19363 comp_mode = CCUNSmode;
19364 else if ((code == EQ || code == NE)
19365 && unsigned_reg_p (op0)
19366 && (unsigned_reg_p (op1)
19367 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
19368 /* These are unsigned values; perhaps there will be a later
19369 ordering compare that can be shared with this one. */
19370 comp_mode = CCUNSmode;
19371 else
19372 comp_mode = CCmode;
19374 /* If we have an unsigned compare, make sure we don't have a signed value as
19375 an immediate. */
19376 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
19377 && INTVAL (op1) < 0)
19379 op0 = copy_rtx_if_shared (op0);
19380 op1 = force_reg (GET_MODE (op0), op1);
19381 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
19384 /* First, the compare. */
19385 compare_result = gen_reg_rtx (comp_mode);
19387 /* E500 FP compare instructions on the GPRs. Yuck! */
19388 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
19389 && FLOAT_MODE_P (mode))
19391 rtx cmp, or_result, compare_result2;
19392 machine_mode op_mode = GET_MODE (op0);
19393 bool reverse_p;
19395 if (op_mode == VOIDmode)
19396 op_mode = GET_MODE (op1);
19398 /* First reverse the condition codes that aren't directly supported. */
19399 switch (code)
19401 case NE:
19402 case UNLT:
19403 case UNLE:
19404 case UNGT:
19405 case UNGE:
19406 code = reverse_condition_maybe_unordered (code);
19407 reverse_p = true;
19408 break;
19410 case EQ:
19411 case LT:
19412 case LE:
19413 case GT:
19414 case GE:
19415 reverse_p = false;
19416 break;
19418 default:
19419 gcc_unreachable ();
19422 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
19423 This explains the following mess. */
19425 switch (code)
19427 case EQ:
19428 switch (op_mode)
19430 case SFmode:
19431 cmp = (flag_finite_math_only && !flag_trapping_math)
19432 ? gen_tstsfeq_gpr (compare_result, op0, op1)
19433 : gen_cmpsfeq_gpr (compare_result, op0, op1);
19434 break;
19436 case DFmode:
19437 cmp = (flag_finite_math_only && !flag_trapping_math)
19438 ? gen_tstdfeq_gpr (compare_result, op0, op1)
19439 : gen_cmpdfeq_gpr (compare_result, op0, op1);
19440 break;
19442 case TFmode:
19443 cmp = (flag_finite_math_only && !flag_trapping_math)
19444 ? gen_tsttfeq_gpr (compare_result, op0, op1)
19445 : gen_cmptfeq_gpr (compare_result, op0, op1);
19446 break;
19448 default:
19449 gcc_unreachable ();
19451 break;
19453 case GT:
19454 case GE:
19455 switch (op_mode)
19457 case SFmode:
19458 cmp = (flag_finite_math_only && !flag_trapping_math)
19459 ? gen_tstsfgt_gpr (compare_result, op0, op1)
19460 : gen_cmpsfgt_gpr (compare_result, op0, op1);
19461 break;
19463 case DFmode:
19464 cmp = (flag_finite_math_only && !flag_trapping_math)
19465 ? gen_tstdfgt_gpr (compare_result, op0, op1)
19466 : gen_cmpdfgt_gpr (compare_result, op0, op1);
19467 break;
19469 case TFmode:
19470 cmp = (flag_finite_math_only && !flag_trapping_math)
19471 ? gen_tsttfgt_gpr (compare_result, op0, op1)
19472 : gen_cmptfgt_gpr (compare_result, op0, op1);
19473 break;
19475 default:
19476 gcc_unreachable ();
19478 break;
19480 case LT:
19481 case LE:
19482 switch (op_mode)
19484 case SFmode:
19485 cmp = (flag_finite_math_only && !flag_trapping_math)
19486 ? gen_tstsflt_gpr (compare_result, op0, op1)
19487 : gen_cmpsflt_gpr (compare_result, op0, op1);
19488 break;
19490 case DFmode:
19491 cmp = (flag_finite_math_only && !flag_trapping_math)
19492 ? gen_tstdflt_gpr (compare_result, op0, op1)
19493 : gen_cmpdflt_gpr (compare_result, op0, op1);
19494 break;
19496 case TFmode:
19497 cmp = (flag_finite_math_only && !flag_trapping_math)
19498 ? gen_tsttflt_gpr (compare_result, op0, op1)
19499 : gen_cmptflt_gpr (compare_result, op0, op1);
19500 break;
19502 default:
19503 gcc_unreachable ();
19505 break;
19507 default:
19508 gcc_unreachable ();
19511 /* Synthesize LE and GE from LT/GT || EQ. */
19512 if (code == LE || code == GE)
19514 emit_insn (cmp);
19516 compare_result2 = gen_reg_rtx (CCFPmode);
19518 /* Do the EQ. */
19519 switch (op_mode)
19521 case SFmode:
19522 cmp = (flag_finite_math_only && !flag_trapping_math)
19523 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
19524 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
19525 break;
19527 case DFmode:
19528 cmp = (flag_finite_math_only && !flag_trapping_math)
19529 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
19530 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
19531 break;
19533 case TFmode:
19534 cmp = (flag_finite_math_only && !flag_trapping_math)
19535 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
19536 : gen_cmptfeq_gpr (compare_result2, op0, op1);
19537 break;
19539 default:
19540 gcc_unreachable ();
19543 emit_insn (cmp);
19545 /* OR them together. */
19546 or_result = gen_reg_rtx (CCFPmode);
19547 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
19548 compare_result2);
19549 compare_result = or_result;
19552 code = reverse_p ? NE : EQ;
19554 emit_insn (cmp);
19556 else
19558 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
19559 CLOBBERs to match cmptf_internal2 pattern. */
19560 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
19561 && GET_MODE (op0) == TFmode
19562 && !TARGET_IEEEQUAD
19563 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
19564 emit_insn (gen_rtx_PARALLEL (VOIDmode,
19565 gen_rtvec (10,
19566 gen_rtx_SET (compare_result,
19567 gen_rtx_COMPARE (comp_mode, op0, op1)),
19568 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19569 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19570 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19571 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19572 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19573 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19574 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19575 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19576 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
19577 else if (GET_CODE (op1) == UNSPEC
19578 && XINT (op1, 1) == UNSPEC_SP_TEST)
19580 rtx op1b = XVECEXP (op1, 0, 0);
19581 comp_mode = CCEQmode;
19582 compare_result = gen_reg_rtx (CCEQmode);
19583 if (TARGET_64BIT)
19584 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
19585 else
19586 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
19588 else
19589 emit_insn (gen_rtx_SET (compare_result,
19590 gen_rtx_COMPARE (comp_mode, op0, op1)));
19593 /* Some kinds of FP comparisons need an OR operation;
19594 under flag_finite_math_only we don't bother. */
19595 if (FLOAT_MODE_P (mode)
19596 && !flag_finite_math_only
19597 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
19598 && (code == LE || code == GE
19599 || code == UNEQ || code == LTGT
19600 || code == UNGT || code == UNLT))
19602 enum rtx_code or1, or2;
19603 rtx or1_rtx, or2_rtx, compare2_rtx;
19604 rtx or_result = gen_reg_rtx (CCEQmode);
19606 switch (code)
19608 case LE: or1 = LT; or2 = EQ; break;
19609 case GE: or1 = GT; or2 = EQ; break;
19610 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
19611 case LTGT: or1 = LT; or2 = GT; break;
19612 case UNGT: or1 = UNORDERED; or2 = GT; break;
19613 case UNLT: or1 = UNORDERED; or2 = LT; break;
19614 default: gcc_unreachable ();
19616 validate_condition_mode (or1, comp_mode);
19617 validate_condition_mode (or2, comp_mode);
19618 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
19619 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
19620 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
19621 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
19622 const_true_rtx);
19623 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
19625 compare_result = or_result;
19626 code = EQ;
19629 validate_condition_mode (code, GET_MODE (compare_result));
19631 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
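/* As a sketch of the common FP path: a DFmode LE comparison becomes one
   fcmpu setting a CCFP register, then the LT and EQ bits of that result
   are IORed (a cror) into a CCEQ register, and the code returned reduces
   to a plain EQ test of that register.  */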
19635 /* Emit the RTL for an sISEL pattern. */
19637 void
19638 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
19640 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
19643 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
19644 can be used as that dest register. Return the dest register. */
19646 rtx
19647 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
19649 if (op2 == const0_rtx)
19650 return op1;
19652 if (GET_CODE (scratch) == SCRATCH)
19653 scratch = gen_reg_rtx (mode);
19655 if (logical_operand (op2, mode))
19656 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
19657 else
19658 emit_insn (gen_rtx_SET (scratch,
19659 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
19661 return scratch;
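/* For example, with OP2 = 0xff00 (a logical operand) this emits
   scratch = op1 ^ 0xff00, while with OP2 = -16 it emits
   scratch = op1 + 16; either way the caller then compares SCRATCH
   against zero.  */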
19664 void
19665 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
19667 rtx condition_rtx;
19668 machine_mode op_mode;
19669 enum rtx_code cond_code;
19670 rtx result = operands[0];
19672 condition_rtx = rs6000_generate_compare (operands[1], mode);
19673 cond_code = GET_CODE (condition_rtx);
19675 if (FLOAT_MODE_P (mode)
19676 && !TARGET_FPRS && TARGET_HARD_FLOAT)
19678 rtx t;
19680 PUT_MODE (condition_rtx, SImode);
19681 t = XEXP (condition_rtx, 0);
19683 gcc_assert (cond_code == NE || cond_code == EQ);
19685 if (cond_code == NE)
19686 emit_insn (gen_e500_flip_gt_bit (t, t));
19688 emit_insn (gen_move_from_CR_gt_bit (result, t));
19689 return;
19692 if (cond_code == NE
19693 || cond_code == GE || cond_code == LE
19694 || cond_code == GEU || cond_code == LEU
19695 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
19697 rtx not_result = gen_reg_rtx (CCEQmode);
19698 rtx not_op, rev_cond_rtx;
19699 machine_mode cc_mode;
19701 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
19703 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
19704 SImode, XEXP (condition_rtx, 0), const0_rtx);
19705 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
19706 emit_insn (gen_rtx_SET (not_result, not_op));
19707 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
19710 op_mode = GET_MODE (XEXP (operands[1], 0));
19711 if (op_mode == VOIDmode)
19712 op_mode = GET_MODE (XEXP (operands[1], 1));
19714 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
19716 PUT_MODE (condition_rtx, DImode);
19717 convert_move (result, condition_rtx, 0);
19719 else
19721 PUT_MODE (condition_rtx, SImode);
19722 emit_insn (gen_rtx_SET (result, condition_rtx));
19726 /* Emit a conditional branch: OPERANDS[0] is the comparison and
OPERANDS[3] the target label. */
19728 void
19729 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
19731 rtx condition_rtx, loc_ref;
19733 condition_rtx = rs6000_generate_compare (operands[0], mode);
19734 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
19735 emit_jump_insn (gen_rtx_SET (pc_rtx,
19736 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
19737 loc_ref, pc_rtx)));
19740 /* Return the string to output a conditional branch to LABEL, which is
19741 the operand template of the label, or NULL if the branch is really a
19742 conditional return.
19744 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
19745 condition code register and its mode specifies what kind of
19746 comparison we made.
19748 REVERSED is nonzero if we should reverse the sense of the comparison.
19750 INSN is the insn. */
19752 char *
19753 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
19755 static char string[64];
19756 enum rtx_code code = GET_CODE (op);
19757 rtx cc_reg = XEXP (op, 0);
19758 machine_mode mode = GET_MODE (cc_reg);
19759 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
19760 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
19761 int really_reversed = reversed ^ need_longbranch;
19762 char *s = string;
19763 const char *ccode;
19764 const char *pred;
19765 rtx note;
19767 validate_condition_mode (code, mode);
19769 /* Work out which way this really branches. We could use
19770 reverse_condition_maybe_unordered here always but this
19771 makes the resulting assembler clearer. */
19772 if (really_reversed)
19774 /* Reversal of FP compares needs care -- an ordered compare
19775 becomes an unordered compare and vice versa. */
19776 if (mode == CCFPmode)
19777 code = reverse_condition_maybe_unordered (code);
19778 else
19779 code = reverse_condition (code);
19782 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
19784 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
19785 to the GT bit. */
19786 switch (code)
19788 case EQ:
19789 /* Opposite of GT. */
19790 code = GT;
19791 break;
19793 case NE:
19794 code = UNLE;
19795 break;
19797 default:
19798 gcc_unreachable ();
19802 switch (code)
19804 /* Not all of these are actually distinct opcodes, but
19805 we distinguish them for clarity of the resulting assembler. */
19806 case NE: case LTGT:
19807 ccode = "ne"; break;
19808 case EQ: case UNEQ:
19809 ccode = "eq"; break;
19810 case GE: case GEU:
19811 ccode = "ge"; break;
19812 case GT: case GTU: case UNGT:
19813 ccode = "gt"; break;
19814 case LE: case LEU:
19815 ccode = "le"; break;
19816 case LT: case LTU: case UNLT:
19817 ccode = "lt"; break;
19818 case UNORDERED: ccode = "un"; break;
19819 case ORDERED: ccode = "nu"; break;
19820 case UNGE: ccode = "nl"; break;
19821 case UNLE: ccode = "ng"; break;
19822 default:
19823 gcc_unreachable ();
19826 /* Maybe we have a guess as to how likely the branch is. */
19827 pred = "";
19828 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
19829 if (note != NULL_RTX)
19831 /* PROB is the difference from 50%. */
19832 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
19834 /* Only hint for highly probable/improbable branches on newer
19835 cpus as static prediction overrides processor dynamic
19836 prediction. For older cpus we may as well always hint, but
19837 assume not taken for branches that are very close to 50% as a
19838 mispredicted taken branch is more expensive than a
19839 mispredicted not-taken branch. */
19840 if (rs6000_always_hint
19841 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
19842 && br_prob_note_reliable_p (note)))
19844 if (abs (prob) > REG_BR_PROB_BASE / 20
19845 && ((prob > 0) ^ need_longbranch))
19846 pred = "+";
19847 else
19848 pred = "-";
19852 if (label == NULL)
19853 s += sprintf (s, "b%slr%s ", ccode, pred);
19854 else
19855 s += sprintf (s, "b%s%s ", ccode, pred);
19857 /* We need to escape any '%' characters in the reg_names string.
19858 Assume they'd only be the first character.... */
19859 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
19860 *s++ = '%';
19861 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
19863 if (label != NULL)
19865 /* If the branch distance was too far, we may have to use an
19866 unconditional branch to go the distance. */
19867 if (need_longbranch)
19868 s += sprintf (s, ",$+8\n\tb %s", label);
19869 else
19870 s += sprintf (s, ",%s", label);
19873 return string;
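/* As an illustration (assuming the usual "cr0".."cr7" register names):
   an EQ test on cr0 against label L42 within branch range and with no
   hint produces "beq cr0,L42"; when get_attr_length reports a long
   branch, the sense is reversed and an unconditional branch covers the
   distance:
       bne cr0,$+8
       b L42
   A "+" or "-" hint, when emitted, follows the condition code, as in
   "beq+ cr0,L42".  */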
19876 /* Return the string to flip the GT bit on a CR. */
19877 char *
19878 output_e500_flip_gt_bit (rtx dst, rtx src)
19880 static char string[64];
19881 int a, b;
19883 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
19884 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
19886 /* GT bit. */
19887 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
19888 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
19890 sprintf (string, "crnot %d,%d", a, b);
19891 return string;
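/* For example, with DST = CR2 and SRC = CR3 this computes a = 4*2 + 1
   = 9 and b = 4*3 + 1 = 13 and returns "crnot 9,13", making CR2's GT
   bit the complement of CR3's GT bit.  */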
19894 /* Return insn for VSX or Altivec comparisons. */
19896 static rtx
19897 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
19899 rtx mask;
19900 machine_mode mode = GET_MODE (op0);
19902 switch (code)
19904 default:
19905 break;
19907 case GE:
19908 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19909 return NULL_RTX;
19911 case EQ:
19912 case GT:
19913 case GTU:
19914 case ORDERED:
19915 case UNORDERED:
19916 case UNEQ:
19917 case LTGT:
19918 mask = gen_reg_rtx (mode);
19919 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
19920 return mask;
19923 return NULL_RTX;
19926 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
19927 DMODE is expected destination mode. This is a recursive function. */
19929 static rtx
19930 rs6000_emit_vector_compare (enum rtx_code rcode,
19931 rtx op0, rtx op1,
19932 machine_mode dmode)
19934 rtx mask;
19935 bool swap_operands = false;
19936 bool try_again = false;
19938 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
19939 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
19941 /* See if the comparison works as is. */
19942 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19943 if (mask)
19944 return mask;
19946 switch (rcode)
19948 case LT:
19949 rcode = GT;
19950 swap_operands = true;
19951 try_again = true;
19952 break;
19953 case LTU:
19954 rcode = GTU;
19955 swap_operands = true;
19956 try_again = true;
19957 break;
19958 case NE:
19959 case UNLE:
19960 case UNLT:
19961 case UNGE:
19962 case UNGT:
19963 /* Invert condition and try again.
19964 e.g., A != B becomes ~(A==B). */
19966 enum rtx_code rev_code;
19967 enum insn_code nor_code;
19968 rtx mask2;
19970 rev_code = reverse_condition_maybe_unordered (rcode);
19971 if (rev_code == UNKNOWN)
19972 return NULL_RTX;
19974 nor_code = optab_handler (one_cmpl_optab, dmode);
19975 if (nor_code == CODE_FOR_nothing)
19976 return NULL_RTX;
19978 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
19979 if (!mask2)
19980 return NULL_RTX;
19982 mask = gen_reg_rtx (dmode);
19983 emit_insn (GEN_FCN (nor_code) (mask, mask2));
19984 return mask;
19986 break;
19987 case GE:
19988 case GEU:
19989 case LE:
19990 case LEU:
19991 /* Try GT/GTU/LT/LTU OR EQ */
19993 rtx c_rtx, eq_rtx;
19994 enum insn_code ior_code;
19995 enum rtx_code new_code;
19997 switch (rcode)
19999 case GE:
20000 new_code = GT;
20001 break;
20003 case GEU:
20004 new_code = GTU;
20005 break;
20007 case LE:
20008 new_code = LT;
20009 break;
20011 case LEU:
20012 new_code = LTU;
20013 break;
20015 default:
20016 gcc_unreachable ();
20019 ior_code = optab_handler (ior_optab, dmode);
20020 if (ior_code == CODE_FOR_nothing)
20021 return NULL_RTX;
20023 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
20024 if (!c_rtx)
20025 return NULL_RTX;
20027 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
20028 if (!eq_rtx)
20029 return NULL_RTX;
20031 mask = gen_reg_rtx (dmode);
20032 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
20033 return mask;
20035 break;
20036 default:
20037 return NULL_RTX;
20040 if (try_again)
20042 if (swap_operands)
20043 std::swap (op0, op1);
20045 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
20046 if (mask)
20047 return mask;
20050 /* You only get two chances. */
20051 return NULL_RTX;
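/* A sketch of the rewrites performed above, in C-like notation:
     a <  b  becomes  b > a                 (swap operands, retry)
     a <= b  becomes  (a < b) | (a == b)    (two compares plus an IOR)
     a != b  becomes  ~(a == b)             (compare plus a complement)
   so any supported condition costs at most two vector compares and
   one vector logical operation.  */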
20054 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
20055 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
20056 operands for the relation operation COND. */
20059 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
20060 rtx cond, rtx cc_op0, rtx cc_op1)
20062 machine_mode dest_mode = GET_MODE (dest);
20063 machine_mode mask_mode = GET_MODE (cc_op0);
20064 enum rtx_code rcode = GET_CODE (cond);
20065 machine_mode cc_mode = CCmode;
20066 rtx mask;
20067 rtx cond2;
20068 rtx tmp;
20069 bool invert_move = false;
20071 if (VECTOR_UNIT_NONE_P (dest_mode))
20072 return 0;
20074 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
20075 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
20077 switch (rcode)
20079 /* Swap operands if we can; otherwise fall back to doing the operation
20080 as specified and using a NOR to invert the test. */
20081 case NE:
20082 case UNLE:
20083 case UNLT:
20084 case UNGE:
20085 case UNGT:
20086 /* Invert condition and try again.
20087 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
20088 invert_move = true;
20089 rcode = reverse_condition_maybe_unordered (rcode);
20090 if (rcode == UNKNOWN)
20091 return 0;
20092 break;
20094 /* Mark unsigned tests with CCUNSmode. */
20095 case GTU:
20096 case GEU:
20097 case LTU:
20098 case LEU:
20099 cc_mode = CCUNSmode;
20100 break;
20102 default:
20103 break;
20106 /* Get the vector mask for the given relational operations. */
20107 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
20109 if (!mask)
20110 return 0;
20112 if (invert_move)
20114 tmp = op_true;
20115 op_true = op_false;
20116 op_false = tmp;
20119 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
20120 CONST0_RTX (dest_mode));
20121 emit_insn (gen_rtx_SET (dest,
20122 gen_rtx_IF_THEN_ELSE (dest_mode,
20123 cond2,
20124 op_true,
20125 op_false)));
20126 return 1;
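/* For instance, a VEC_COND_EXPR such as dest = (a != b) ? t : f is
   handled above as mask = (a == b); dest = mask ? f : t, and the
   final IF_THEN_ELSE on mask != 0 is the form that typically matches
   the vsel (AltiVec) or xxsel (VSX) select patterns.  */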
20129 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
20130 operands of the last comparison is nonzero/true, FALSE_COND if it
20131 is zero/false. Return 0 if the hardware has no such operation. */
20134 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
20136 enum rtx_code code = GET_CODE (op);
20137 rtx op0 = XEXP (op, 0);
20138 rtx op1 = XEXP (op, 1);
20139 REAL_VALUE_TYPE c1;
20140 machine_mode compare_mode = GET_MODE (op0);
20141 machine_mode result_mode = GET_MODE (dest);
20142 rtx temp;
20143 bool is_against_zero;
20145 /* These modes should always match. */
20146 if (GET_MODE (op1) != compare_mode
20147 /* In the isel case however, we can use a compare immediate, so
20148 op1 may be a small constant. */
20149 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
20150 return 0;
20151 if (GET_MODE (true_cond) != result_mode)
20152 return 0;
20153 if (GET_MODE (false_cond) != result_mode)
20154 return 0;
20156 /* Don't allow using floating point comparisons for integer results for
20157 now. */
20158 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
20159 return 0;
20161 /* First, work out if the hardware can do this at all, or
20162 if it's too slow.... */
20163 if (!FLOAT_MODE_P (compare_mode))
20165 if (TARGET_ISEL)
20166 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
20167 return 0;
20169 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
20170 && SCALAR_FLOAT_MODE_P (compare_mode))
20171 return 0;
20173 is_against_zero = op1 == CONST0_RTX (compare_mode);
20175 /* A floating-point subtract might overflow, underflow, or produce
20176 an inexact result, thus changing the floating-point flags, so it
20177 can't be generated if we care about that. It's safe if one side
20178 of the construct is zero, since then no subtract will be
20179 generated. */
20180 if (SCALAR_FLOAT_MODE_P (compare_mode)
20181 && flag_trapping_math && ! is_against_zero)
20182 return 0;
20184 /* Eliminate half of the comparisons by switching operands, this
20185 makes the remaining code simpler. */
20186 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
20187 || code == LTGT || code == LT || code == UNLE)
20189 code = reverse_condition_maybe_unordered (code);
20190 temp = true_cond;
20191 true_cond = false_cond;
20192 false_cond = temp;
20195 /* UNEQ and LTGT take four instructions for a comparison with zero,
20196 so it'll probably be faster to use a branch here too. */
20197 if (code == UNEQ && HONOR_NANS (compare_mode))
20198 return 0;
20200 if (GET_CODE (op1) == CONST_DOUBLE)
20201 REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);
20203 /* We're going to try to implement comparisons by performing
20204 a subtract, then comparing against zero. Unfortunately,
20205 Inf - Inf is NaN, which is not zero, and so if we don't
20206 know that the operand is finite and the comparison
20207 would treat EQ differently from UNORDERED, we can't do it. */
20208 if (HONOR_INFINITIES (compare_mode)
20209 && code != GT && code != UNGE
20210 && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
20211 /* Constructs of the form (a OP b ? a : b) are safe. */
20212 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
20213 || (! rtx_equal_p (op0, true_cond)
20214 && ! rtx_equal_p (op1, true_cond))))
20215 return 0;
20217 /* At this point we know we can use fsel. */
20219 /* Reduce the comparison to a comparison against zero. */
20220 if (! is_against_zero)
20222 temp = gen_reg_rtx (compare_mode);
20223 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
20224 op0 = temp;
20225 op1 = CONST0_RTX (compare_mode);
20228 /* If we don't care about NaNs we can reduce some of the comparisons
20229 down to faster ones. */
20230 if (! HONOR_NANS (compare_mode))
20231 switch (code)
20233 case GT:
20234 code = LE;
20235 temp = true_cond;
20236 true_cond = false_cond;
20237 false_cond = temp;
20238 break;
20239 case UNGE:
20240 code = GE;
20241 break;
20242 case UNEQ:
20243 code = EQ;
20244 break;
20245 default:
20246 break;
20249 /* Now, reduce everything down to a GE. */
20250 switch (code)
20252 case GE:
20253 break;
20255 case LE:
20256 temp = gen_reg_rtx (compare_mode);
20257 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
20258 op0 = temp;
20259 break;
20261 case ORDERED:
20262 temp = gen_reg_rtx (compare_mode);
20263 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
20264 op0 = temp;
20265 break;
20267 case EQ:
20268 temp = gen_reg_rtx (compare_mode);
20269 emit_insn (gen_rtx_SET (temp,
20270 gen_rtx_NEG (compare_mode,
20271 gen_rtx_ABS (compare_mode, op0))));
20272 op0 = temp;
20273 break;
20275 case UNGE:
20276 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
20277 temp = gen_reg_rtx (result_mode);
20278 emit_insn (gen_rtx_SET (temp,
20279 gen_rtx_IF_THEN_ELSE (result_mode,
20280 gen_rtx_GE (VOIDmode,
20281 op0, op1),
20282 true_cond, false_cond)));
20283 false_cond = true_cond;
20284 true_cond = temp;
20286 temp = gen_reg_rtx (compare_mode);
20287 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
20288 op0 = temp;
20289 break;
20291 case GT:
20292 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
20293 temp = gen_reg_rtx (result_mode);
20294 emit_insn (gen_rtx_SET (temp,
20295 gen_rtx_IF_THEN_ELSE (result_mode,
20296 gen_rtx_GE (VOIDmode,
20297 op0, op1),
20298 true_cond, false_cond)));
20299 true_cond = false_cond;
20300 false_cond = temp;
20302 temp = gen_reg_rtx (compare_mode);
20303 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
20304 op0 = temp;
20305 break;
20307 default:
20308 gcc_unreachable ();
20311 emit_insn (gen_rtx_SET (dest,
20312 gen_rtx_IF_THEN_ELSE (result_mode,
20313 gen_rtx_GE (VOIDmode,
20314 op0, op1),
20315 true_cond, false_cond)));
20316 return 1;
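/* A sketch of the fsel lowering above, for a case where the safety
   checks pass (no trapping-math or infinity concerns):
     dest = (x <= y) ? p : q
   is first reduced to a compare against zero, t = x - y, then the LE
   is turned into a GE by negation, t2 = -t, giving the equivalent of
       fsub  t,x,y
       fneg  t2,t
       fsel  dest,t2,p,q    ; dest = (t2 >= 0.0) ? p : q  */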
20319 /* Same as above, but for ints (isel). */
20321 static int
20322 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
20324 rtx condition_rtx, cr;
20325 machine_mode mode = GET_MODE (dest);
20326 enum rtx_code cond_code;
20327 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
20328 bool signedp;
20330 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
20331 return 0;
20333 /* We still have to do the compare, because isel doesn't do a
20334 compare, it just looks at the CRx bits set by a previous compare
20335 instruction. */
20336 condition_rtx = rs6000_generate_compare (op, mode);
20337 cond_code = GET_CODE (condition_rtx);
20338 cr = XEXP (condition_rtx, 0);
20339 signedp = GET_MODE (cr) == CCmode;
20341 isel_func = (mode == SImode
20342 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
20343 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
20345 switch (cond_code)
20347 case LT: case GT: case LTU: case GTU: case EQ:
20348 /* isel handles these directly. */
20349 break;
20351 default:
20352 /* We need to swap the sense of the comparison. */
20354 std::swap (false_cond, true_cond);
20355 PUT_CODE (condition_rtx, reverse_condition (cond_code));
20357 break;
20360 false_cond = force_reg (mode, false_cond);
20361 if (true_cond != const0_rtx)
20362 true_cond = force_reg (mode, true_cond);
20364 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
20366 return 1;
20369 const char *
20370 output_isel (rtx *operands)
20372 enum rtx_code code;
20374 code = GET_CODE (operands[1]);
20376 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
20378 gcc_assert (GET_CODE (operands[2]) == REG
20379 && GET_CODE (operands[3]) == REG);
20380 PUT_CODE (operands[1], reverse_condition (code));
20381 return "isel %0,%3,%2,%j1";
20384 return "isel %0,%2,%3,%j1";
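/* isel selects between two GPRs on a single CR bit: isel rD,rA,rB,BC
   sets rD = rA if CR bit BC is set and rD = rB otherwise.  So for
   r = (a < b) ? x : y the expander emits a compare and then, with the
   result in cr0 (whose LT bit is CR bit 0), the equivalent of
   "isel r,x,y,0"; the %j1 in the templates prints the number of the
   CR bit being tested.  */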
20387 void
20388 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20390 machine_mode mode = GET_MODE (op0);
20391 enum rtx_code c;
20392 rtx target;
20394 /* VSX/altivec have direct min/max insns. */
20395 if ((code == SMAX || code == SMIN)
20396 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
20397 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
20399 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
20400 return;
20403 if (code == SMAX || code == SMIN)
20404 c = GE;
20405 else
20406 c = GEU;
20408 if (code == SMAX || code == UMAX)
20409 target = emit_conditional_move (dest, c, op0, op1, mode,
20410 op0, op1, mode, 0);
20411 else
20412 target = emit_conditional_move (dest, c, op0, op1, mode,
20413 op1, op0, mode, 0);
20414 gcc_assert (target);
20415 if (target != dest)
20416 emit_move_insn (dest, target);
20419 /* A subroutine of the atomic operation splitters. Jump to LABEL if
20420 COND is true. Mark the jump as unlikely to be taken. */
20422 static void
20423 emit_unlikely_jump (rtx cond, rtx label)
20425 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
20426 rtx x;
20428 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
20429 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
20430 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
20433 /* A subroutine of the atomic operation splitters. Emit a load-locked
20434 instruction in MODE. For QI/HImode, possibly use a pattern that includes
20435 the zero_extend operation. */
20437 static void
20438 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
20440 rtx (*fn) (rtx, rtx) = NULL;
20442 switch (mode)
20444 case QImode:
20445 fn = gen_load_lockedqi;
20446 break;
20447 case HImode:
20448 fn = gen_load_lockedhi;
20449 break;
20450 case SImode:
20451 if (GET_MODE (mem) == QImode)
20452 fn = gen_load_lockedqi_si;
20453 else if (GET_MODE (mem) == HImode)
20454 fn = gen_load_lockedhi_si;
20455 else
20456 fn = gen_load_lockedsi;
20457 break;
20458 case DImode:
20459 fn = gen_load_lockeddi;
20460 break;
20461 case TImode:
20462 fn = gen_load_lockedti;
20463 break;
20464 default:
20465 gcc_unreachable ();
20467 emit_insn (fn (reg, mem));
20470 /* A subroutine of the atomic operation splitters. Emit a store-conditional
20471 instruction in MODE. */
20473 static void
20474 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
20476 rtx (*fn) (rtx, rtx, rtx) = NULL;
20478 switch (mode)
20480 case QImode:
20481 fn = gen_store_conditionalqi;
20482 break;
20483 case HImode:
20484 fn = gen_store_conditionalhi;
20485 break;
20486 case SImode:
20487 fn = gen_store_conditionalsi;
20488 break;
20489 case DImode:
20490 fn = gen_store_conditionaldi;
20491 break;
20492 case TImode:
20493 fn = gen_store_conditionalti;
20494 break;
20495 default:
20496 gcc_unreachable ();
20499 /* Emit sync before stwcx. to address PPC405 erratum 77. */
20500 if (PPC405_ERRATUM77)
20501 emit_insn (gen_hwsync ());
20503 emit_insn (fn (res, mem, val));
20506 /* Expand barriers before and after a load_locked/store_cond sequence. */
20508 static rtx
20509 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
20511 rtx addr = XEXP (mem, 0);
20512 int strict_p = (reload_in_progress || reload_completed);
20514 if (!legitimate_indirect_address_p (addr, strict_p)
20515 && !legitimate_indexed_address_p (addr, strict_p))
20517 addr = force_reg (Pmode, addr);
20518 mem = replace_equiv_address_nv (mem, addr);
20521 switch (model)
20523 case MEMMODEL_RELAXED:
20524 case MEMMODEL_CONSUME:
20525 case MEMMODEL_ACQUIRE:
20526 case MEMMODEL_SYNC_ACQUIRE:
20527 break;
20528 case MEMMODEL_RELEASE:
20529 case MEMMODEL_SYNC_RELEASE:
20530 case MEMMODEL_ACQ_REL:
20531 emit_insn (gen_lwsync ());
20532 break;
20533 case MEMMODEL_SEQ_CST:
20534 case MEMMODEL_SYNC_SEQ_CST:
20535 emit_insn (gen_hwsync ());
20536 break;
20537 default:
20538 gcc_unreachable ();
20540 return mem;
20543 static void
20544 rs6000_post_atomic_barrier (enum memmodel model)
20546 switch (model)
20548 case MEMMODEL_RELAXED:
20549 case MEMMODEL_CONSUME:
20550 case MEMMODEL_RELEASE:
20551 case MEMMODEL_SYNC_RELEASE:
20552 break;
20553 case MEMMODEL_ACQUIRE:
20554 case MEMMODEL_SYNC_ACQUIRE:
20555 case MEMMODEL_ACQ_REL:
20556 case MEMMODEL_SEQ_CST:
20557 case MEMMODEL_SYNC_SEQ_CST:
20558 emit_insn (gen_isync ());
20559 break;
20560 default:
20561 gcc_unreachable ();
20565 /* A subroutine of the various atomic expanders. For sub-word operations,
20566 we must adjust things to operate on SImode. Given the original MEM,
20567 return a new aligned memory. Also build and return the quantities by
20568 which to shift and mask. */
20570 static rtx
20571 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
20573 rtx addr, align, shift, mask, mem;
20574 HOST_WIDE_INT shift_mask;
20575 machine_mode mode = GET_MODE (orig_mem);
20577 /* For smaller modes, we have to implement this via SImode. */
20578 shift_mask = (mode == QImode ? 0x18 : 0x10);
20580 addr = XEXP (orig_mem, 0);
20581 addr = force_reg (GET_MODE (addr), addr);
20583 /* Aligned memory containing subword. Generate a new memory. We
20584 do not want any of the existing MEM_ATTR data, as we're now
20585 accessing memory outside the original object. */
20586 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
20587 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20588 mem = gen_rtx_MEM (SImode, align);
20589 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
20590 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
20591 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
20593 /* Shift amount for subword relative to aligned word. */
20594 shift = gen_reg_rtx (SImode);
20595 addr = gen_lowpart (SImode, addr);
20596 rtx tmp = gen_reg_rtx (SImode);
20597 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
20598 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
20599 if (BYTES_BIG_ENDIAN)
20600 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
20601 shift, 1, OPTAB_LIB_WIDEN);
20602 *pshift = shift;
20604 /* Mask for insertion. */
20605 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
20606 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
20607 *pmask = mask;
20609 return mem;
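/* A worked example: for a QImode access whose address ends in 0b11,
   the aligned word is at (addr & -4), shift = (addr << 3) & 0x18 = 24
   on little-endian (and is xor'ed with 0x18, giving 0, on big-endian),
   and the insertion mask is 0xff << shift; the byte is then operated
   on in place within its containing SImode word.  */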
20612 /* A subroutine of the various atomic expanders. For sub-word operands,
20613 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
20615 static rtx
20616 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
20618 rtx x;
20620 x = gen_reg_rtx (SImode);
20621 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
20622 gen_rtx_NOT (SImode, mask),
20623 oldval)));
20625 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
20627 return x;
20630 /* A subroutine of the various atomic expanders. For sub-word operands,
20631 extract WIDE to NARROW via SHIFT. */
20633 static void
20634 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
20636 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
20637 wide, 1, OPTAB_LIB_WIDEN);
20638 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
20641 /* Expand an atomic compare and swap operation. */
20643 void
20644 rs6000_expand_atomic_compare_and_swap (rtx operands[])
20646 rtx boolval, retval, mem, oldval, newval, cond;
20647 rtx label1, label2, x, mask, shift;
20648 machine_mode mode, orig_mode;
20649 enum memmodel mod_s, mod_f;
20650 bool is_weak;
20652 boolval = operands[0];
20653 retval = operands[1];
20654 mem = operands[2];
20655 oldval = operands[3];
20656 newval = operands[4];
20657 is_weak = (INTVAL (operands[5]) != 0);
20658 mod_s = memmodel_from_int (INTVAL (operands[6]));
20659 mod_f = memmodel_from_int (INTVAL (operands[7]));
20660 orig_mode = mode = GET_MODE (mem);
20662 mask = shift = NULL_RTX;
20663 if (mode == QImode || mode == HImode)
20665 /* Before power8, we didn't have access to lbarx/lharx, so generate a
20666 lwarx and a shift/mask sequence instead. With power8, we need to do the
20667 comparison in SImode, but the store is still done in QI/HImode. */
20668 oldval = convert_modes (SImode, mode, oldval, 1);
20670 if (!TARGET_SYNC_HI_QI)
20672 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20674 /* Shift and mask OLDVAL into position within the word. */
20675 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
20676 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20678 /* Shift and mask NEWVAL into position within the word. */
20679 newval = convert_modes (SImode, mode, newval, 1);
20680 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
20681 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20684 /* Prepare to adjust the return value. */
20685 retval = gen_reg_rtx (SImode);
20686 mode = SImode;
20688 else if (reg_overlap_mentioned_p (retval, oldval))
20689 oldval = copy_to_reg (oldval);
20691 mem = rs6000_pre_atomic_barrier (mem, mod_s);
20693 label1 = NULL_RTX;
20694 if (!is_weak)
20696 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20697 emit_label (XEXP (label1, 0));
20699 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20701 emit_load_locked (mode, retval, mem);
20703 x = retval;
20704 if (mask)
20706 x = expand_simple_binop (SImode, AND, retval, mask,
20707 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20710 cond = gen_reg_rtx (CCmode);
20711 /* If we have TImode, synthesize a comparison. */
20712 if (mode != TImode)
20713 x = gen_rtx_COMPARE (CCmode, x, oldval);
20714 else
20716 rtx xor1_result = gen_reg_rtx (DImode);
20717 rtx xor2_result = gen_reg_rtx (DImode);
20718 rtx or_result = gen_reg_rtx (DImode);
20719 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
20720 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
20721 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
20722 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
20724 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
20725 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
20726 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
20727 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
20730 emit_insn (gen_rtx_SET (cond, x));
20732 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20733 emit_unlikely_jump (x, label2);
20735 x = newval;
20736 if (mask)
20737 x = rs6000_mask_atomic_subword (retval, newval, mask);
20739 emit_store_conditional (orig_mode, cond, mem, x);
20741 if (!is_weak)
20743 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20744 emit_unlikely_jump (x, label1);
20747 if (!is_mm_relaxed (mod_f))
20748 emit_label (XEXP (label2, 0));
20750 rs6000_post_atomic_barrier (mod_s);
20752 if (is_mm_relaxed (mod_f))
20753 emit_label (XEXP (label2, 0));
20755 if (shift)
20756 rs6000_finish_atomic_subword (operands[1], retval, shift);
20757 else if (mode != GET_MODE (operands[1]))
20758 convert_move (operands[1], retval, 1);
20760 /* In all cases, CR0 contains EQ on success, and NE on failure. */
20761 x = gen_rtx_EQ (SImode, cond, const0_rtx);
20762 emit_insn (gen_rtx_SET (boolval, x));
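/* For a word-sized strong compare-and-swap with SEQ_CST semantics the
   expansion above is roughly the classic reservation loop (register
   numbers illustrative only):
       hwsync                 ; pre-barrier for seq_cst
     1: lwarx   r9,0,rM       ; load-locked current value
       cmpw    cr0,r9,rOLD
       bne-    cr0,2f         ; values differ: fail
       stwcx.  rNEW,0,rM      ; store-conditional
       bne-    cr0,1b         ; reservation lost: retry (strong CAS)
     2: isync                 ; post-barrier
   leaving CR0 holding EQ on success and NE on failure.  */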
20765 /* Expand an atomic exchange operation. */
20767 void
20768 rs6000_expand_atomic_exchange (rtx operands[])
20770 rtx retval, mem, val, cond;
20771 machine_mode mode;
20772 enum memmodel model;
20773 rtx label, x, mask, shift;
20775 retval = operands[0];
20776 mem = operands[1];
20777 val = operands[2];
20778 model = (enum memmodel) INTVAL (operands[3]);
20779 mode = GET_MODE (mem);
20781 mask = shift = NULL_RTX;
20782 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
20784 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20786 /* Shift and mask VAL into position within the word. */
20787 val = convert_modes (SImode, mode, val, 1);
20788 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20789 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20791 /* Prepare to adjust the return value. */
20792 retval = gen_reg_rtx (SImode);
20793 mode = SImode;
20796 mem = rs6000_pre_atomic_barrier (mem, model);
20798 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20799 emit_label (XEXP (label, 0));
20801 emit_load_locked (mode, retval, mem);
20803 x = val;
20804 if (mask)
20805 x = rs6000_mask_atomic_subword (retval, val, mask);
20807 cond = gen_reg_rtx (CCmode);
20808 emit_store_conditional (mode, cond, mem, x);
20810 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20811 emit_unlikely_jump (x, label);
20813 rs6000_post_atomic_barrier (model);
20815 if (shift)
20816 rs6000_finish_atomic_subword (operands[0], retval, shift);
20819 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
20820 to perform. MEM is the memory on which to operate. VAL is the second
20821 operand of the binary operator. BEFORE and AFTER are optional locations to
20822 return the value of MEM either before or after the operation. MODEL_RTX
20823 is a CONST_INT containing the memory model to use. */
20825 void
20826 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
20827 rtx orig_before, rtx orig_after, rtx model_rtx)
20829 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
20830 machine_mode mode = GET_MODE (mem);
20831 machine_mode store_mode = mode;
20832 rtx label, x, cond, mask, shift;
20833 rtx before = orig_before, after = orig_after;
20835 mask = shift = NULL_RTX;
20836 /* On power8, we want to use SImode for the operation. On previous systems,
20837 do the operation on the containing word and shift/mask to get the proper byte or
20838 halfword. */
20839 if (mode == QImode || mode == HImode)
20841 if (TARGET_SYNC_HI_QI)
20843 val = convert_modes (SImode, mode, val, 1);
20845 /* Prepare to adjust the return value. */
20846 before = gen_reg_rtx (SImode);
20847 if (after)
20848 after = gen_reg_rtx (SImode);
20849 mode = SImode;
20851 else
20853 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20855 /* Shift and mask VAL into position within the word. */
20856 val = convert_modes (SImode, mode, val, 1);
20857 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20858 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20860 switch (code)
20862 case IOR:
20863 case XOR:
20864 /* We've already zero-extended VAL. That is sufficient to
20865 make certain that it does not affect other bits. */
20866 mask = NULL;
20867 break;
20869 case AND:
20870 /* If we make certain that all of the other bits in VAL are
20871 set, that will be sufficient to not affect other bits. */
20872 x = gen_rtx_NOT (SImode, mask);
20873 x = gen_rtx_IOR (SImode, x, val);
20874 emit_insn (gen_rtx_SET (val, x));
20875 mask = NULL;
20876 break;
20878 case NOT:
20879 case PLUS:
20880 case MINUS:
20881 /* These will all affect bits outside the field and need
20882 adjustment via MASK within the loop. */
20883 break;
20885 default:
20886 gcc_unreachable ();
20889 /* Prepare to adjust the return value. */
20890 before = gen_reg_rtx (SImode);
20891 if (after)
20892 after = gen_reg_rtx (SImode);
20893 store_mode = mode = SImode;
20897 mem = rs6000_pre_atomic_barrier (mem, model);
20899 label = gen_label_rtx ();
20900 emit_label (label);
20901 label = gen_rtx_LABEL_REF (VOIDmode, label);
20903 if (before == NULL_RTX)
20904 before = gen_reg_rtx (mode);
20906 emit_load_locked (mode, before, mem);
20908 if (code == NOT)
20910 x = expand_simple_binop (mode, AND, before, val,
20911 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20912 after = expand_simple_unop (mode, NOT, x, after, 1);
20914 else
20916 after = expand_simple_binop (mode, code, before, val,
20917 after, 1, OPTAB_LIB_WIDEN);
20920 x = after;
20921 if (mask)
20923 x = expand_simple_binop (SImode, AND, after, mask,
20924 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20925 x = rs6000_mask_atomic_subword (before, x, mask);
20927 else if (store_mode != mode)
20928 x = convert_modes (store_mode, mode, x, 1);
20930 cond = gen_reg_rtx (CCmode);
20931 emit_store_conditional (store_mode, cond, mem, x);
20933 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20934 emit_unlikely_jump (x, label);
20936 rs6000_post_atomic_barrier (model);
20938 if (shift)
20940 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
20941 then do the calculations in an SImode register. */
20942 if (orig_before)
20943 rs6000_finish_atomic_subword (orig_before, before, shift);
20944 if (orig_after)
20945 rs6000_finish_atomic_subword (orig_after, after, shift);
20947 else if (store_mode != mode)
20949 /* QImode/HImode on machines with lbarx/lharx where we do the native
20950 operation and then do the calculations in an SImode register. */
20951 if (orig_before)
20952 convert_move (orig_before, before, 1);
20953 if (orig_after)
20954 convert_move (orig_after, after, 1);
20956 else if (orig_after && after != orig_after)
20957 emit_move_insn (orig_after, after);
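/* For a full-word atomic fetch-and-add with relaxed ordering the loop
   above is essentially (illustrative register numbers):
     1: lwarx   r9,0,rM       ; BEFORE = *mem
       add     r10,r9,rVAL    ; AFTER = BEFORE + VAL
       stwcx.  r10,0,rM
       bne-    cr0,1b         ; retry if the reservation was lost  */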
20960 /* Emit instructions to move SRC to DST. Called by splitters for
20961 multi-register moves. It will emit at most one instruction for
20962 each register that is accessed; that is, it won't emit li/lis pairs
20963 (or equivalent for 64-bit code). One of SRC or DST must be a hard
20964 register. */
20966 void
20967 rs6000_split_multireg_move (rtx dst, rtx src)
20969 /* The register number of the first register being moved. */
20970 int reg;
20971 /* The mode that is to be moved. */
20972 machine_mode mode;
20973 /* The mode that the move is being done in, and its size. */
20974 machine_mode reg_mode;
20975 int reg_mode_size;
20976 /* The number of registers that will be moved. */
20977 int nregs;
20979 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
20980 mode = GET_MODE (dst);
20981 nregs = hard_regno_nregs[reg][mode];
20982 if (FP_REGNO_P (reg))
20983 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
20984 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
20985 else if (ALTIVEC_REGNO_P (reg))
20986 reg_mode = V16QImode;
20987 else if (TARGET_E500_DOUBLE && mode == TFmode)
20988 reg_mode = DFmode;
20989 else
20990 reg_mode = word_mode;
20991 reg_mode_size = GET_MODE_SIZE (reg_mode);
20993 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
20995 /* TDmode residing in FP registers is special, since the ISA requires that
20996 the lower-numbered word of a register pair is always the most significant
20997 word, even in little-endian mode. This does not match the usual subreg
20998 semantics, so we cannot use simplify_gen_subreg in those cases. Access
20999 the appropriate constituent registers "by hand" in little-endian mode.
21001 Note we do not need to check for destructive overlap here since TDmode
21002 can only reside in even/odd register pairs. */
21003 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
21005 rtx p_src, p_dst;
21006 int i;
21008 for (i = 0; i < nregs; i++)
21010 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
21011 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
21012 else
21013 p_src = simplify_gen_subreg (reg_mode, src, mode,
21014 i * reg_mode_size);
21016 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
21017 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
21018 else
21019 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
21020 i * reg_mode_size);
21022 emit_insn (gen_rtx_SET (p_dst, p_src));
21025 return;
21028 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
21030 /* Move register range backwards, if we might have destructive
21031 overlap. */
21032 int i;
21033 for (i = nregs - 1; i >= 0; i--)
21034 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
21035 i * reg_mode_size),
21036 simplify_gen_subreg (reg_mode, src, mode,
21037 i * reg_mode_size)));
21039 else
21041 int i;
21042 int j = -1;
21043 bool used_update = false;
21044 rtx restore_basereg = NULL_RTX;
21046 if (MEM_P (src) && INT_REGNO_P (reg))
21048 rtx breg;
21050 if (GET_CODE (XEXP (src, 0)) == PRE_INC
21051 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
21053 rtx delta_rtx;
21054 breg = XEXP (XEXP (src, 0), 0);
21055 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
21056 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
21057 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
21058 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
21059 src = replace_equiv_address (src, breg);
21061 else if (! rs6000_offsettable_memref_p (src, reg_mode))
21063 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
21065 rtx basereg = XEXP (XEXP (src, 0), 0);
21066 if (TARGET_UPDATE)
21068 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
21069 emit_insn (gen_rtx_SET (ndst,
21070 gen_rtx_MEM (reg_mode,
21071 XEXP (src, 0))));
21072 used_update = true;
21074 else
21075 emit_insn (gen_rtx_SET (basereg,
21076 XEXP (XEXP (src, 0), 1)));
21077 src = replace_equiv_address (src, basereg);
21079 else
21081 rtx basereg = gen_rtx_REG (Pmode, reg);
21082 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
21083 src = replace_equiv_address (src, basereg);
21087 breg = XEXP (src, 0);
21088 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
21089 breg = XEXP (breg, 0);
21091 /* If the base register we are using to address memory is
21092 also a destination reg, then change that register last. */
21093 if (REG_P (breg)
21094 && REGNO (breg) >= REGNO (dst)
21095 && REGNO (breg) < REGNO (dst) + nregs)
21096 j = REGNO (breg) - REGNO (dst);
21098 else if (MEM_P (dst) && INT_REGNO_P (reg))
21100 rtx breg;
21102 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
21103 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
21105 rtx delta_rtx;
21106 breg = XEXP (XEXP (dst, 0), 0);
21107 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
21108 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
21109 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
21111 /* We have to update the breg before doing the store.
21112 Use store with update, if available. */
21114 if (TARGET_UPDATE)
21116 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
21117 emit_insn (TARGET_32BIT
21118 ? (TARGET_POWERPC64
21119 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
21120 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
21121 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
21122 used_update = true;
21124 else
21125 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
21126 dst = replace_equiv_address (dst, breg);
21128 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
21129 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
21131 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
21133 rtx basereg = XEXP (XEXP (dst, 0), 0);
21134 if (TARGET_UPDATE)
21136 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
21137 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
21138 XEXP (dst, 0)),
21139 nsrc));
21140 used_update = true;
21142 else
21143 emit_insn (gen_rtx_SET (basereg,
21144 XEXP (XEXP (dst, 0), 1)));
21145 dst = replace_equiv_address (dst, basereg);
21147 else
21149 rtx basereg = XEXP (XEXP (dst, 0), 0);
21150 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
21151 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
21152 && REG_P (basereg)
21153 && REG_P (offsetreg)
21154 && REGNO (basereg) != REGNO (offsetreg));
21155 if (REGNO (basereg) == 0)
21157 rtx tmp = offsetreg;
21158 offsetreg = basereg;
21159 basereg = tmp;
21161 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
21162 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
21163 dst = replace_equiv_address (dst, basereg);
21166 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
21167 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
21170 for (i = 0; i < nregs; i++)
21172 /* Calculate index to next subword. */
21173 ++j;
21174 if (j == nregs)
21175 j = 0;
21177 /* If the compiler already emitted the move of the first word by
21178 store with update, there is no need to do anything. */
21179 if (j == 0 && used_update)
21180 continue;
21182 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
21183 j * reg_mode_size),
21184 simplify_gen_subreg (reg_mode, src, mode,
21185 j * reg_mode_size)));
21187 if (restore_basereg != NULL_RTX)
21188 emit_insn (restore_basereg);
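/* As an example of the common case: a TImode register-to-register
   move on a 64-bit target uses reg_mode = DImode and nregs = 2, so it
   splits into two DImode sets; when the source and destination GPR
   ranges overlap destructively, the subwords are emitted in reverse
   order instead.  */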
21193 /* This page contains routines that are used to determine what the
21194 function prologue and epilogue code will do and write them out. */
21196 static inline bool
21197 save_reg_p (int r)
21199 return !call_used_regs[r] && df_regs_ever_live_p (r);
21202 /* Return the first fixed-point register that is required to be
21203 saved. 32 if none. */
21206 first_reg_to_save (void)
21208 int first_reg;
21210 /* Find lowest numbered live register. */
21211 for (first_reg = 13; first_reg <= 31; first_reg++)
21212 if (save_reg_p (first_reg))
21213 break;
21215 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
21216 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
21217 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
21218 || (TARGET_TOC && TARGET_MINIMAL_TOC))
21219 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21220 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
21222 #if TARGET_MACHO
21223 if (flag_pic
21224 && crtl->uses_pic_offset_table
21225 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
21226 return RS6000_PIC_OFFSET_TABLE_REGNUM;
21227 #endif
21229 return first_reg;
21232 /* Similar, for FP regs. */
21235 first_fp_reg_to_save (void)
21237 int first_reg;
21239 /* Find lowest numbered live register. */
21240 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
21241 if (save_reg_p (first_reg))
21242 break;
21244 return first_reg;
21247 /* Similar, for AltiVec regs. */
21249 static int
21250 first_altivec_reg_to_save (void)
21252 int i;
21254 /* Stack frame remains as is unless we are in AltiVec ABI. */
21255 if (! TARGET_ALTIVEC_ABI)
21256 return LAST_ALTIVEC_REGNO + 1;
21258 /* On Darwin, the unwind routines are compiled without
21259 TARGET_ALTIVEC, and use save_world to save/restore the
21260 altivec registers when necessary. */
21261 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21262 && ! TARGET_ALTIVEC)
21263 return FIRST_ALTIVEC_REGNO + 20;
21265 /* Find lowest numbered live register. */
21266 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
21267 if (save_reg_p (i))
21268 break;
21270 return i;
21273 /* Return a 32-bit mask of the AltiVec registers we need to set in
21274 VRSAVE. Bit n of the return value is 1 if Vn is live; bit 0 is
21275 the MSB of the 32-bit word. */
21277 static unsigned int
21278 compute_vrsave_mask (void)
21280 unsigned int i, mask = 0;
21282 /* On Darwin, the unwind routines are compiled without
21283 TARGET_ALTIVEC, and use save_world to save/restore the
21284 call-saved altivec registers when necessary. */
21285 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21286 && ! TARGET_ALTIVEC)
21287 mask |= 0xFFF;
21289 /* First, find out if we use _any_ altivec registers. */
21290 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
21291 if (df_regs_ever_live_p (i))
21292 mask |= ALTIVEC_REG_BIT (i);
21294 if (mask == 0)
21295 return mask;
21297 /* Next, remove the argument registers from the set. These must
21298 be in the VRSAVE mask set by the caller, so we don't need to add
21299 them in again. More importantly, the mask we compute here is
21300 used to generate CLOBBERs in the set_vrsave insn, and we do not
21301 wish the argument registers to die. */
21302 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
21303 mask &= ~ALTIVEC_REG_BIT (i);
21305 /* Similarly, remove the return value from the set. */
21307 bool yes = false;
21308 diddle_return_value (is_altivec_return_reg, &yes);
21309 if (yes)
21310 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
21313 return mask;
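/* For example, if V20 is the only live AltiVec register and it is
   neither an argument nor the return-value register, the computed
   mask is 0x80000000 >> 20 == 0x800, following the numbering above
   (bit 0 is the MSB, so Vn corresponds to 0x80000000 >> n).  */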
21316 /* For a very restricted set of circumstances, we can cut down the
21317 size of prologues/epilogues by calling our own save/restore-the-world
21318 routines. */
21320 static void
21321 compute_save_world_info (rs6000_stack_t *info_ptr)
21323 info_ptr->world_save_p = 1;
21324 info_ptr->world_save_p
21325 = (WORLD_SAVE_P (info_ptr)
21326 && DEFAULT_ABI == ABI_DARWIN
21327 && !cfun->has_nonlocal_label
21328 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
21329 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
21330 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
21331 && info_ptr->cr_save_p);
21333 /* This will not work in conjunction with sibcalls. Make sure there
21334 are none. (This check is expensive, but seldom executed.) */
21335 if (WORLD_SAVE_P (info_ptr))
21337 rtx_insn *insn;
21338 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
21339 if (CALL_P (insn) && SIBLING_CALL_P (insn))
21341 info_ptr->world_save_p = 0;
21342 break;
21346 if (WORLD_SAVE_P (info_ptr))
21348 /* Even if we're not touching VRsave, make sure there's room on the
21349 stack for it, if it looks like we're calling SAVE_WORLD, which
21350 will attempt to save it. */
21351 info_ptr->vrsave_size = 4;
21353 /* If we are going to save the world, we need to save the link register too. */
21354 info_ptr->lr_save_p = 1;
21356 /* "Save" the VRsave register too if we're saving the world. */
21357 if (info_ptr->vrsave_mask == 0)
21358 info_ptr->vrsave_mask = compute_vrsave_mask ();
21360 /* Because the Darwin register save/restore routines only handle
21361 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
21362 check. */
21363 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
21364 && (info_ptr->first_altivec_reg_save
21365 >= FIRST_SAVED_ALTIVEC_REGNO));
21367 return;
21371 static void
21372 is_altivec_return_reg (rtx reg, void *xyes)
21374 bool *yes = (bool *) xyes;
21375 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
21376 *yes = true;
21380 /* Look for user-defined global regs in the range FIRST to LAST-1.
21381 We should not restore these, and so cannot use lmw or out-of-line
21382 restore functions if there are any. We also can't save them
21383 (well, emit frame notes for them), because frame unwinding during
21384 exception handling will restore saved registers. */
21386 static bool
21387 global_regs_p (unsigned first, unsigned last)
21389 while (first < last)
21390 if (global_regs[first++])
21391 return true;
21392 return false;
21395 /* Determine the strategy for saving/restoring registers. */
21397 enum {
21398 SAVRES_MULTIPLE = 0x1,
21399 SAVE_INLINE_FPRS = 0x2,
21400 SAVE_INLINE_GPRS = 0x4,
21401 REST_INLINE_FPRS = 0x8,
21402 REST_INLINE_GPRS = 0x10,
21403 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
21404 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
21405 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
21406 SAVE_INLINE_VRS = 0x100,
21407 REST_INLINE_VRS = 0x200
21410 static int
21411 rs6000_savres_strategy (rs6000_stack_t *info,
21412 bool using_static_chain_p)
21414 int strategy = 0;
21415 bool lr_save_p;
21417 if (TARGET_MULTIPLE
21418 && !TARGET_POWERPC64
21419 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
21420 && info->first_gp_reg_save < 31
21421 && !global_regs_p (info->first_gp_reg_save, 32))
21422 strategy |= SAVRES_MULTIPLE;
21424 if (crtl->calls_eh_return
21425 || cfun->machine->ra_need_lr)
21426 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
21427 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
21428 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21430 if (info->first_fp_reg_save == 64
21431 /* The out-of-line FP routines use double-precision stores;
21432 we can't use those routines if we don't have such stores. */
21433 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
21434 || global_regs_p (info->first_fp_reg_save, 64))
21435 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21437 if (info->first_gp_reg_save == 32
21438 || (!(strategy & SAVRES_MULTIPLE)
21439 && global_regs_p (info->first_gp_reg_save, 32)))
21440 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21442 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
21443 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
21444 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21446 /* Define cutoff for using out-of-line functions to save registers. */
21447 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
21449 if (!optimize_size)
21451 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21452 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21453 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21455 else
21457 /* Prefer out-of-line restore if it will exit. */
21458 if (info->first_fp_reg_save > 61)
21459 strategy |= SAVE_INLINE_FPRS;
21460 if (info->first_gp_reg_save > 29)
21462 if (info->first_fp_reg_save == 64)
21463 strategy |= SAVE_INLINE_GPRS;
21464 else
21465 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21467 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
21468 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21471 else if (DEFAULT_ABI == ABI_DARWIN)
21473 if (info->first_fp_reg_save > 60)
21474 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21475 if (info->first_gp_reg_save > 29)
21476 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21477 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21479 else
21481 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
21482 if (info->first_fp_reg_save > 61)
21483 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21484 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21485 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21488 /* Don't bother to try to save things out-of-line if r11 is occupied
21489 by the static chain. It would require too much fiddling and the
21490 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
21491 pointer on Darwin, and AIX uses r1 or r12. */
21492 if (using_static_chain_p
21493 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
21494 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
21495 | SAVE_INLINE_GPRS
21496 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21498 /* We can only use the out-of-line routines to restore if we've
21499 saved all the registers from first_fp_reg_save in the prologue.
21500 Otherwise, we risk loading garbage. */
21501 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
21503 int i;
21505 for (i = info->first_fp_reg_save; i < 64; i++)
21506 if (!save_reg_p (i))
21508 strategy |= REST_INLINE_FPRS;
21509 break;
21513 /* If we are going to use store multiple, then don't even bother
21514 with the out-of-line routines, since the store-multiple
21515 instruction will always be smaller. */
21516 if ((strategy & SAVRES_MULTIPLE))
21517 strategy |= SAVE_INLINE_GPRS;
21519 /* info->lr_save_p isn't yet set if the only reason lr needs to be
21520 saved is an out-of-line save or restore. Set up the value for
21521 the next test (excluding out-of-line gpr restore). */
21522 lr_save_p = (info->lr_save_p
21523 || !(strategy & SAVE_INLINE_GPRS)
21524 || !(strategy & SAVE_INLINE_FPRS)
21525 || !(strategy & SAVE_INLINE_VRS)
21526 || !(strategy & REST_INLINE_FPRS)
21527 || !(strategy & REST_INLINE_VRS));
21529 /* The situation is more complicated with load multiple. We'd
21530 prefer to use the out-of-line routines for restores, since the
21531 "exit" out-of-line routines can handle the restore of LR and the
21532 frame teardown. However, it doesn't make sense to use the
21533 out-of-line routine if that is the only reason we'd need to save
21534 LR, and we can't use the "exit" out-of-line gpr restore if we
21535 have saved some fprs; in those cases it is advantageous to use
21536 load multiple when available. */
21537 if ((strategy & SAVRES_MULTIPLE)
21538 && (!lr_save_p
21539 || info->first_fp_reg_save != 64))
21540 strategy |= REST_INLINE_GPRS;
21542 /* Saving CR interferes with the exit routines used on the SPE, so
21543 just punt here. */
21544 if (TARGET_SPE_ABI
21545 && info->spe_64bit_regs_used
21546 && info->cr_save_p)
21547 strategy |= REST_INLINE_GPRS;
21549 /* We can only use load multiple or the out-of-line routines to
21550 restore if we've used store multiple or out-of-line routines
21551 in the prologue, i.e. if we've saved all the registers from
21552 first_gp_reg_save. Otherwise, we risk loading garbage. */
21553 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
21554 == SAVE_INLINE_GPRS)
21556 int i;
21558 for (i = info->first_gp_reg_save; i < 32; i++)
21559 if (!save_reg_p (i))
21561 strategy |= REST_INLINE_GPRS;
21562 break;
21566 if (TARGET_ELF && TARGET_64BIT)
21568 if (!(strategy & SAVE_INLINE_FPRS))
21569 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21570 else if (!(strategy & SAVE_INLINE_GPRS)
21571 && info->first_fp_reg_save == 64)
21572 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
21574 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
21575 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
21577 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
21578 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21580 return strategy;
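/* Callers treat the result as a bitmask, along the lines of:
     int s = rs6000_savres_strategy (info, using_static_chain_p);
     if (s & SAVRES_MULTIPLE)
       ... save the GPRs with a single store-multiple (stmw) ...
     if (!(s & SAVE_INLINE_FPRS))
       ... call an out-of-line FPR save routine instead ...
   (on 64-bit ELF the out-of-line helpers are the _savegpr0_N /
   _restgpr0_N family provided by the runtime).  */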
21583 /* Calculate the stack information for the current function. This is
21584 complicated by having two separate calling sequences, the AIX calling
21585 sequence and the V.4 calling sequence.
21587 AIX (and Darwin/Mac OS X) stack frames look like:
21588 32-bit 64-bit
21589 SP----> +---------------------------------------+
21590 | back chain to caller | 0 0
21591 +---------------------------------------+
21592 | saved CR | 4 8 (8-11)
21593 +---------------------------------------+
21594 | saved LR | 8 16
21595 +---------------------------------------+
21596 | reserved for compilers | 12 24
21597 +---------------------------------------+
21598 | reserved for binders | 16 32
21599 +---------------------------------------+
21600 | saved TOC pointer | 20 40
21601 +---------------------------------------+
21602 | Parameter save area (P) | 24 48
21603 +---------------------------------------+
21604 | Alloca space (A) | 24+P etc.
21605 +---------------------------------------+
21606 | Local variable space (L) | 24+P+A
21607 +---------------------------------------+
21608 | Float/int conversion temporary (X) | 24+P+A+L
21609 +---------------------------------------+
21610 | Save area for AltiVec registers (W) | 24+P+A+L+X
21611 +---------------------------------------+
21612 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
21613 +---------------------------------------+
21614 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
21615 +---------------------------------------+
21616 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
21617 +---------------------------------------+
21618 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
21619 +---------------------------------------+
21620 old SP->| back chain to caller's caller |
21621 +---------------------------------------+
21623 The required alignment for AIX configurations is two words (i.e., 8
21624 or 16 bytes).
21626 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
21628 SP----> +---------------------------------------+
21629 | Back chain to caller | 0
21630 +---------------------------------------+
21631 | Save area for CR | 8
21632 +---------------------------------------+
21633 | Saved LR | 16
21634 +---------------------------------------+
21635 | Saved TOC pointer | 24
21636 +---------------------------------------+
21637 | Parameter save area (P) | 32
21638 +---------------------------------------+
21639 | Alloca space (A) | 32+P
21640 +---------------------------------------+
21641 | Local variable space (L) | 32+P+A
21642 +---------------------------------------+
21643 | Save area for AltiVec registers (W) | 32+P+A+L
21644 +---------------------------------------+
21645 | AltiVec alignment padding (Y) | 32+P+A+L+W
21646 +---------------------------------------+
21647 | Save area for GP registers (G) | 32+P+A+L+W+Y
21648 +---------------------------------------+
21649 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
21650 +---------------------------------------+
21651 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
21652 +---------------------------------------+
21655 V.4 stack frames look like:
21657 SP----> +---------------------------------------+
21658 | back chain to caller | 0
21659 +---------------------------------------+
21660 | caller's saved LR | 4
21661 +---------------------------------------+
21662 | Parameter save area (P) | 8
21663 +---------------------------------------+
21664 | Alloca space (A) | 8+P
21665 +---------------------------------------+
21666 | Varargs save area (V) | 8+P+A
21667 +---------------------------------------+
21668 | Local variable space (L) | 8+P+A+V
21669 +---------------------------------------+
21670 | Float/int conversion temporary (X) | 8+P+A+V+L
21671 +---------------------------------------+
21672 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
21673 +---------------------------------------+
21674 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
21675 +---------------------------------------+
21676 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
21677 +---------------------------------------+
21678 | SPE: area for 64-bit GP registers |
21679 +---------------------------------------+
21680 | SPE alignment padding |
21681 +---------------------------------------+
21682 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
21683 +---------------------------------------+
21684 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
21685 +---------------------------------------+
21686 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
21687 +---------------------------------------+
21688 old SP->| back chain to caller's caller |
21689 +---------------------------------------+
21691 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
21692 given. (But note below and in sysv4.h that we require only 8 and
21693 may round up the size of our stack frame anyway. The historical
21694 reason is early versions of powerpc-linux which didn't properly
21695 align the stack at program startup. A happy side-effect is that
21696 -mno-eabi libraries can be used with -meabi programs.)
21698 The EABI configuration defaults to the V.4 layout. However,
21699 the stack alignment requirements may differ. If -mno-eabi is not
21700 given, the required stack alignment is 8 bytes; if -mno-eabi is
21701 given, the required alignment is 16 bytes. (But see V.4 comment
21702 above.) */
21704 #ifndef ABI_STACK_BOUNDARY
21705 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
21706 #endif
21708 static rs6000_stack_t *
21709 rs6000_stack_info (void)
21711 /* We should never be called for thunks; we are not set up for that. */
21712 gcc_assert (!cfun->is_thunk);
21714 rs6000_stack_t *info_ptr = &stack_info;
21715 int reg_size = TARGET_32BIT ? 4 : 8;
21716 int ehrd_size;
21717 int ehcr_size;
21718 int save_align;
21719 int first_gp;
21720 HOST_WIDE_INT non_fixed_size;
21721 bool using_static_chain_p;
21723 if (reload_completed && info_ptr->reload_completed)
21724 return info_ptr;
21726 memset (info_ptr, 0, sizeof (*info_ptr));
21727 info_ptr->reload_completed = reload_completed;
21729 if (TARGET_SPE)
21731 /* Cache value so we don't rescan instruction chain over and over. */
21732 if (cfun->machine->insn_chain_scanned_p == 0)
21733 cfun->machine->insn_chain_scanned_p
21734 = spe_func_has_64bit_regs_p () + 1;
21735 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
21738 /* Select which calling sequence. */
21739 info_ptr->abi = DEFAULT_ABI;
21741 /* Calculate which registers need to be saved & save area size. */
21742 info_ptr->first_gp_reg_save = first_reg_to_save ();
21743 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
21744 even if it currently looks like we won't. Reload may need it to
21745 get at a constant; if so, it will have already created a constant
21746 pool entry for it. */
21747 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
21748 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
21749 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
21750 && crtl->uses_const_pool
21751 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
21752 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
21753 else
21754 first_gp = info_ptr->first_gp_reg_save;
21756 info_ptr->gp_size = reg_size * (32 - first_gp);
21758 /* For the SPE, we have an additional upper 32-bits on each GPR.
21759 Ideally we should save the entire 64-bits only when the upper
21760 half is used in SIMD instructions. Since we only record
21761 registers live (not the size they are used in), this proves
21762 difficult because we'd have to traverse the instruction chain at
21763 the right time, taking reload into account. This is a real pain,
21764 so we opt to always save all GPRs in 64 bits if even one register
21765 is used in 64 bits. Otherwise, all the registers in the frame
21766 get saved in 32 bits.
21768 So, when we save all GPRs (except the SP) in 64 bits, the
21769 traditional GP save area will be empty.
21770 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21771 info_ptr->gp_size = 0;
21773 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
21774 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
21776 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
21777 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
21778 - info_ptr->first_altivec_reg_save);
21780 /* Does this function call anything? */
21781 info_ptr->calls_p = (! crtl->is_leaf
21782 || cfun->machine->ra_needs_full_frame);
21784 /* Determine if we need to save the condition code registers. */
21785 if (df_regs_ever_live_p (CR2_REGNO)
21786 || df_regs_ever_live_p (CR3_REGNO)
21787 || df_regs_ever_live_p (CR4_REGNO))
21789 info_ptr->cr_save_p = 1;
21790 if (DEFAULT_ABI == ABI_V4)
21791 info_ptr->cr_size = reg_size;
21794 /* If the current function calls __builtin_eh_return, then we need
21795 to allocate stack space for registers that will hold data for
21796 the exception handler. */
21797 if (crtl->calls_eh_return)
21799 unsigned int i;
21800 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
21801 continue;
21803 /* SPE saves EH registers in 64-bits. */
21804 ehrd_size = i * (TARGET_SPE_ABI
21805 && info_ptr->spe_64bit_regs_used != 0
21806 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
21808 else
21809 ehrd_size = 0;
21811 /* In the ELFv2 ABI, we also need to allocate space for separate
21812 CR field save areas if the function calls __builtin_eh_return. */
21813 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
21815 /* This hard-codes that we have three call-saved CR fields. */
21816 ehcr_size = 3 * reg_size;
21817 /* We do *not* use the regular CR save mechanism. */
21818 info_ptr->cr_save_p = 0;
21820 else
21821 ehcr_size = 0;
21823 /* Determine various sizes. */
21824 info_ptr->reg_size = reg_size;
21825 info_ptr->fixed_size = RS6000_SAVE_AREA;
21826 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
21827 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
21828 TARGET_ALTIVEC ? 16 : 8);
21829 if (FRAME_GROWS_DOWNWARD)
21830 info_ptr->vars_size
21831 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
21832 + info_ptr->parm_size,
21833 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
21834 - (info_ptr->fixed_size + info_ptr->vars_size
21835 + info_ptr->parm_size);
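/* A sketch of the adjustment above, with made-up sizes: if
fixed_size + vars_size + parm_size is 64 + 40 + 32 = 136 and the
boundary is 16 bytes, the aligned total is 144, so the 8 bytes of
padding are folded into vars_size (40 -> 48). */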
21837 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21838 info_ptr->spe_gp_size = 8 * (32 - first_gp);
21839 else
21840 info_ptr->spe_gp_size = 0;
21842 if (TARGET_ALTIVEC_ABI)
21843 info_ptr->vrsave_mask = compute_vrsave_mask ();
21844 else
21845 info_ptr->vrsave_mask = 0;
21847 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
21848 info_ptr->vrsave_size = 4;
21849 else
21850 info_ptr->vrsave_size = 0;
21852 compute_save_world_info (info_ptr);
21854 /* Calculate the offsets. */
21855 switch (DEFAULT_ABI)
21857 case ABI_NONE:
21858 default:
21859 gcc_unreachable ();
21861 case ABI_AIX:
21862 case ABI_ELFv2:
21863 case ABI_DARWIN:
21864 info_ptr->fp_save_offset = - info_ptr->fp_size;
21865 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21867 if (TARGET_ALTIVEC_ABI)
21869 info_ptr->vrsave_save_offset
21870 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
21872 /* Align stack so vector save area is on a quadword boundary.
21873 The padding goes above the vectors. */
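/* For instance (illustrative offset): if vrsave_save_offset is -20,
then -20 & 0xF is 12 in two's complement, and the vector save area
ends up at -20 - 12 - altivec_size; since altivec_size is a
multiple of 16, that offset is 16-byte aligned, as the assert
below checks. */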
21874 if (info_ptr->altivec_size != 0)
21875 info_ptr->altivec_padding_size
21876 = info_ptr->vrsave_save_offset & 0xF;
21877 else
21878 info_ptr->altivec_padding_size = 0;
21880 info_ptr->altivec_save_offset
21881 = info_ptr->vrsave_save_offset
21882 - info_ptr->altivec_padding_size
21883 - info_ptr->altivec_size;
21884 gcc_assert (info_ptr->altivec_size == 0
21885 || info_ptr->altivec_save_offset % 16 == 0);
21887 /* Adjust for AltiVec case. */
21888 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
21890 else
21891 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
21893 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
21894 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
21895 info_ptr->lr_save_offset = 2*reg_size;
21896 break;
21898 case ABI_V4:
21899 info_ptr->fp_save_offset = - info_ptr->fp_size;
21900 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21901 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
21903 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21905 /* Align stack so SPE GPR save area is aligned on a
21906 double-word boundary. */
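/* For example (hypothetical offset): with cr_save_offset == -12 the
padding is 8 - (12 % 8) == 4, placing the SPE GPR area at
-16 - spe_gp_size, which is doubleword aligned since spe_gp_size is
a multiple of 8. (Note the formula yields 8, not 0, when the
offset is already aligned; that merely wastes a doubleword.) */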
21907 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
21908 info_ptr->spe_padding_size
21909 = 8 - (-info_ptr->cr_save_offset % 8);
21910 else
21911 info_ptr->spe_padding_size = 0;
21913 info_ptr->spe_gp_save_offset
21914 = info_ptr->cr_save_offset
21915 - info_ptr->spe_padding_size
21916 - info_ptr->spe_gp_size;
21918 /* Adjust for SPE case. */
21919 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
21921 else if (TARGET_ALTIVEC_ABI)
21923 info_ptr->vrsave_save_offset
21924 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
21926 /* Align stack so vector save area is on a quadword boundary. */
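/* E.g. (illustrative): vrsave_save_offset == -20 gives padding
16 - (20 % 16) == 12, putting the vector area at -32 - altivec_size,
a 16-byte aligned offset. */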
21927 if (info_ptr->altivec_size != 0)
21928 info_ptr->altivec_padding_size
21929 = 16 - (-info_ptr->vrsave_save_offset % 16);
21930 else
21931 info_ptr->altivec_padding_size = 0;
21933 info_ptr->altivec_save_offset
21934 = info_ptr->vrsave_save_offset
21935 - info_ptr->altivec_padding_size
21936 - info_ptr->altivec_size;
21938 /* Adjust for AltiVec case. */
21939 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
21941 else
21942 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
21943 info_ptr->ehrd_offset -= ehrd_size;
21944 info_ptr->lr_save_offset = reg_size;
21945 break;
21948 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
21949 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
21950 + info_ptr->gp_size
21951 + info_ptr->altivec_size
21952 + info_ptr->altivec_padding_size
21953 + info_ptr->spe_gp_size
21954 + info_ptr->spe_padding_size
21955 + ehrd_size
21956 + ehcr_size
21957 + info_ptr->cr_size
21958 + info_ptr->vrsave_size,
21959 save_align);
21961 non_fixed_size = (info_ptr->vars_size
21962 + info_ptr->parm_size
21963 + info_ptr->save_size);
21965 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
21966 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
21968 /* Determine if we need to save the link register. */
21969 if (info_ptr->calls_p
21970 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21971 && crtl->profile
21972 && !TARGET_PROFILE_KERNEL)
21973 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
21974 #ifdef TARGET_RELOCATABLE
21975 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
21976 #endif
21977 || rs6000_ra_ever_killed ())
21978 info_ptr->lr_save_p = 1;
21980 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
21981 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
21982 && call_used_regs[STATIC_CHAIN_REGNUM]);
21983 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
21984 using_static_chain_p);
21986 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
21987 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
21988 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
21989 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
21990 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
21991 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
21992 info_ptr->lr_save_p = 1;
21994 if (info_ptr->lr_save_p)
21995 df_set_regs_ever_live (LR_REGNO, true);
21997 /* Determine if we need to allocate any stack frame:
21999 For AIX we need to push the stack if a frame pointer is needed
22000 (because the stack might be dynamically adjusted), if we are
22001 debugging, if we make calls, or if the sum of fp_save, gp_save,
22002 and local variables is more than the space needed to save all
22003 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
22004 + 18*8 = 288 (GPR13 reserved).
22006 For V.4 we don't have the stack cushion that AIX uses, but assume
22007 that the debugger can handle stackless frames. */
22009 if (info_ptr->calls_p)
22010 info_ptr->push_p = 1;
22012 else if (DEFAULT_ABI == ABI_V4)
22013 info_ptr->push_p = non_fixed_size != 0;
22015 else if (frame_pointer_needed)
22016 info_ptr->push_p = 1;
22018 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
22019 info_ptr->push_p = 1;
22021 else
22022 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
22024 return info_ptr;
22027 /* Return true if the current function uses any GPRs in 64-bit SIMD
22028 mode. */
22030 static bool
22031 spe_func_has_64bit_regs_p (void)
22033 rtx_insn *insns, *insn;
22035 /* Functions that save and restore all the call-saved registers will
22036 need to save/restore the registers in 64-bits. */
22037 if (crtl->calls_eh_return
22038 || cfun->calls_setjmp
22039 || crtl->has_nonlocal_goto)
22040 return true;
22042 insns = get_insns ();
22044 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
22046 if (INSN_P (insn))
22048 rtx i;
22050 /* FIXME: This should be implemented with attributes...
22052 (set_attr "spe64" "true")....then,
22053 if (get_spe64(insn)) return true;
22055 It's the only reliable way to do the stuff below. */
22057 i = PATTERN (insn);
22058 if (GET_CODE (i) == SET)
22060 machine_mode mode = GET_MODE (SET_SRC (i));
22062 if (SPE_VECTOR_MODE (mode))
22063 return true;
22064 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
22065 return true;
22070 return false;
22073 static void
22074 debug_stack_info (rs6000_stack_t *info)
22076 const char *abi_string;
22078 if (! info)
22079 info = rs6000_stack_info ();
22081 fprintf (stderr, "\nStack information for function %s:\n",
22082 ((current_function_decl && DECL_NAME (current_function_decl))
22083 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
22084 : "<unknown>"));
22086 switch (info->abi)
22088 default: abi_string = "Unknown"; break;
22089 case ABI_NONE: abi_string = "NONE"; break;
22090 case ABI_AIX: abi_string = "AIX"; break;
22091 case ABI_ELFv2: abi_string = "ELFv2"; break;
22092 case ABI_DARWIN: abi_string = "Darwin"; break;
22093 case ABI_V4: abi_string = "V.4"; break;
22096 fprintf (stderr, "\tABI = %5s\n", abi_string);
22098 if (TARGET_ALTIVEC_ABI)
22099 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
22101 if (TARGET_SPE_ABI)
22102 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
22104 if (info->first_gp_reg_save != 32)
22105 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
22107 if (info->first_fp_reg_save != 64)
22108 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
22110 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
22111 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
22112 info->first_altivec_reg_save);
22114 if (info->lr_save_p)
22115 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
22117 if (info->cr_save_p)
22118 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
22120 if (info->vrsave_mask)
22121 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
22123 if (info->push_p)
22124 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
22126 if (info->calls_p)
22127 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
22129 if (info->gp_size)
22130 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
22132 if (info->fp_size)
22133 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
22135 if (info->altivec_size)
22136 fprintf (stderr, "\taltivec_save_offset = %5d\n",
22137 info->altivec_save_offset);
22139 if (info->spe_gp_size)
22140 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
22141 info->spe_gp_save_offset);
22143 if (info->vrsave_size)
22144 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
22145 info->vrsave_save_offset);
22147 if (info->lr_save_p)
22148 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
22150 if (info->cr_save_p)
22151 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
22153 if (info->varargs_save_offset)
22154 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
22156 if (info->total_size)
22157 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
22158 info->total_size);
22160 if (info->vars_size)
22161 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
22162 info->vars_size);
22164 if (info->parm_size)
22165 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
22167 if (info->fixed_size)
22168 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
22170 if (info->gp_size)
22171 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
22173 if (info->spe_gp_size)
22174 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
22176 if (info->fp_size)
22177 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
22179 if (info->altivec_size)
22180 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
22182 if (info->vrsave_size)
22183 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
22185 if (info->altivec_padding_size)
22186 fprintf (stderr, "\taltivec_padding_size= %5d\n",
22187 info->altivec_padding_size);
22189 if (info->spe_padding_size)
22190 fprintf (stderr, "\tspe_padding_size = %5d\n",
22191 info->spe_padding_size);
22193 if (info->cr_size)
22194 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
22196 if (info->save_size)
22197 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
22199 if (info->reg_size != 4)
22200 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
22202 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
22204 fprintf (stderr, "\n");
22207 rtx
22208 rs6000_return_addr (int count, rtx frame)
22210 /* Currently we don't optimize very well between prolog and body
22211 code, and for PIC code the result can actually be quite bad, so
22212 don't try to be too clever here. */
22213 if (count != 0
22214 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
22216 cfun->machine->ra_needs_full_frame = 1;
22218 return
22219 gen_rtx_MEM
22220 (Pmode,
22221 memory_address
22222 (Pmode,
22223 plus_constant (Pmode,
22224 copy_to_reg
22225 (gen_rtx_MEM (Pmode,
22226 memory_address (Pmode, frame))),
22227 RETURN_ADDRESS_OFFSET)));
22230 cfun->machine->ra_need_lr = 1;
22231 return get_hard_reg_initial_val (Pmode, LR_REGNO);
22234 /* Say whether a function is a candidate for sibcall handling or not. */
22236 static bool
22237 rs6000_function_ok_for_sibcall (tree decl, tree exp)
22239 tree fntype;
22241 if (decl)
22242 fntype = TREE_TYPE (decl);
22243 else
22244 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
22246 /* We can't do it if the called function has more vector parameters
22247 than the current function; there's nowhere to put the VRsave code. */
22248 if (TARGET_ALTIVEC_ABI
22249 && TARGET_ALTIVEC_VRSAVE
22250 && !(decl && decl == current_function_decl))
22252 function_args_iterator args_iter;
22253 tree type;
22254 int nvreg = 0;
22256 /* Functions with vector parameters are required to have a
22257 prototype, so the argument type info must be available
22258 here. */
22259 FOREACH_FUNCTION_ARGS (fntype, type, args_iter)
22260 if (TREE_CODE (type) == VECTOR_TYPE
22261 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22262 nvreg++;
22264 FOREACH_FUNCTION_ARGS (TREE_TYPE (current_function_decl), type, args_iter)
22265 if (TREE_CODE (type) == VECTOR_TYPE
22266 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22267 nvreg--;
22269 if (nvreg > 0)
22270 return false;
22273 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
22274 functions, because the callee may have a different TOC pointer from
22275 the caller and there's no way to ensure we restore the TOC when
22276 we return. With the secure-plt SYSV ABI we can't make non-local
22277 calls when -fpic/PIC because the plt call stubs use r30. */
22278 if (DEFAULT_ABI == ABI_DARWIN
22279 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22280 && decl
22281 && !DECL_EXTERNAL (decl)
22282 && (*targetm.binds_local_p) (decl))
22283 || (DEFAULT_ABI == ABI_V4
22284 && (!TARGET_SECURE_PLT
22285 || !flag_pic
22286 || (decl
22287 && (*targetm.binds_local_p) (decl)))))
22289 tree attr_list = TYPE_ATTRIBUTES (fntype);
22291 if (!lookup_attribute ("longcall", attr_list)
22292 || lookup_attribute ("shortcall", attr_list))
22293 return true;
22296 return false;
22299 static int
22300 rs6000_ra_ever_killed (void)
22302 rtx_insn *top;
22303 rtx reg;
22304 rtx_insn *insn;
22306 if (cfun->is_thunk)
22307 return 0;
22309 if (cfun->machine->lr_save_state)
22310 return cfun->machine->lr_save_state - 1;
22312 /* regs_ever_live has LR marked as used if any sibcalls are present,
22313 but this should not force saving and restoring in the
22314 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
22315 clobbers LR, so that is inappropriate. */
22317 /* Also, the prologue can generate a store into LR that
22318 doesn't really count, like this:
22320 move LR->R0
22321 bcl to set PIC register
22322 move LR->R31
22323 move R0->LR
22325 When we're called from the epilogue, we need to avoid counting
22326 this as a store. */
22328 push_topmost_sequence ();
22329 top = get_insns ();
22330 pop_topmost_sequence ();
22331 reg = gen_rtx_REG (Pmode, LR_REGNO);
22333 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
22335 if (INSN_P (insn))
22337 if (CALL_P (insn))
22339 if (!SIBLING_CALL_P (insn))
22340 return 1;
22342 else if (find_regno_note (insn, REG_INC, LR_REGNO))
22343 return 1;
22344 else if (set_of (reg, insn) != NULL_RTX
22345 && !prologue_epilogue_contains (insn))
22346 return 1;
22349 return 0;
22352 /* Emit instructions needed to load the TOC register.
22353 This is only needed when TARGET_TOC, TARGET_MINIMAL_TOC, and there is
22354 a constant pool; or for SVR4 -fpic. */
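/* As a rough illustration (not the literal output of any one pattern
below), the 32-bit SVR4 -fPIC case boils down to a sequence like

	bcl 20,31,.LCF0
   .LCF0:
	mflr 30
	addis 30,30,.Lgot-.LCF0@ha
	addi 30,30,.Lgot-.LCF0@l

i.e. a PC-relative branch-and-link to materialize the current
address, from which the GOT/TOC pointer (r30 here) is formed.
Labels and registers are illustrative only. */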
22356 void
22357 rs6000_emit_load_toc_table (int fromprolog)
22359 rtx dest;
22360 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
22362 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
22364 char buf[30];
22365 rtx lab, tmp1, tmp2, got;
22367 lab = gen_label_rtx ();
22368 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
22369 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22370 if (flag_pic == 2)
22371 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22372 else
22373 got = rs6000_got_sym ();
22374 tmp1 = tmp2 = dest;
22375 if (!fromprolog)
22377 tmp1 = gen_reg_rtx (Pmode);
22378 tmp2 = gen_reg_rtx (Pmode);
22380 emit_insn (gen_load_toc_v4_PIC_1 (lab));
22381 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
22382 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
22383 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
22385 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
22387 emit_insn (gen_load_toc_v4_pic_si ());
22388 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22390 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
22392 char buf[30];
22393 rtx temp0 = (fromprolog
22394 ? gen_rtx_REG (Pmode, 0)
22395 : gen_reg_rtx (Pmode));
22397 if (fromprolog)
22399 rtx symF, symL;
22401 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
22402 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22404 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
22405 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22407 emit_insn (gen_load_toc_v4_PIC_1 (symF));
22408 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22409 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
22411 else
22413 rtx tocsym, lab;
22415 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22416 lab = gen_label_rtx ();
22417 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
22418 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22419 if (TARGET_LINK_STACK)
22420 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
22421 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
22423 emit_insn (gen_addsi3 (dest, temp0, dest));
22425 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
22427 /* This is for AIX code running in non-PIC ELF32. */
22428 char buf[30];
22429 rtx realsym;
22430 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
22431 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22433 emit_insn (gen_elf_high (dest, realsym));
22434 emit_insn (gen_elf_low (dest, dest, realsym));
22436 else
22438 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22440 if (TARGET_32BIT)
22441 emit_insn (gen_load_toc_aix_si (dest));
22442 else
22443 emit_insn (gen_load_toc_aix_di (dest));
22447 /* Emit instructions to restore the link register after determining where
22448 its value has been stored. */
22450 void
22451 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
22453 rs6000_stack_t *info = rs6000_stack_info ();
22454 rtx operands[2];
22456 operands[0] = source;
22457 operands[1] = scratch;
22459 if (info->lr_save_p)
22461 rtx frame_rtx = stack_pointer_rtx;
22462 HOST_WIDE_INT sp_offset = 0;
22463 rtx tmp;
22465 if (frame_pointer_needed
22466 || cfun->calls_alloca
22467 || info->total_size > 32767)
22469 tmp = gen_frame_mem (Pmode, frame_rtx);
22470 emit_move_insn (operands[1], tmp);
22471 frame_rtx = operands[1];
22473 else if (info->push_p)
22474 sp_offset = info->total_size;
22476 tmp = plus_constant (Pmode, frame_rtx,
22477 info->lr_save_offset + sp_offset);
22478 tmp = gen_frame_mem (Pmode, tmp);
22479 emit_move_insn (tmp, operands[0]);
22481 else
22482 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
22484 /* Freeze lr_save_p. We've just emitted rtl that depends on the
22485 state of lr_save_p so any change from here on would be a bug. In
22486 particular, stop rs6000_ra_ever_killed from considering the SET
22487 of lr we may have added just above. */
22488 cfun->machine->lr_save_state = info->lr_save_p + 1;
22491 static GTY(()) alias_set_type set = -1;
22493 alias_set_type
22494 get_TOC_alias_set (void)
22496 if (set == -1)
22497 set = new_alias_set ();
22498 return set;
22501 /* This returns nonzero if the current function uses the TOC. This is
22502 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
22503 is generated by the ABI_V4 load_toc_* patterns. */
22504 #if TARGET_ELF
22505 static int
22506 uses_TOC (void)
22508 rtx_insn *insn;
22510 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22511 if (INSN_P (insn))
22513 rtx pat = PATTERN (insn);
22514 int i;
22516 if (GET_CODE (pat) == PARALLEL)
22517 for (i = 0; i < XVECLEN (pat, 0); i++)
22519 rtx sub = XVECEXP (pat, 0, i);
22520 if (GET_CODE (sub) == USE)
22522 sub = XEXP (sub, 0);
22523 if (GET_CODE (sub) == UNSPEC
22524 && XINT (sub, 1) == UNSPEC_TOC)
22525 return 1;
22529 return 0;
22531 #endif
22533 rtx
22534 create_TOC_reference (rtx symbol, rtx largetoc_reg)
22536 rtx tocrel, tocreg, hi;
22538 if (TARGET_DEBUG_ADDR)
22540 if (GET_CODE (symbol) == SYMBOL_REF)
22541 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
22542 XSTR (symbol, 0));
22543 else
22545 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
22546 GET_RTX_NAME (GET_CODE (symbol)));
22547 debug_rtx (symbol);
22551 if (!can_create_pseudo_p ())
22552 df_set_regs_ever_live (TOC_REGISTER, true);
22554 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
22555 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
22556 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
22557 return tocrel;
22559 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
22560 if (largetoc_reg != NULL)
22562 emit_move_insn (largetoc_reg, hi);
22563 hi = largetoc_reg;
22565 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
22568 /* Issue assembly directives that create a reference to the given DWARF
22569 FRAME_TABLE_LABEL from the current function section. */
22570 void
22571 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
22573 fprintf (asm_out_file, "\t.ref %s\n",
22574 (* targetm.strip_name_encoding) (frame_table_label));
22577 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
22578 and the change to the stack pointer. */
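/* The tie emitted below is a PARALLEL of (set (mem:BLK (reg)) (const_int 0))
for each frame-related register; it acts purely as a scheduling
barrier, not as a real store. */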
22580 static void
22581 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
22583 rtvec p;
22584 int i;
22585 rtx regs[3];
22587 i = 0;
22588 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22589 if (hard_frame_needed)
22590 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
22591 if (!(REGNO (fp) == STACK_POINTER_REGNUM
22592 || (hard_frame_needed
22593 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
22594 regs[i++] = fp;
22596 p = rtvec_alloc (i);
22597 while (--i >= 0)
22599 rtx mem = gen_frame_mem (BLKmode, regs[i]);
22600 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
22603 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
22606 /* Emit the correct code for allocating stack space, as insns.
22607 If COPY_REG, leave a copy of the old stack pointer (plus COPY_OFF) in it.
22608 The generated code may use hard register 0 as a temporary. */
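/* Roughly, for a small frame on 32-bit this emits a single update,
e.g. "stwu 1,-64(1)"; a frame over 32767 bytes first loads the
negated size into r0 and uses an indexed update, e.g. for a 64 KiB
frame "lis 0,-1" then "stwux 1,1,0". (Illustrative asm; the insns
actually come from the gen_mov*_update_stack patterns used below.) */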
22610 static rtx_insn *
22611 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
22613 rtx_insn *insn;
22614 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22615 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
22616 rtx todec = gen_int_mode (-size, Pmode);
22617 rtx par, set, mem;
22619 if (INTVAL (todec) != -size)
22621 warning (0, "stack frame too large");
22622 emit_insn (gen_trap ());
22623 return 0;
22626 if (crtl->limit_stack)
22628 if (REG_P (stack_limit_rtx)
22629 && REGNO (stack_limit_rtx) > 1
22630 && REGNO (stack_limit_rtx) <= 31)
22632 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
22633 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22634 const0_rtx));
22636 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
22637 && TARGET_32BIT
22638 && DEFAULT_ABI == ABI_V4)
22640 rtx toload = gen_rtx_CONST (VOIDmode,
22641 gen_rtx_PLUS (Pmode,
22642 stack_limit_rtx,
22643 GEN_INT (size)));
22645 emit_insn (gen_elf_high (tmp_reg, toload));
22646 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
22647 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22648 const0_rtx));
22650 else
22651 warning (0, "stack limit expression is not supported");
22654 if (copy_reg)
22656 if (copy_off != 0)
22657 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
22658 else
22659 emit_move_insn (copy_reg, stack_reg);
22662 if (size > 32767)
22664 /* Need a note here so that try_split doesn't get confused. */
22665 if (get_last_insn () == NULL_RTX)
22666 emit_note (NOTE_INSN_DELETED);
22667 insn = emit_move_insn (tmp_reg, todec);
22668 try_split (PATTERN (insn), insn, 0);
22669 todec = tmp_reg;
22672 insn = emit_insn (TARGET_32BIT
22673 ? gen_movsi_update_stack (stack_reg, stack_reg,
22674 todec, stack_reg)
22675 : gen_movdi_di_update_stack (stack_reg, stack_reg,
22676 todec, stack_reg));
22677 /* Since we didn't use gen_frame_mem to generate the MEM, grab
22678 it now and set the alias set/attributes. The above gen_*_update
22679 calls will generate a PARALLEL with the MEM set being the first
22680 operation. */
22681 par = PATTERN (insn);
22682 gcc_assert (GET_CODE (par) == PARALLEL);
22683 set = XVECEXP (par, 0, 0);
22684 gcc_assert (GET_CODE (set) == SET);
22685 mem = SET_DEST (set);
22686 gcc_assert (MEM_P (mem));
22687 MEM_NOTRAP_P (mem) = 1;
22688 set_mem_alias_set (mem, get_frame_alias_set ());
22690 RTX_FRAME_RELATED_P (insn) = 1;
22691 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
22692 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
22693 GEN_INT (-size))));
22694 return insn;
22697 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22699 #if PROBE_INTERVAL > 32768
22700 #error Cannot use indexed addressing mode for stack probing
22701 #endif
22703 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22704 inclusive. These are offsets from the current stack pointer. */
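/* A small worked case (hypothetical numbers): with a PROBE_INTERVAL of
4096, FIRST == 16 and SIZE == 12288, the unrolled path below probes
at sp-4112 and sp-8208, then at sp-12304 (FIRST + SIZE). */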
22706 static void
22707 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
22709 /* See if we have a constant small number of probes to generate. If so,
22710 that's the easy case. */
22711 if (first + size <= 32768)
22713 HOST_WIDE_INT i;
22715 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
22716 it exceeds SIZE. If only one probe is needed, this will not
22717 generate any code. Then probe at FIRST + SIZE. */
22718 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
22719 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22720 -(first + i)));
22722 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22723 -(first + size)));
22726 /* Otherwise, do the same as above, but in a loop. Note that we must be
22727 extra careful with variables wrapping around because we might be at
22728 the very top (or the very bottom) of the address space and we have
22729 to be able to handle this case properly; in particular, we use an
22730 equality test for the loop condition. */
22731 else
22733 HOST_WIDE_INT rounded_size;
22734 rtx r12 = gen_rtx_REG (Pmode, 12);
22735 rtx r0 = gen_rtx_REG (Pmode, 0);
22737 /* Sanity check for the addressing mode we're going to use. */
22738 gcc_assert (first <= 32768);
22740 /* Step 1: round SIZE to the previous multiple of the interval. */
22742 rounded_size = size & -PROBE_INTERVAL;
22745 /* Step 2: compute initial and final value of the loop counter. */
22747 /* TEST_ADDR = SP + FIRST. */
22748 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
22749 -first)));
22751 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22752 if (rounded_size > 32768)
22754 emit_move_insn (r0, GEN_INT (-rounded_size));
22755 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
22757 else
22758 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
22759 -rounded_size)));
22762 /* Step 3: the loop
22764 while (TEST_ADDR != LAST_ADDR)
22766 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
22767 probe at TEST_ADDR
22770 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
22771 until it is equal to ROUNDED_SIZE. */
22773 if (TARGET_64BIT)
22774 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
22775 else
22776 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
22779 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22780 that SIZE is equal to ROUNDED_SIZE. */
22782 if (size != rounded_size)
22783 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
22787 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22788 absolute addresses. */
22790 const char *
22791 output_probe_stack_range (rtx reg1, rtx reg2)
22793 static int labelno = 0;
22794 char loop_lab[32], end_lab[32];
22795 rtx xops[2];
22797 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
22798 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
22800 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22802 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
22803 xops[0] = reg1;
22804 xops[1] = reg2;
22805 if (TARGET_64BIT)
22806 output_asm_insn ("cmpd 0,%0,%1", xops);
22807 else
22808 output_asm_insn ("cmpw 0,%0,%1", xops);
22810 fputs ("\tbeq 0,", asm_out_file);
22811 assemble_name_raw (asm_out_file, end_lab);
22812 fputc ('\n', asm_out_file);
22814 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22815 xops[1] = GEN_INT (-PROBE_INTERVAL);
22816 output_asm_insn ("addi %0,%0,%1", xops);
22818 /* Probe at TEST_ADDR and branch. */
22819 xops[1] = gen_rtx_REG (Pmode, 0);
22820 output_asm_insn ("stw %1,0(%0)", xops);
22821 fprintf (asm_out_file, "\tb ");
22822 assemble_name_raw (asm_out_file, loop_lab);
22823 fputc ('\n', asm_out_file);
22825 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
22827 return "";
22830 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
22831 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
22832 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
22833 deduce these equivalences by itself so it wasn't necessary to hold
22834 its hand so much. Don't be tempted to always supply d2_f_d_e with
22835 the actual cfa register, i.e. r31 when we are using a hard frame
22836 pointer. That fails when saving regs off r1, and sched moves the
22837 r31 setup past the reg saves. */
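/* For instance, when a register is saved through a temporary frame
base such as r11, the insn might be
	(set (mem (plus (reg 11) (const_int off))) (reg 28))
and the note attached here rewrites r11 as
(plus (reg 1) (const_int val)), so the unwinder sees an offset from
the stack pointer. (Illustrative registers and offsets.) */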
22839 static rtx
22840 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
22841 rtx reg2, rtx rreg)
22843 rtx real, temp;
22845 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
22847 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
22848 int i;
22850 gcc_checking_assert (val == 0);
22851 real = PATTERN (insn);
22852 if (GET_CODE (real) == PARALLEL)
22853 for (i = 0; i < XVECLEN (real, 0); i++)
22854 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22856 rtx set = XVECEXP (real, 0, i);
22858 RTX_FRAME_RELATED_P (set) = 1;
22860 RTX_FRAME_RELATED_P (insn) = 1;
22861 return insn;
22864 /* copy_rtx will not make unique copies of registers, so we need to
22865 ensure we don't have unwanted sharing here. */
22866 if (reg == reg2)
22867 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22869 if (reg == rreg)
22870 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22872 real = copy_rtx (PATTERN (insn));
22874 if (reg2 != NULL_RTX)
22875 real = replace_rtx (real, reg2, rreg);
22877 if (REGNO (reg) == STACK_POINTER_REGNUM)
22878 gcc_checking_assert (val == 0);
22879 else
22880 real = replace_rtx (real, reg,
22881 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
22882 STACK_POINTER_REGNUM),
22883 GEN_INT (val)));
22885 /* We expect that 'real' is either a SET or a PARALLEL containing
22886 SETs (and possibly other stuff). In a PARALLEL, all the SETs
22887 are important so they all have to be marked RTX_FRAME_RELATED_P. */
22889 if (GET_CODE (real) == SET)
22891 rtx set = real;
22893 temp = simplify_rtx (SET_SRC (set));
22894 if (temp)
22895 SET_SRC (set) = temp;
22896 temp = simplify_rtx (SET_DEST (set));
22897 if (temp)
22898 SET_DEST (set) = temp;
22899 if (GET_CODE (SET_DEST (set)) == MEM)
22901 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22902 if (temp)
22903 XEXP (SET_DEST (set), 0) = temp;
22906 else
22908 int i;
22910 gcc_assert (GET_CODE (real) == PARALLEL);
22911 for (i = 0; i < XVECLEN (real, 0); i++)
22912 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22914 rtx set = XVECEXP (real, 0, i);
22916 temp = simplify_rtx (SET_SRC (set));
22917 if (temp)
22918 SET_SRC (set) = temp;
22919 temp = simplify_rtx (SET_DEST (set));
22920 if (temp)
22921 SET_DEST (set) = temp;
22922 if (GET_CODE (SET_DEST (set)) == MEM)
22924 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22925 if (temp)
22926 XEXP (SET_DEST (set), 0) = temp;
22928 RTX_FRAME_RELATED_P (set) = 1;
22932 RTX_FRAME_RELATED_P (insn) = 1;
22933 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
22935 return insn;
22938 /* Returns an insn that has a vrsave set operation with the
22939 appropriate CLOBBERs. */
22941 static rtx
22942 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
22944 int nclobs, i;
22945 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
22946 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
22948 clobs[0]
22949 = gen_rtx_SET (vrsave,
22950 gen_rtx_UNSPEC_VOLATILE (SImode,
22951 gen_rtvec (2, reg, vrsave),
22952 UNSPECV_SET_VRSAVE));
22954 nclobs = 1;
22956 /* We need to clobber the registers in the mask so the scheduler
22957 does not move sets to VRSAVE before sets of AltiVec registers.
22959 However, if the function receives nonlocal gotos, reload will set
22960 all call saved registers live. We will end up with:
22962 (set (reg 999) (mem))
22963 (parallel [ (set (reg vrsave) (unspec blah))
22964 (clobber (reg 999))])
22966 The clobber will cause the store into reg 999 to be dead, and
22967 flow will attempt to delete an epilogue insn. In this case, we
22968 need an unspec use/set of the register. */
22970 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
22971 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22973 if (!epiloguep || call_used_regs [i])
22974 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
22975 gen_rtx_REG (V4SImode, i));
22976 else
22978 rtx reg = gen_rtx_REG (V4SImode, i);
22980 clobs[nclobs++]
22981 = gen_rtx_SET (reg,
22982 gen_rtx_UNSPEC (V4SImode,
22983 gen_rtvec (1, reg), 27));
22987 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
22989 for (i = 0; i < nclobs; ++i)
22990 XVECEXP (insn, 0, i) = clobs[i];
22992 return insn;
22995 static rtx
22996 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
22998 rtx addr, mem;
23000 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
23001 mem = gen_frame_mem (GET_MODE (reg), addr);
23002 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
23005 static rtx
23006 gen_frame_load (rtx reg, rtx frame_reg, int offset)
23008 return gen_frame_set (reg, frame_reg, offset, false);
23011 static rtx
23012 gen_frame_store (rtx reg, rtx frame_reg, int offset)
23014 return gen_frame_set (reg, frame_reg, offset, true);
23017 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
23018 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
23020 static rtx
23021 emit_frame_save (rtx frame_reg, machine_mode mode,
23022 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
23024 rtx reg, insn;
23026 /* Some cases that need register indexed addressing. */
23027 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
23028 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23029 || (TARGET_E500_DOUBLE && mode == DFmode)
23030 || (TARGET_SPE_ABI
23031 && SPE_VECTOR_MODE (mode)
23032 && !SPE_CONST_OFFSET_OK (offset))));
23034 reg = gen_rtx_REG (mode, regno);
23035 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
23036 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
23037 NULL_RTX, NULL_RTX);
23040 /* Emit an offset memory reference suitable for a frame store, while
23041 converting to a valid addressing mode. */
23043 static rtx
23044 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
23046 rtx int_rtx, offset_rtx;
23048 int_rtx = GEN_INT (offset);
23050 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
23051 || (TARGET_E500_DOUBLE && mode == DFmode))
23053 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
23054 emit_move_insn (offset_rtx, int_rtx);
23056 else
23057 offset_rtx = int_rtx;
23059 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
23062 #ifndef TARGET_FIX_AND_CONTINUE
23063 #define TARGET_FIX_AND_CONTINUE 0
23064 #endif
23066 /* It's really GPR 13 or 14, FPR 14, and VR 20; we need the smallest. */
23067 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
23068 #define LAST_SAVRES_REGISTER 31
23069 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
23071 enum {
23072 SAVRES_LR = 0x1,
23073 SAVRES_SAVE = 0x2,
23074 SAVRES_REG = 0x0c,
23075 SAVRES_GPR = 0,
23076 SAVRES_FPR = 4,
23077 SAVRES_VR = 8
23080 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
23082 /* Temporary holding space for an out-of-line register save/restore
23083 routine name. */
23084 static char savres_routine_name[30];
23086 /* Return the name for an out-of-line register save/restore routine.
23087 SEL selects the register class and whether we save or restore. */
23089 static char *
23090 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
23092 const char *prefix = "";
23093 const char *suffix = "";
23095 /* Different targets are supposed to define
23096 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
23097 routine name could be defined with:
23099 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
23101 This is a nice idea in theory, but in practice, things are
23102 complicated in several ways:
23104 - ELF targets have save/restore routines for GPRs.
23106 - SPE targets use different prefixes for 32/64-bit registers, and
23107 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
23109 - PPC64 ELF targets have routines for save/restore of GPRs that
23110 differ in what they do with the link register, so having a set
23111 prefix doesn't work. (We only use one of the save routines at
23112 the moment, though.)
23114 - PPC32 elf targets have "exit" versions of the restore routines
23115 that restore the link register and can save some extra space.
23116 These require an extra suffix. (There are also "tail" versions
23117 of the restore routines and "GOT" versions of the save routines,
23118 but we don't generate those at present. Same problems apply,
23119 though.)
23121 We deal with all this by synthesizing our own prefix/suffix and
23122 using that for the simple sprintf call shown above. */
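/* A few concrete outcomes of the scheme above, derived from the
cases below: 32-bit SVR4 saving GPRs from r29 gives "_savegpr_29",
its LR-restoring exit variant gives "_restgpr_29_x", and the 64-bit
GPR save that also handles LR gives "_savegpr0_29". */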
23123 if (TARGET_SPE)
23125 /* No floating point saves on the SPE. */
23126 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
23128 if ((sel & SAVRES_SAVE))
23129 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
23130 else
23131 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
23133 if ((sel & SAVRES_LR))
23134 suffix = "_x";
23136 else if (DEFAULT_ABI == ABI_V4)
23138 if (TARGET_64BIT)
23139 goto aix_names;
23141 if ((sel & SAVRES_REG) == SAVRES_GPR)
23142 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
23143 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23144 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
23145 else if ((sel & SAVRES_REG) == SAVRES_VR)
23146 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
23147 else
23148 abort ();
23150 if ((sel & SAVRES_LR))
23151 suffix = "_x";
23153 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23155 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
23156 /* No out-of-line save/restore routines for GPRs on AIX. */
23157 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
23158 #endif
23160 aix_names:
23161 if ((sel & SAVRES_REG) == SAVRES_GPR)
23162 prefix = ((sel & SAVRES_SAVE)
23163 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
23164 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
23165 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23167 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
23168 if ((sel & SAVRES_LR))
23169 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
23170 else
23171 #endif
23173 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
23174 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
23177 else if ((sel & SAVRES_REG) == SAVRES_VR)
23178 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
23179 else
23180 abort ();
23183 if (DEFAULT_ABI == ABI_DARWIN)
23185 /* The Darwin approach is (slightly) different, in order to be
23186 compatible with code generated by the system toolchain. There is a
23187 single symbol for the start of the save sequence, and the code here
23188 embeds an offset into that code on the basis of the first register
23189 to be saved. */
23190 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
23191 if ((sel & SAVRES_REG) == SAVRES_GPR)
23192 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
23193 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
23194 (regno - 13) * 4, prefix, regno);
23195 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23196 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
23197 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
23198 else if ((sel & SAVRES_REG) == SAVRES_VR)
23199 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
23200 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
23201 else
23202 abort ();
23204 else
23205 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
23207 return savres_routine_name;
23210 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
23211 SEL selects the register class and whether we save or restore. */
23213 static rtx
23214 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
23216 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
23217 ? info->first_gp_reg_save
23218 : (sel & SAVRES_REG) == SAVRES_FPR
23219 ? info->first_fp_reg_save - 32
23220 : (sel & SAVRES_REG) == SAVRES_VR
23221 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
23222 : -1);
23223 rtx sym;
23224 int select = sel;
23226 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
23227 versions of the gpr routines. */
23228 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
23229 && info->spe_64bit_regs_used)
23230 select ^= SAVRES_FPR ^ SAVRES_GPR;
23232 /* Don't generate bogus routine names. */
23233 gcc_assert (FIRST_SAVRES_REGISTER <= regno
23234 && regno <= LAST_SAVRES_REGISTER
23235 && select >= 0 && select <= 12);
23237 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
23239 if (sym == NULL)
23241 char *name;
23243 name = rs6000_savres_routine_name (info, regno, sel);
23245 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
23246 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
23247 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
23250 return sym;
23253 /* Emit a sequence of insns, including a stack tie if needed, for
23254 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
23255 reset the stack pointer, but move the base of the frame into
23256 reg UPDT_REGNO for use by out-of-line register restore routines. */
23258 static rtx
23259 rs6000_emit_stack_reset (rs6000_stack_t *info,
23260 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
23261 unsigned updt_regno)
23263 rtx updt_reg_rtx;
23265 /* This blockage is needed so that sched doesn't decide to move
23266 the sp change before the register restores. */
23267 if (DEFAULT_ABI == ABI_V4
23268 || (TARGET_SPE_ABI
23269 && info->spe_64bit_regs_used != 0
23270 && info->first_gp_reg_save != 32))
23271 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
23273 /* If we are restoring registers out-of-line, we will be using the
23274 "exit" variants of the restore routines, which will reset the
23275 stack for us. But we do need to point updt_reg into the
23276 right place for those routines. */
23277 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
23279 if (frame_off != 0)
23280 return emit_insn (gen_add3_insn (updt_reg_rtx,
23281 frame_reg_rtx, GEN_INT (frame_off)));
23282 else if (REGNO (frame_reg_rtx) != updt_regno)
23283 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
23285 return NULL_RTX;
23288 /* Return the register number used as a pointer by out-of-line
23289 save/restore functions. */
23291 static inline unsigned
23292 ptr_regno_for_savres (int sel)
23294 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23295 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
23296 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
23299 /* Construct a parallel rtx describing the effect of a call to an
23300 out-of-line register save/restore routine, and emit the insn
23301 or jump_insn as appropriate. */
23303 static rtx
23304 rs6000_emit_savres_rtx (rs6000_stack_t *info,
23305 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
23306 machine_mode reg_mode, int sel)
23308 int i;
23309 int offset, start_reg, end_reg, n_regs, use_reg;
23310 int reg_size = GET_MODE_SIZE (reg_mode);
23311 rtx sym;
23312 rtvec p;
23313 rtx par, insn;
23315 offset = 0;
23316 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23317 ? info->first_gp_reg_save
23318 : (sel & SAVRES_REG) == SAVRES_FPR
23319 ? info->first_fp_reg_save
23320 : (sel & SAVRES_REG) == SAVRES_VR
23321 ? info->first_altivec_reg_save
23322 : -1);
23323 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23324 ? 32
23325 : (sel & SAVRES_REG) == SAVRES_FPR
23326 ? 64
23327 : (sel & SAVRES_REG) == SAVRES_VR
23328 ? LAST_ALTIVEC_REGNO + 1
23329 : -1);
23330 n_regs = end_reg - start_reg;
23331 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
23332 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
23333 + n_regs);
23335 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23336 RTVEC_ELT (p, offset++) = ret_rtx;
23338 RTVEC_ELT (p, offset++)
23339 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
23341 sym = rs6000_savres_routine_sym (info, sel);
23342 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
23344 use_reg = ptr_regno_for_savres (sel);
23345 if ((sel & SAVRES_REG) == SAVRES_VR)
23347 /* Vector regs are saved/restored using [reg+reg] addressing. */
23348 RTVEC_ELT (p, offset++)
23349 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23350 RTVEC_ELT (p, offset++)
23351 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
23353 else
23354 RTVEC_ELT (p, offset++)
23355 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23357 for (i = 0; i < end_reg - start_reg; i++)
23358 RTVEC_ELT (p, i + offset)
23359 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
23360 frame_reg_rtx, save_area_offset + reg_size * i,
23361 (sel & SAVRES_SAVE) != 0);
23363 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23364 RTVEC_ELT (p, i + offset)
23365 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
23367 par = gen_rtx_PARALLEL (VOIDmode, p);
23369 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23371 insn = emit_jump_insn (par);
23372 JUMP_LABEL (insn) = ret_rtx;
23374 else
23375 insn = emit_insn (par);
23376 return insn;
23379 /* Emit code to store CR fields that need to be saved into REG. */
23381 static void
23382 rs6000_emit_move_from_cr (rtx reg)
23384 /* Only the ELFv2 ABI allows storing only selected fields. */
23385 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
23387 int i, cr_reg[8], count = 0;
23389 /* Collect CR fields that must be saved. */
23390 for (i = 0; i < 8; i++)
23391 if (save_reg_p (CR0_REGNO + i))
23392 cr_reg[count++] = i;
23394 /* If it's just a single one, use mfcrf. */
23395 if (count == 1)
23397 rtvec p = rtvec_alloc (1);
23398 rtvec r = rtvec_alloc (2);
23399 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
23400 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
23401 RTVEC_ELT (p, 0)
23402 = gen_rtx_SET (reg,
23403 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
23405 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23406 return;
23409 /* ??? It might be better to handle count == 2 / 3 cases here
23410 as well, using logical operations to combine the values. */
23413 emit_insn (gen_movesi_from_cr (reg));
23416 /* Determine whether the gp REG is really used. */
23418 static bool
23419 rs6000_reg_live_or_pic_offset_p (int reg)
23421 /* If the function calls eh_return, treat as used all the registers that
23422 would otherwise be checked for liveness. This is required for the PIC offset
23423 register with -mminimal-toc on AIX, as it is advertised as "fixed" for
23424 register allocation purposes in this case. */
23426 return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
23427 && (!call_used_regs[reg]
23428 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23429 && !TARGET_SINGLE_PIC_BASE
23430 && TARGET_TOC && TARGET_MINIMAL_TOC)))
23431 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23432 && !TARGET_SINGLE_PIC_BASE
23433 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
23434 || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
23437 /* Return whether the split-stack arg pointer (r12) is used. */
23439 static bool
23440 split_stack_arg_pointer_used_p (void)
23442 /* If the pseudo holding the arg pointer is no longer a pseudo,
23443 then the arg pointer is used. */
23444 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
23445 && (!REG_P (cfun->machine->split_stack_arg_pointer)
23446 || (REGNO (cfun->machine->split_stack_arg_pointer)
23447 < FIRST_PSEUDO_REGISTER)))
23448 return true;
23450 /* Unfortunately we also need to do some code scanning, since
23451 r12 may have been substituted for the pseudo. */
23452 rtx_insn *insn;
23453 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun);
23454 FOR_BB_INSNS (bb, insn)
23455 if (NONDEBUG_INSN_P (insn))
23457 /* A call destroys r12. */
23458 if (CALL_P (insn))
23459 return false;
23461 df_ref use;
23462 FOR_EACH_INSN_USE (use, insn)
23464 rtx x = DF_REF_REG (use);
23465 if (REG_P (x) && REGNO (x) == 12)
23466 return true;
23468 df_ref def;
23469 FOR_EACH_INSN_DEF (def, insn)
23471 rtx x = DF_REF_REG (def);
23472 if (REG_P (x) && REGNO (x) == 12)
23473 return false;
23476 return bitmap_bit_p (DF_LR_OUT (bb), 12);
23479 /* Emit function prologue as insns. */
23481 void
23482 rs6000_emit_prologue (void)
23484 rs6000_stack_t *info = rs6000_stack_info ();
23485 machine_mode reg_mode = Pmode;
23486 int reg_size = TARGET_32BIT ? 4 : 8;
23487 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
23488 rtx frame_reg_rtx = sp_reg_rtx;
23489 unsigned int cr_save_regno;
23490 rtx cr_save_rtx = NULL_RTX;
23491 rtx insn;
23492 int strategy;
23493 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23494 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23495 && call_used_regs[STATIC_CHAIN_REGNUM]);
23496 /* Offset to top of frame for frame_reg and sp respectively. */
23497 HOST_WIDE_INT frame_off = 0;
23498 HOST_WIDE_INT sp_off = 0;
23499 /* sp_adjust is the stack adjusting instruction, tracked so that the
23500 insn setting up the split-stack arg pointer can be emitted just
23501 prior to it, when r12 is not used here for other purposes. */
23502 rtx_insn *sp_adjust = 0;
23504 #ifdef ENABLE_CHECKING
23505 /* Track and check usage of r0, r11, r12. */
23506 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
23507 #define START_USE(R) do \
23509 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23510 reg_inuse |= 1 << (R); \
23511 } while (0)
23512 #define END_USE(R) do \
23514 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
23515 reg_inuse &= ~(1 << (R)); \
23516 } while (0)
23517 #define NOT_INUSE(R) do \
23519 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23520 } while (0)
23521 #else
23522 #define START_USE(R) do {} while (0)
23523 #define END_USE(R) do {} while (0)
23524 #define NOT_INUSE(R) do {} while (0)
23525 #endif
23527 if (DEFAULT_ABI == ABI_ELFv2)
23529 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
23531 /* With -mminimal-toc we may generate an extra use of r2 below. */
23532 if (!TARGET_SINGLE_PIC_BASE
23533 && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
23534 cfun->machine->r2_setup_needed = true;
23538 if (flag_stack_usage_info)
23539 current_function_static_stack_size = info->total_size;
23541 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
23543 HOST_WIDE_INT size = info->total_size;
23545 if (crtl->is_leaf && !cfun->calls_alloca)
23547 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
23548 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
23549 size - STACK_CHECK_PROTECT);
23551 else if (size > 0)
23552 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
23555 if (TARGET_FIX_AND_CONTINUE)
23557 /* gdb on darwin arranges to forward a function from the old
23558 address by modifying the first 5 instructions of the function
23559 to branch to the overriding function. This is necessary to
23560 permit function pointers that point to the old function to
23561 actually forward to the new function. */
23562 emit_insn (gen_nop ());
23563 emit_insn (gen_nop ());
23564 emit_insn (gen_nop ());
23565 emit_insn (gen_nop ());
23566 emit_insn (gen_nop ());
23569 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23571 reg_mode = V2SImode;
23572 reg_size = 8;
23575 /* Handle world saves specially here. */
23576 if (WORLD_SAVE_P (info))
23578 int i, j, sz;
23579 rtx treg;
23580 rtvec p;
23581 rtx reg0;
23583 /* save_world expects lr in r0. */
23584 reg0 = gen_rtx_REG (Pmode, 0);
23585 if (info->lr_save_p)
23587 insn = emit_move_insn (reg0,
23588 gen_rtx_REG (Pmode, LR_REGNO));
23589 RTX_FRAME_RELATED_P (insn) = 1;
23592 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
23593 assumptions about the offsets of various bits of the stack
23594 frame. */
23595 gcc_assert (info->gp_save_offset == -220
23596 && info->fp_save_offset == -144
23597 && info->lr_save_offset == 8
23598 && info->cr_save_offset == 4
23599 && info->push_p
23600 && info->lr_save_p
23601 && (!crtl->calls_eh_return
23602 || info->ehrd_offset == -432)
23603 && info->vrsave_save_offset == -224
23604 && info->altivec_save_offset == -416);
23606 treg = gen_rtx_REG (SImode, 11);
23607 emit_move_insn (treg, GEN_INT (-info->total_size));
23609 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
23610 in R11. It also clobbers R12, so beware! */
/* Preserve CR2 for save_world prologues.  */
23613 sz = 5;
23614 sz += 32 - info->first_gp_reg_save;
23615 sz += 64 - info->first_fp_reg_save;
23616 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
23617 p = rtvec_alloc (sz);
23618 j = 0;
23619 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
23620 gen_rtx_REG (SImode,
23621 LR_REGNO));
23622 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
23623 gen_rtx_SYMBOL_REF (Pmode,
23624 "*save_world"));
23625 /* We do floats first so that the instruction pattern matches
23626 properly. */
23627 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23628 RTVEC_ELT (p, j++)
23629 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23630 ? DFmode : SFmode,
23631 info->first_fp_reg_save + i),
23632 frame_reg_rtx,
23633 info->fp_save_offset + frame_off + 8 * i);
23634 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
23635 RTVEC_ELT (p, j++)
23636 = gen_frame_store (gen_rtx_REG (V4SImode,
23637 info->first_altivec_reg_save + i),
23638 frame_reg_rtx,
23639 info->altivec_save_offset + frame_off + 16 * i);
23640 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23641 RTVEC_ELT (p, j++)
23642 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23643 frame_reg_rtx,
23644 info->gp_save_offset + frame_off + reg_size * i);
23646 /* CR register traditionally saved as CR2. */
23647 RTVEC_ELT (p, j++)
23648 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
23649 frame_reg_rtx, info->cr_save_offset + frame_off);
23650 /* Explain about use of R0. */
23651 if (info->lr_save_p)
23652 RTVEC_ELT (p, j++)
23653 = gen_frame_store (reg0,
23654 frame_reg_rtx, info->lr_save_offset + frame_off);
23655 /* Explain what happens to the stack pointer. */
23657 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
23658 RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
23661 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23662 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23663 treg, GEN_INT (-info->total_size));
23664 sp_off = frame_off = info->total_size;
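/* Past this point frame_reg_rtx + frame_off and sp + sp_off both
   address the top of the frame just allocated by save_world.  */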
23667 strategy = info->savres_strategy;
23669 /* For V.4, update stack before we do any saving and set back pointer. */
23670 if (! WORLD_SAVE_P (info)
23671 && info->push_p
23672 && (DEFAULT_ABI == ABI_V4
23673 || crtl->calls_eh_return))
23675 bool need_r11 = (TARGET_SPE
23676 ? (!(strategy & SAVE_INLINE_GPRS)
23677 && info->spe_64bit_regs_used == 0)
23678 : (!(strategy & SAVE_INLINE_FPRS)
23679 || !(strategy & SAVE_INLINE_GPRS)
23680 || !(strategy & SAVE_INLINE_VRS)));
23681 int ptr_regno = -1;
23682 rtx ptr_reg = NULL_RTX;
23683 int ptr_off = 0;
23685 if (info->total_size < 32767)
23686 frame_off = info->total_size;
23687 else if (need_r11)
23688 ptr_regno = 11;
23689 else if (info->cr_save_p
23690 || info->lr_save_p
23691 || info->first_fp_reg_save < 64
23692 || info->first_gp_reg_save < 32
23693 || info->altivec_size != 0
23694 || info->vrsave_size != 0
23695 || crtl->calls_eh_return)
23696 ptr_regno = 12;
23697 else
23699 /* The prologue won't be saving any regs so there is no need
23700 to set up a frame register to access any frame save area.
23701 We also won't be using frame_off anywhere below, but set
23702 the correct value anyway to protect against future
23703 changes to this function. */
23704 frame_off = info->total_size;
23706 if (ptr_regno != -1)
23708 /* Set up the frame offset to that needed by the first
23709 out-of-line save function. */
23710 START_USE (ptr_regno);
23711 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23712 frame_reg_rtx = ptr_reg;
23713 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
23714 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
23715 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
23716 ptr_off = info->gp_save_offset + info->gp_size;
23717 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
23718 ptr_off = info->altivec_save_offset + info->altivec_size;
23719 frame_off = -ptr_off;
23721 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
23722 ptr_reg, ptr_off);
23723 if (REGNO (frame_reg_rtx) == 12)
23724 sp_adjust = 0;
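/* If r12 serves as the frame register here, it is not free to carry
   the split-stack arg pointer, so that setup cannot be hoisted
   before this stack adjustment; drop sp_adjust to record that.  */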
23725 sp_off = info->total_size;
23726 if (frame_reg_rtx != sp_reg_rtx)
23727 rs6000_emit_stack_tie (frame_reg_rtx, false);
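/* The stack tie acts as a scheduling barrier so that stores into
   the frame, addressed off the new pointer register, cannot drift
   above the stack allocation itself.  */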
23730 /* If we use the link register, get it into r0. */
23731 if (!WORLD_SAVE_P (info) && info->lr_save_p)
23733 rtx addr, reg, mem;
23735 reg = gen_rtx_REG (Pmode, 0);
23736 START_USE (0);
23737 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
23738 RTX_FRAME_RELATED_P (insn) = 1;
23740 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
23741 | SAVE_NOINLINE_FPRS_SAVES_LR)))
23743 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23744 GEN_INT (info->lr_save_offset + frame_off));
23745 mem = gen_rtx_MEM (Pmode, addr);
/* This should not use rs6000_sr_alias_set, because of
   __builtin_return_address.  */
23749 insn = emit_move_insn (mem, reg);
23750 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23751 NULL_RTX, NULL_RTX);
23752 END_USE (0);
23756 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
23757 r12 will be needed by out-of-line gpr restore. */
23758 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23759 && !(strategy & (SAVE_INLINE_GPRS
23760 | SAVE_NOINLINE_GPRS_SAVES_LR))
23761 ? 11 : 12);
23762 if (!WORLD_SAVE_P (info)
23763 && info->cr_save_p
23764 && REGNO (frame_reg_rtx) != cr_save_regno
23765 && !(using_static_chain_p && cr_save_regno == 11)
23766 && !(flag_split_stack && cr_save_regno == 12 && sp_adjust))
23768 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
23769 START_USE (cr_save_regno);
23770 rs6000_emit_move_from_cr (cr_save_rtx);
23773 /* Do any required saving of fpr's. If only one or two to save, do
23774 it ourselves. Otherwise, call function. */
23775 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
23777 int i;
23778 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23779 if (save_reg_p (info->first_fp_reg_save + i))
23780 emit_frame_save (frame_reg_rtx,
23781 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23782 ? DFmode : SFmode),
23783 info->first_fp_reg_save + i,
23784 info->fp_save_offset + frame_off + 8 * i,
23785 sp_off - frame_off);
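/* emit_frame_save stores one FPR every 8 bytes from fp_save_offset
   and attaches the unwind annotation for each slot itself.  */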
23787 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
23789 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
23790 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
23791 unsigned ptr_regno = ptr_regno_for_savres (sel);
23792 rtx ptr_reg = frame_reg_rtx;
23794 if (REGNO (frame_reg_rtx) == ptr_regno)
23795 gcc_checking_assert (frame_off == 0);
23796 else
23798 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23799 NOT_INUSE (ptr_regno);
23800 emit_insn (gen_add3_insn (ptr_reg,
23801 frame_reg_rtx, GEN_INT (frame_off)));
23803 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23804 info->fp_save_offset,
23805 info->lr_save_offset,
23806 DFmode, sel);
23807 rs6000_frame_related (insn, ptr_reg, sp_off,
23808 NULL_RTX, NULL_RTX);
23809 if (lr)
23810 END_USE (0);
23813 /* Save GPRs. This is done as a PARALLEL if we are using
23814 the store-multiple instructions. */
23815 if (!WORLD_SAVE_P (info)
23816 && TARGET_SPE_ABI
23817 && info->spe_64bit_regs_used != 0
23818 && info->first_gp_reg_save != 32)
23820 int i;
23821 rtx spe_save_area_ptr;
23822 HOST_WIDE_INT save_off;
23823 int ool_adjust = 0;
23825 /* Determine whether we can address all of the registers that need
23826 to be saved with an offset from frame_reg_rtx that fits in
23827 the small const field for SPE memory instructions. */
23828 int spe_regs_addressable
23829 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
23830 + reg_size * (32 - info->first_gp_reg_save - 1))
23831 && (strategy & SAVE_INLINE_GPRS));
23833 if (spe_regs_addressable)
23835 spe_save_area_ptr = frame_reg_rtx;
23836 save_off = frame_off;
23838 else
23840 /* Make r11 point to the start of the SPE save area. We need
23841 to be careful here if r11 is holding the static chain. If
23842 it is, then temporarily save it in r0. */
23843 HOST_WIDE_INT offset;
23845 if (!(strategy & SAVE_INLINE_GPRS))
23846 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
23847 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
23848 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
23849 save_off = frame_off - offset;
23851 if (using_static_chain_p)
23853 rtx r0 = gen_rtx_REG (Pmode, 0);
23855 START_USE (0);
23856 gcc_assert (info->first_gp_reg_save > 11);
23858 emit_move_insn (r0, spe_save_area_ptr);
23860 else if (REGNO (frame_reg_rtx) != 11)
23861 START_USE (11);
23863 emit_insn (gen_addsi3 (spe_save_area_ptr,
23864 frame_reg_rtx, GEN_INT (offset)));
23865 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
23866 frame_off = -info->spe_gp_save_offset + ool_adjust;
23869 if ((strategy & SAVE_INLINE_GPRS))
23871 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23872 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23873 emit_frame_save (spe_save_area_ptr, reg_mode,
23874 info->first_gp_reg_save + i,
23875 (info->spe_gp_save_offset + save_off
23876 + reg_size * i),
23877 sp_off - save_off);
23879 else
23881 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
23882 info->spe_gp_save_offset + save_off,
23883 0, reg_mode,
23884 SAVRES_SAVE | SAVRES_GPR);
23886 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
23887 NULL_RTX, NULL_RTX);
23890 /* Move the static chain pointer back. */
23891 if (!spe_regs_addressable)
23893 if (using_static_chain_p)
23895 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
23896 END_USE (0);
23898 else if (REGNO (frame_reg_rtx) != 11)
23899 END_USE (11);
23902 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
23904 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
23905 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
23906 unsigned ptr_regno = ptr_regno_for_savres (sel);
23907 rtx ptr_reg = frame_reg_rtx;
23908 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
23909 int end_save = info->gp_save_offset + info->gp_size;
23910 int ptr_off;
23912 if (ptr_regno == 12)
23913 sp_adjust = 0;
23914 if (!ptr_set_up)
23915 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23917 /* Need to adjust r11 (r12) if we saved any FPRs. */
23918 if (end_save + frame_off != 0)
23920 rtx offset = GEN_INT (end_save + frame_off);
23922 if (ptr_set_up)
23923 frame_off = -end_save;
23924 else
23925 NOT_INUSE (ptr_regno);
23926 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23928 else if (!ptr_set_up)
23930 NOT_INUSE (ptr_regno);
23931 emit_move_insn (ptr_reg, frame_reg_rtx);
23933 ptr_off = -end_save;
23934 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23935 info->gp_save_offset + ptr_off,
23936 info->lr_save_offset + ptr_off,
23937 reg_mode, sel);
23938 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
23939 NULL_RTX, NULL_RTX);
23940 if (lr)
23941 END_USE (0);
23943 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
23945 rtvec p;
23946 int i;
23947 p = rtvec_alloc (32 - info->first_gp_reg_save);
23948 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23949 RTVEC_ELT (p, i)
23950 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23951 frame_reg_rtx,
23952 info->gp_save_offset + frame_off + reg_size * i);
23953 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23954 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23955 NULL_RTX, NULL_RTX);
23957 else if (!WORLD_SAVE_P (info))
23959 int i;
23960 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23961 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23962 emit_frame_save (frame_reg_rtx, reg_mode,
23963 info->first_gp_reg_save + i,
23964 info->gp_save_offset + frame_off + reg_size * i,
23965 sp_off - frame_off);
23968 if (crtl->calls_eh_return)
23970 unsigned int i;
23971 rtvec p;
23973 for (i = 0; ; ++i)
23975 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23976 if (regno == INVALID_REGNUM)
23977 break;
23980 p = rtvec_alloc (i);
23982 for (i = 0; ; ++i)
23984 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23985 if (regno == INVALID_REGNUM)
23986 break;
23988 insn
23989 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
23990 sp_reg_rtx,
23991 info->ehrd_offset + sp_off + reg_size * (int) i);
23992 RTVEC_ELT (p, i) = insn;
23993 RTX_FRAME_RELATED_P (insn) = 1;
23996 insn = emit_insn (gen_blockage ());
23997 RTX_FRAME_RELATED_P (insn) = 1;
23998 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
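/* Describing all the EH data register stores on one blockage insn
   lets the unwinder see them become valid at a single point.  */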
24001 /* In AIX ABI we need to make sure r2 is really saved. */
24002 if (TARGET_AIX && crtl->calls_eh_return)
24004 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
24005 rtx save_insn, join_insn, note;
24006 long toc_restore_insn;
24008 tmp_reg = gen_rtx_REG (Pmode, 11);
24009 tmp_reg_si = gen_rtx_REG (SImode, 11);
24010 if (using_static_chain_p)
24012 START_USE (0);
24013 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
24015 else
24016 START_USE (11);
24017 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
24018 /* Peek at instruction to which this function returns. If it's
24019 restoring r2, then we know we've already saved r2. We can't
24020 unconditionally save r2 because the value we have will already
24021 be updated if we arrived at this function via a plt call or
24022 toc adjusting stub. */
24023 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
24024 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
24025 + RS6000_TOC_SAVE_SLOT);
24026 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
24027 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
24028 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
24029 validate_condition_mode (EQ, CCUNSmode);
24030 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
24031 emit_insn (gen_rtx_SET (compare_result,
24032 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
24033 toc_save_done = gen_label_rtx ();
24034 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
24035 gen_rtx_EQ (VOIDmode, compare_result,
24036 const0_rtx),
24037 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
24038 pc_rtx);
24039 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
24040 JUMP_LABEL (jump) = toc_save_done;
24041 LABEL_NUSES (toc_save_done) += 1;
24043 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
24044 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
24045 sp_off - frame_off);
24047 emit_label (toc_save_done);
24049 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
24050 have a CFG that has different saves along different paths.
24051 Move the note to a dummy blockage insn, which describes that
24052 R2 is unconditionally saved after the label. */
/* ??? An alternate representation might be a special insn pattern
   containing both the branch and the store.  That might give the
   code that minimizes the number of DW_CFA_advance opcodes more
   freedom in placing the annotations.  */
24057 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
24058 if (note)
24059 remove_note (save_insn, note);
24060 else
24061 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
24062 copy_rtx (PATTERN (save_insn)), NULL_RTX);
24063 RTX_FRAME_RELATED_P (save_insn) = 0;
24065 join_insn = emit_insn (gen_blockage ());
24066 REG_NOTES (join_insn) = note;
24067 RTX_FRAME_RELATED_P (join_insn) = 1;
24069 if (using_static_chain_p)
24071 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
24072 END_USE (0);
24074 else
24075 END_USE (11);
24078 /* Save CR if we use any that must be preserved. */
24079 if (!WORLD_SAVE_P (info) && info->cr_save_p)
24081 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
24082 GEN_INT (info->cr_save_offset + frame_off));
24083 rtx mem = gen_frame_mem (SImode, addr);
24085 /* If we didn't copy cr before, do so now using r0. */
24086 if (cr_save_rtx == NULL_RTX)
24088 START_USE (0);
24089 cr_save_rtx = gen_rtx_REG (SImode, 0);
24090 rs6000_emit_move_from_cr (cr_save_rtx);
24093 /* Saving CR requires a two-instruction sequence: one instruction
24094 to move the CR to a general-purpose register, and a second
24095 instruction that stores the GPR to memory.
24097 We do not emit any DWARF CFI records for the first of these,
24098 because we cannot properly represent the fact that CR is saved in
24099 a register. One reason is that we cannot express that multiple
24100 CR fields are saved; another reason is that on 64-bit, the size
24101 of the CR register in DWARF (4 bytes) differs from the size of
24102 a general-purpose register.
24104 This means if any intervening instruction were to clobber one of
24105 the call-saved CR fields, we'd have incorrect CFI. To prevent
24106 this from happening, we mark the store to memory as a use of
24107 those CR fields, which prevents any such instruction from being
24108 scheduled in between the two instructions. */
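/* Schematically the sequence is (register and offset vary):
	mfcr  rN
	stw   rN,cr_save_offset(frame_reg)
   with the store wrapped in a PARALLEL that USEs the saved
   CR fields.  */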
24109 rtx crsave_v[9];
24110 int n_crsave = 0;
24111 int i;
24113 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
24114 for (i = 0; i < 8; i++)
24115 if (save_reg_p (CR0_REGNO + i))
24116 crsave_v[n_crsave++]
24117 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
24119 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
24120 gen_rtvec_v (n_crsave, crsave_v)));
24121 END_USE (REGNO (cr_save_rtx));
24123 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
24124 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
24125 so we need to construct a frame expression manually. */
24126 RTX_FRAME_RELATED_P (insn) = 1;
24128 /* Update address to be stack-pointer relative, like
24129 rs6000_frame_related would do. */
24130 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
24131 GEN_INT (info->cr_save_offset + sp_off));
24132 mem = gen_frame_mem (SImode, addr);
24134 if (DEFAULT_ABI == ABI_ELFv2)
24136 /* In the ELFv2 ABI we generate separate CFI records for each
24137 CR field that was actually saved. They all point to the
24138 same 32-bit stack slot. */
24139 rtx crframe[8];
24140 int n_crframe = 0;
24142 for (i = 0; i < 8; i++)
24143 if (save_reg_p (CR0_REGNO + i))
24145 crframe[n_crframe]
24146 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
24148 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
24149 n_crframe++;
24152 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24153 gen_rtx_PARALLEL (VOIDmode,
24154 gen_rtvec_v (n_crframe, crframe)));
24156 else
24158 /* In other ABIs, by convention, we use a single CR regnum to
24159 represent the fact that all call-saved CR fields are saved.
24160 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
24161 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
24162 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
24166 /* In the ELFv2 ABI we need to save all call-saved CR fields into
24167 *separate* slots if the routine calls __builtin_eh_return, so
24168 that they can be independently restored by the unwinder. */
24169 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24171 int i, cr_off = info->ehcr_offset;
24172 rtx crsave;
24174 /* ??? We might get better performance by using multiple mfocrf
24175 instructions. */
24176 crsave = gen_rtx_REG (SImode, 0);
24177 emit_insn (gen_movesi_from_cr (crsave));
24179 for (i = 0; i < 8; i++)
24180 if (!call_used_regs[CR0_REGNO + i])
24182 rtvec p = rtvec_alloc (2);
24183 RTVEC_ELT (p, 0)
24184 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
24185 RTVEC_ELT (p, 1)
24186 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
24188 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24190 RTX_FRAME_RELATED_P (insn) = 1;
24191 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24192 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
24193 sp_reg_rtx, cr_off + sp_off));
24195 cr_off += reg_size;
24199 /* Update stack and set back pointer unless this is V.4,
24200 for which it was done previously. */
24201 if (!WORLD_SAVE_P (info) && info->push_p
24202 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
24204 rtx ptr_reg = NULL;
24205 int ptr_off = 0;
24207 /* If saving altivec regs we need to be able to address all save
24208 locations using a 16-bit offset. */
24209 if ((strategy & SAVE_INLINE_VRS) == 0
24210 || (info->altivec_size != 0
24211 && (info->altivec_save_offset + info->altivec_size - 16
24212 + info->total_size - frame_off) > 32767)
24213 || (info->vrsave_size != 0
24214 && (info->vrsave_save_offset
24215 + info->total_size - frame_off) > 32767))
24217 int sel = SAVRES_SAVE | SAVRES_VR;
24218 unsigned ptr_regno = ptr_regno_for_savres (sel);
24220 if (using_static_chain_p
24221 && ptr_regno == STATIC_CHAIN_REGNUM)
24222 ptr_regno = 12;
24223 if (REGNO (frame_reg_rtx) != ptr_regno)
24224 START_USE (ptr_regno);
24225 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
24226 frame_reg_rtx = ptr_reg;
24227 ptr_off = info->altivec_save_offset + info->altivec_size;
24228 frame_off = -ptr_off;
24230 else if (REGNO (frame_reg_rtx) == 1)
24231 frame_off = info->total_size;
24232 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
24233 ptr_reg, ptr_off);
24234 if (REGNO (frame_reg_rtx) == 12)
24235 sp_adjust = 0;
24236 sp_off = info->total_size;
24237 if (frame_reg_rtx != sp_reg_rtx)
24238 rs6000_emit_stack_tie (frame_reg_rtx, false);
24241 /* Set frame pointer, if needed. */
24242 if (frame_pointer_needed)
24244 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
24245 sp_reg_rtx);
24246 RTX_FRAME_RELATED_P (insn) = 1;
24249 /* Save AltiVec registers if needed. Save here because the red zone does
24250 not always include AltiVec registers. */
24251 if (!WORLD_SAVE_P (info)
24252 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
24254 int end_save = info->altivec_save_offset + info->altivec_size;
24255 int ptr_off;
24256 /* Oddly, the vector save/restore functions point r0 at the end
24257 of the save area, then use r11 or r12 to load offsets for
24258 [reg+reg] addressing. */
24259 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24260 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
24261 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24263 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24264 NOT_INUSE (0);
24265 if (scratch_regno == 12)
24266 sp_adjust = 0;
24267 if (end_save + frame_off != 0)
24269 rtx offset = GEN_INT (end_save + frame_off);
24271 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24273 else
24274 emit_move_insn (ptr_reg, frame_reg_rtx);
24276 ptr_off = -end_save;
24277 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24278 info->altivec_save_offset + ptr_off,
24279 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
24280 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
24281 NULL_RTX, NULL_RTX);
24282 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24284 /* The oddity mentioned above clobbered our frame reg. */
24285 emit_move_insn (frame_reg_rtx, ptr_reg);
24286 frame_off = ptr_off;
24289 else if (!WORLD_SAVE_P (info)
24290 && info->altivec_size != 0)
24292 int i;
24294 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24295 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24297 rtx areg, savereg, mem;
24298 int offset;
24300 offset = (info->altivec_save_offset + frame_off
24301 + 16 * (i - info->first_altivec_reg_save));
24303 savereg = gen_rtx_REG (V4SImode, i);
24305 NOT_INUSE (0);
24306 areg = gen_rtx_REG (Pmode, 0);
24307 emit_move_insn (areg, GEN_INT (offset));
24309 /* AltiVec addressing mode is [reg+reg]. */
24310 mem = gen_frame_mem (V4SImode,
24311 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
24313 /* Rather than emitting a generic move, force use of the stvx
24314 instruction, which we always want. In particular we don't
24315 want xxpermdi/stxvd2x for little endian. */
24316 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
24318 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24319 areg, GEN_INT (offset));
24323 /* VRSAVE is a bit vector representing which AltiVec registers
24324 are used. The OS uses this to determine which vector
24325 registers to save on a context switch. We need to save
24326 VRSAVE on the stack frame, add whatever AltiVec registers we
24327 used in this function, and do the corresponding magic in the
24328 epilogue. */
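/* Concretely: read VRSAVE into a GPR, store that original value in
   the frame, OR in this function's vrsave_mask, and write the
   result back to VRSAVE.  */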
24330 if (!WORLD_SAVE_P (info)
24331 && info->vrsave_size != 0)
24333 rtx reg, vrsave;
24334 int offset;
24335 int save_regno;
24337 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
24338 be using r12 as frame_reg_rtx and r11 as the static chain
24339 pointer for nested functions. */
24340 save_regno = 12;
24341 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24342 && !using_static_chain_p)
24343 save_regno = 11;
24344 else if (flag_split_stack || REGNO (frame_reg_rtx) == 12)
24346 save_regno = 11;
24347 if (using_static_chain_p)
24348 save_regno = 0;
24351 NOT_INUSE (save_regno);
24352 reg = gen_rtx_REG (SImode, save_regno);
24353 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24354 if (TARGET_MACHO)
24355 emit_insn (gen_get_vrsave_internal (reg));
24356 else
24357 emit_insn (gen_rtx_SET (reg, vrsave));
24359 /* Save VRSAVE. */
24360 offset = info->vrsave_save_offset + frame_off;
24361 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
24363 /* Include the registers in the mask. */
24364 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
24366 insn = emit_insn (generate_set_vrsave (reg, info, 0));
24369 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
24370 if (!TARGET_SINGLE_PIC_BASE
24371 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
24372 || (DEFAULT_ABI == ABI_V4
24373 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
24374 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
24376 /* If emit_load_toc_table will use the link register, we need to save
24377 it. We use R12 for this purpose because emit_load_toc_table
24378 can use register 0. This allows us to use a plain 'blr' to return
24379 from the procedure more often. */
24380 int save_LR_around_toc_setup = (TARGET_ELF
24381 && DEFAULT_ABI == ABI_V4
24382 && flag_pic
24383 && ! info->lr_save_p
24384 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
24385 if (save_LR_around_toc_setup)
24387 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24388 rtx tmp = gen_rtx_REG (Pmode, 12);
24390 sp_adjust = 0;
24391 insn = emit_move_insn (tmp, lr);
24392 RTX_FRAME_RELATED_P (insn) = 1;
24394 rs6000_emit_load_toc_table (TRUE);
24396 insn = emit_move_insn (lr, tmp);
24397 add_reg_note (insn, REG_CFA_RESTORE, lr);
24398 RTX_FRAME_RELATED_P (insn) = 1;
24400 else
24401 rs6000_emit_load_toc_table (TRUE);
24404 #if TARGET_MACHO
24405 if (!TARGET_SINGLE_PIC_BASE
24406 && DEFAULT_ABI == ABI_DARWIN
24407 && flag_pic && crtl->uses_pic_offset_table)
24409 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24410 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
24412 /* Save and restore LR locally around this call (in R0). */
24413 if (!info->lr_save_p)
24414 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
24416 emit_insn (gen_load_macho_picbase (src));
24418 emit_move_insn (gen_rtx_REG (Pmode,
24419 RS6000_PIC_OFFSET_TABLE_REGNUM),
24420 lr);
24422 if (!info->lr_save_p)
24423 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
24425 #endif
24427 /* If we need to, save the TOC register after doing the stack setup.
24428 Do not emit eh frame info for this save. The unwinder wants info,
24429 conceptually attached to instructions in this function, about
24430 register values in the caller of this function. This R2 may have
24431 already been changed from the value in the caller.
24432 We don't attempt to write accurate DWARF EH frame info for R2
24433 because code emitted by gcc for a (non-pointer) function call
24434 doesn't save and restore R2. Instead, R2 is managed out-of-line
24435 by a linker generated plt call stub when the function resides in
24436 a shared library. This behaviour is costly to describe in DWARF,
24437 both in terms of the size of DWARF info and the time taken in the
24438 unwinder to interpret it. R2 changes, apart from the
24439 calls_eh_return case earlier in this function, are handled by
24440 linux-unwind.h frob_update_context. */
24441 if (rs6000_save_toc_in_prologue_p ())
24443 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
24444 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
24447 if (flag_split_stack && split_stack_arg_pointer_used_p ())
24449 /* Set up the arg pointer (r12) for -fsplit-stack code. If
24450 __morestack was called, it left the arg pointer to the old
24451 stack in r29. Otherwise, the arg pointer is the top of the
24452 current frame. */
24453 if (sp_adjust)
24455 rtx r12 = gen_rtx_REG (Pmode, 12);
24456 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
24457 emit_insn_before (set_r12, sp_adjust);
24459 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
24461 rtx r12 = gen_rtx_REG (Pmode, 12);
24462 if (frame_off == 0)
24463 emit_move_insn (r12, frame_reg_rtx);
24464 else
24465 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
24467 if (info->push_p)
24469 rtx r12 = gen_rtx_REG (Pmode, 12);
24470 rtx r29 = gen_rtx_REG (Pmode, 29);
24471 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
24472 rtx not_more = gen_label_rtx ();
24473 rtx jump;
24475 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
24476 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
24477 gen_rtx_LABEL_REF (VOIDmode, not_more),
24478 pc_rtx);
24479 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
24480 JUMP_LABEL (jump) = not_more;
24481 LABEL_NUSES (not_more) += 1;
24482 emit_move_insn (r12, r29);
24483 emit_label (not_more);
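/* cr7 is presumed set by the split-stack prologue's __morestack
   check; when it shows __morestack was called, the old stack's arg
   pointer was left in r29 and the fall-through copies it to r12.  */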
24488 /* Output .extern statements for the save/restore routines we use. */
24490 static void
24491 rs6000_output_savres_externs (FILE *file)
24493 rs6000_stack_t *info = rs6000_stack_info ();
24495 if (TARGET_DEBUG_STACK)
24496 debug_stack_info (info);
24498 /* Write .extern for any function we will call to save and restore
24499 fp values. */
24500 if (info->first_fp_reg_save < 64
24501 && !TARGET_MACHO
24502 && !TARGET_ELF)
24504 char *name;
24505 int regno = info->first_fp_reg_save - 32;
24507 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
24509 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
24510 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
24511 name = rs6000_savres_routine_name (info, regno, sel);
24512 fprintf (file, "\t.extern %s\n", name);
24514 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
24516 bool lr = (info->savres_strategy
24517 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
24518 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24519 name = rs6000_savres_routine_name (info, regno, sel);
24520 fprintf (file, "\t.extern %s\n", name);
24525 /* Write function prologue. */
24527 static void
24528 rs6000_output_function_prologue (FILE *file,
24529 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
24531 if (!cfun->is_thunk)
24532 rs6000_output_savres_externs (file);
24534 /* ELFv2 ABI r2 setup code and local entry point. This must follow
24535 immediately after the global entry point label. */
24536 if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
24538 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
24540 fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
24541 fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
24543 fputs ("\t.localentry\t", file);
24544 assemble_name (file, name);
24545 fputs (",.-", file);
24546 assemble_name (file, name);
24547 fputs ("\n", file);
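/* For a function NAME the emitted entry therefore reads:
     0:	addis 2,12,.TOC.-0b@ha
	addi 2,2,.TOC.-0b@l
	.localentry	NAME,.-NAME
   deriving r2 from the global entry address passed in r12.  */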
24550 /* Output -mprofile-kernel code. This needs to be done here instead of
24551 in output_function_profile since it must go after the ELFv2 ABI
24552 local entry point. */
24553 if (TARGET_PROFILE_KERNEL && crtl->profile)
24555 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24556 gcc_assert (!TARGET_32BIT);
24558 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
/* In the ELFv2 ABI we have no compiler stack word.  It must be
   the responsibility of _mcount to preserve the static chain
   register if required.  */
24563 if (DEFAULT_ABI != ABI_ELFv2
24564 && cfun->static_chain_decl != NULL)
24566 asm_fprintf (file, "\tstd %s,24(%s)\n",
24567 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24568 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24569 asm_fprintf (file, "\tld %s,24(%s)\n",
24570 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24572 else
24573 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24576 rs6000_pic_labelno++;
24579 /* Non-zero if vmx regs are restored before the frame pop, zero if
24580 we restore after the pop when possible. */
24581 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
24583 /* Restoring cr is a two step process: loading a reg from the frame
24584 save, then moving the reg to cr. For ABI_V4 we must let the
24585 unwinder know that the stack location is no longer valid at or
24586 before the stack deallocation, but we can't emit a cfa_restore for
24587 cr at the stack deallocation like we do for other registers.
24588 The trouble is that it is possible for the move to cr to be
24589 scheduled after the stack deallocation. So say exactly where cr
24590 is located on each of the two insns. */
24592 static rtx
24593 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
24595 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
24596 rtx reg = gen_rtx_REG (SImode, regno);
24597 rtx_insn *insn = emit_move_insn (reg, mem);
24599 if (!exit_func && DEFAULT_ABI == ABI_V4)
24601 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24602 rtx set = gen_rtx_SET (reg, cr);
24604 add_reg_note (insn, REG_CFA_REGISTER, set);
24605 RTX_FRAME_RELATED_P (insn) = 1;
24607 return reg;
24610 /* Reload CR from REG. */
24612 static void
24613 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
24615 int count = 0;
24616 int i;
24618 if (using_mfcr_multiple)
24620 for (i = 0; i < 8; i++)
24621 if (save_reg_p (CR0_REGNO + i))
24622 count++;
24623 gcc_assert (count);
24626 if (using_mfcr_multiple && count > 1)
24628 rtx_insn *insn;
24629 rtvec p;
24630 int ndx;
24632 p = rtvec_alloc (count);
24634 ndx = 0;
24635 for (i = 0; i < 8; i++)
24636 if (save_reg_p (CR0_REGNO + i))
24638 rtvec r = rtvec_alloc (2);
24639 RTVEC_ELT (r, 0) = reg;
24640 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
24641 RTVEC_ELT (p, ndx) =
24642 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
24643 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
24644 ndx++;
24646 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24647 gcc_assert (ndx == count);
24649 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24650 CR field separately. */
24651 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24653 for (i = 0; i < 8; i++)
24654 if (save_reg_p (CR0_REGNO + i))
24655 add_reg_note (insn, REG_CFA_RESTORE,
24656 gen_rtx_REG (SImode, CR0_REGNO + i));
24658 RTX_FRAME_RELATED_P (insn) = 1;
24661 else
24662 for (i = 0; i < 8; i++)
24663 if (save_reg_p (CR0_REGNO + i))
24665 rtx insn = emit_insn (gen_movsi_to_cr_one
24666 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24668 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24669 CR field separately, attached to the insn that in fact
24670 restores this particular CR field. */
24671 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24673 add_reg_note (insn, REG_CFA_RESTORE,
24674 gen_rtx_REG (SImode, CR0_REGNO + i));
24676 RTX_FRAME_RELATED_P (insn) = 1;
24680 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
24681 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
24682 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24684 rtx_insn *insn = get_last_insn ();
24685 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24687 add_reg_note (insn, REG_CFA_RESTORE, cr);
24688 RTX_FRAME_RELATED_P (insn) = 1;
24692 /* Like cr, the move to lr instruction can be scheduled after the
24693 stack deallocation, but unlike cr, its stack frame save is still
24694 valid. So we only need to emit the cfa_restore on the correct
24695 instruction. */
24697 static void
24698 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
24700 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
24701 rtx reg = gen_rtx_REG (Pmode, regno);
24703 emit_move_insn (reg, mem);
24706 static void
24707 restore_saved_lr (int regno, bool exit_func)
24709 rtx reg = gen_rtx_REG (Pmode, regno);
24710 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24711 rtx_insn *insn = emit_move_insn (lr, reg);
24713 if (!exit_func && flag_shrink_wrap)
24715 add_reg_note (insn, REG_CFA_RESTORE, lr);
24716 RTX_FRAME_RELATED_P (insn) = 1;
24720 static rtx
24721 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
24723 if (DEFAULT_ABI == ABI_ELFv2)
24725 int i;
24726 for (i = 0; i < 8; i++)
24727 if (save_reg_p (CR0_REGNO + i))
24729 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
24730 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
24731 cfa_restores);
24734 else if (info->cr_save_p)
24735 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24736 gen_rtx_REG (SImode, CR2_REGNO),
24737 cfa_restores);
24739 if (info->lr_save_p)
24740 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24741 gen_rtx_REG (Pmode, LR_REGNO),
24742 cfa_restores);
24743 return cfa_restores;
/* Return true if OFFSET from the stack pointer can be clobbered by
   signals.  V.4 doesn't have any stack cushion; the AIX ABIs have
   220 or 288 bytes below the stack pointer that are not clobbered
   by signals.  */

static inline bool
offset_below_red_zone_p (HOST_WIDE_INT offset)
{
  return offset < (DEFAULT_ABI == ABI_V4
		   ? 0
		   : TARGET_32BIT ? -220 : -288);
}
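/* For example, with the 64-bit 288-byte red zone, an AltiVec save
   slot at sp-416 lies below the protected area and so must be
   reloaded before the stack pointer is popped.  */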
24758 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
24760 static void
24761 emit_cfa_restores (rtx cfa_restores)
24763 rtx_insn *insn = get_last_insn ();
24764 rtx *loc = &REG_NOTES (insn);
24766 while (*loc)
24767 loc = &XEXP (*loc, 1);
24768 *loc = cfa_restores;
24769 RTX_FRAME_RELATED_P (insn) = 1;
24772 /* Emit function epilogue as insns. */
24774 void
24775 rs6000_emit_epilogue (int sibcall)
24777 rs6000_stack_t *info;
24778 int restoring_GPRs_inline;
24779 int restoring_FPRs_inline;
24780 int using_load_multiple;
24781 int using_mtcr_multiple;
24782 int use_backchain_to_restore_sp;
24783 int restore_lr;
24784 int strategy;
24785 HOST_WIDE_INT frame_off = 0;
24786 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
24787 rtx frame_reg_rtx = sp_reg_rtx;
24788 rtx cfa_restores = NULL_RTX;
24789 rtx insn;
24790 rtx cr_save_reg = NULL_RTX;
24791 machine_mode reg_mode = Pmode;
24792 int reg_size = TARGET_32BIT ? 4 : 8;
24793 int i;
24794 bool exit_func;
24795 unsigned ptr_regno;
24797 info = rs6000_stack_info ();
24799 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24801 reg_mode = V2SImode;
24802 reg_size = 8;
24805 strategy = info->savres_strategy;
24806 using_load_multiple = strategy & SAVRES_MULTIPLE;
24807 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
24808 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
24809 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
24810 || rs6000_cpu == PROCESSOR_PPC603
24811 || rs6000_cpu == PROCESSOR_PPC750
24812 || optimize_size);
/* Restore via the backchain when we have a large frame, since this
   is more efficient than an addis, addi pair.  The second condition
   here will not trigger at the moment; we don't actually need a
   frame pointer for alloca, but the generic parts of the compiler
   give us one anyway.  */
24818 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
24819 ? info->lr_save_offset
24820 : 0) > 32767
24821 || (cfun->calls_alloca
24822 && !frame_pointer_needed));
24823 restore_lr = (info->lr_save_p
24824 && (restoring_FPRs_inline
24825 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
24826 && (restoring_GPRs_inline
24827 || info->first_fp_reg_save < 64));
24829 if (WORLD_SAVE_P (info))
24831 int i, j;
24832 char rname[30];
24833 const char *alloc_rname;
24834 rtvec p;
24836 /* eh_rest_world_r10 will return to the location saved in the LR
stack slot (which is not likely to be our caller).
24838 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
24839 rest_world is similar, except any R10 parameter is ignored.
24840 The exception-handling stuff that was here in 2.95 is no
24841 longer necessary. */
24843 p = rtvec_alloc (9
24845 + 32 - info->first_gp_reg_save
24846 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
24847 + 63 + 1 - info->first_fp_reg_save);
24849 strcpy (rname, ((crtl->calls_eh_return) ?
24850 "*eh_rest_world_r10" : "*rest_world"));
24851 alloc_rname = ggc_strdup (rname);
24853 j = 0;
24854 RTVEC_ELT (p, j++) = ret_rtx;
24855 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
24856 gen_rtx_REG (Pmode,
24857 LR_REGNO));
24858 RTVEC_ELT (p, j++)
24859 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
24860 /* The instruction pattern requires a clobber here;
24861 it is shared with the restVEC helper. */
24862 RTVEC_ELT (p, j++)
24863 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
24866 /* CR register traditionally saved as CR2. */
24867 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
24868 RTVEC_ELT (p, j++)
24869 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
24870 if (flag_shrink_wrap)
24872 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24873 gen_rtx_REG (Pmode, LR_REGNO),
24874 cfa_restores);
24875 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24879 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24881 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
24882 RTVEC_ELT (p, j++)
24883 = gen_frame_load (reg,
24884 frame_reg_rtx, info->gp_save_offset + reg_size * i);
24885 if (flag_shrink_wrap)
24886 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24888 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
24890 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
24891 RTVEC_ELT (p, j++)
24892 = gen_frame_load (reg,
24893 frame_reg_rtx, info->altivec_save_offset + 16 * i);
24894 if (flag_shrink_wrap)
24895 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24897 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
24899 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24900 ? DFmode : SFmode),
24901 info->first_fp_reg_save + i);
24902 RTVEC_ELT (p, j++)
24903 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
24904 if (flag_shrink_wrap)
24905 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24907 RTVEC_ELT (p, j++)
24908 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
24909 RTVEC_ELT (p, j++)
24910 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
24911 RTVEC_ELT (p, j++)
24912 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
24913 RTVEC_ELT (p, j++)
24914 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
24915 RTVEC_ELT (p, j++)
24916 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
24917 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
24919 if (flag_shrink_wrap)
24921 REG_NOTES (insn) = cfa_restores;
24922 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24923 RTX_FRAME_RELATED_P (insn) = 1;
24925 return;
24928 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
24929 if (info->push_p)
24930 frame_off = info->total_size;
24932 /* Restore AltiVec registers if we must do so before adjusting the
24933 stack. */
24934 if (info->altivec_size != 0
24935 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24936 || (DEFAULT_ABI != ABI_V4
24937 && offset_below_red_zone_p (info->altivec_save_offset))))
24939 int i;
24940 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24942 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24943 if (use_backchain_to_restore_sp)
24945 int frame_regno = 11;
24947 if ((strategy & REST_INLINE_VRS) == 0)
24949 /* Of r11 and r12, select the one not clobbered by an
24950 out-of-line restore function for the frame register. */
24951 frame_regno = 11 + 12 - scratch_regno;
24953 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
24954 emit_move_insn (frame_reg_rtx,
24955 gen_rtx_MEM (Pmode, sp_reg_rtx));
24956 frame_off = 0;
24958 else if (frame_pointer_needed)
24959 frame_reg_rtx = hard_frame_pointer_rtx;
24961 if ((strategy & REST_INLINE_VRS) == 0)
24963 int end_save = info->altivec_save_offset + info->altivec_size;
24964 int ptr_off;
24965 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24966 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24968 if (end_save + frame_off != 0)
24970 rtx offset = GEN_INT (end_save + frame_off);
24972 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24974 else
24975 emit_move_insn (ptr_reg, frame_reg_rtx);
24977 ptr_off = -end_save;
24978 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24979 info->altivec_save_offset + ptr_off,
24980 0, V4SImode, SAVRES_VR);
24982 else
24984 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24985 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24987 rtx addr, areg, mem, reg;
24989 areg = gen_rtx_REG (Pmode, 0);
24990 emit_move_insn
24991 (areg, GEN_INT (info->altivec_save_offset
24992 + frame_off
24993 + 16 * (i - info->first_altivec_reg_save)));
24995 /* AltiVec addressing mode is [reg+reg]. */
24996 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24997 mem = gen_frame_mem (V4SImode, addr);
24999 reg = gen_rtx_REG (V4SImode, i);
25000 /* Rather than emitting a generic move, force use of the
25001 lvx instruction, which we always want. In particular
25002 we don't want lxvd2x/xxpermdi for little endian. */
25003 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
25007 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25008 if (((strategy & REST_INLINE_VRS) == 0
25009 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
25010 && (flag_shrink_wrap
25011 || (offset_below_red_zone_p
25012 (info->altivec_save_offset
25013 + 16 * (i - info->first_altivec_reg_save)))))
25015 rtx reg = gen_rtx_REG (V4SImode, i);
25016 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25020 /* Restore VRSAVE if we must do so before adjusting the stack. */
25021 if (info->vrsave_size != 0
25022 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25023 || (DEFAULT_ABI != ABI_V4
25024 && offset_below_red_zone_p (info->vrsave_save_offset))))
25026 rtx reg;
25028 if (frame_reg_rtx == sp_reg_rtx)
25030 if (use_backchain_to_restore_sp)
25032 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25033 emit_move_insn (frame_reg_rtx,
25034 gen_rtx_MEM (Pmode, sp_reg_rtx));
25035 frame_off = 0;
25037 else if (frame_pointer_needed)
25038 frame_reg_rtx = hard_frame_pointer_rtx;
25041 reg = gen_rtx_REG (SImode, 12);
25042 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25043 info->vrsave_save_offset + frame_off));
25045 emit_insn (generate_set_vrsave (reg, info, 1));
25048 insn = NULL_RTX;
25049 /* If we have a large stack frame, restore the old stack pointer
25050 using the backchain. */
25051 if (use_backchain_to_restore_sp)
25053 if (frame_reg_rtx == sp_reg_rtx)
25055 /* Under V.4, don't reset the stack pointer until after we're done
25056 loading the saved registers. */
25057 if (DEFAULT_ABI == ABI_V4)
25058 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25060 insn = emit_move_insn (frame_reg_rtx,
25061 gen_rtx_MEM (Pmode, sp_reg_rtx));
25062 frame_off = 0;
25064 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25065 && DEFAULT_ABI == ABI_V4)
25066 /* frame_reg_rtx has been set up by the altivec restore. */
25068 else
25070 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
25071 frame_reg_rtx = sp_reg_rtx;
25074 /* If we have a frame pointer, we can restore the old stack pointer
25075 from it. */
25076 else if (frame_pointer_needed)
25078 frame_reg_rtx = sp_reg_rtx;
25079 if (DEFAULT_ABI == ABI_V4)
25080 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25081 /* Prevent reordering memory accesses against stack pointer restore. */
25082 else if (cfun->calls_alloca
25083 || offset_below_red_zone_p (-info->total_size))
25084 rs6000_emit_stack_tie (frame_reg_rtx, true);
25086 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
25087 GEN_INT (info->total_size)));
25088 frame_off = 0;
25090 else if (info->push_p
25091 && DEFAULT_ABI != ABI_V4
25092 && !crtl->calls_eh_return)
25094 /* Prevent reordering memory accesses against stack pointer restore. */
25095 if (cfun->calls_alloca
25096 || offset_below_red_zone_p (-info->total_size))
25097 rs6000_emit_stack_tie (frame_reg_rtx, false);
25098 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
25099 GEN_INT (info->total_size)));
25100 frame_off = 0;
25102 if (insn && frame_reg_rtx == sp_reg_rtx)
25104 if (cfa_restores)
25106 REG_NOTES (insn) = cfa_restores;
25107 cfa_restores = NULL_RTX;
25109 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
25110 RTX_FRAME_RELATED_P (insn) = 1;
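/* Restoring the stack pointer makes the CFA plain sp again, so any
   queued cfa_restores can be reported on this same insn.  */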
25113 /* Restore AltiVec registers if we have not done so already. */
25114 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25115 && info->altivec_size != 0
25116 && (DEFAULT_ABI == ABI_V4
25117 || !offset_below_red_zone_p (info->altivec_save_offset)))
25119 int i;
25121 if ((strategy & REST_INLINE_VRS) == 0)
25123 int end_save = info->altivec_save_offset + info->altivec_size;
25124 int ptr_off;
25125 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
25126 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
25127 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
25129 if (end_save + frame_off != 0)
25131 rtx offset = GEN_INT (end_save + frame_off);
25133 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
25135 else
25136 emit_move_insn (ptr_reg, frame_reg_rtx);
25138 ptr_off = -end_save;
25139 insn = rs6000_emit_savres_rtx (info, scratch_reg,
25140 info->altivec_save_offset + ptr_off,
25141 0, V4SImode, SAVRES_VR);
25142 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
25144 /* Frame reg was clobbered by out-of-line save. Restore it
25145 from ptr_reg, and if we are calling out-of-line gpr or
25146 fpr restore set up the correct pointer and offset. */
25147 unsigned newptr_regno = 1;
25148 if (!restoring_GPRs_inline)
25150 bool lr = info->gp_save_offset + info->gp_size == 0;
25151 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
25152 newptr_regno = ptr_regno_for_savres (sel);
25153 end_save = info->gp_save_offset + info->gp_size;
25155 else if (!restoring_FPRs_inline)
25157 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
25158 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25159 newptr_regno = ptr_regno_for_savres (sel);
25160 end_save = info->fp_save_offset + info->fp_size;
25163 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
25164 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
25166 if (end_save + ptr_off != 0)
25168 rtx offset = GEN_INT (end_save + ptr_off);
25170 frame_off = -end_save;
25171 if (TARGET_32BIT)
25172 emit_insn (gen_addsi3_carry (frame_reg_rtx,
25173 ptr_reg, offset));
25174 else
25175 emit_insn (gen_adddi3_carry (frame_reg_rtx,
25176 ptr_reg, offset));
25178 else
25180 frame_off = ptr_off;
25181 emit_move_insn (frame_reg_rtx, ptr_reg);
25185 else
25187 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25188 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25190 rtx addr, areg, mem, reg;
25192 areg = gen_rtx_REG (Pmode, 0);
25193 emit_move_insn
25194 (areg, GEN_INT (info->altivec_save_offset
25195 + frame_off
25196 + 16 * (i - info->first_altivec_reg_save)));
25198 /* AltiVec addressing mode is [reg+reg]. */
25199 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
25200 mem = gen_frame_mem (V4SImode, addr);
25202 reg = gen_rtx_REG (V4SImode, i);
25203 /* Rather than emitting a generic move, force use of the
25204 lvx instruction, which we always want. In particular
25205 we don't want lxvd2x/xxpermdi for little endian. */
25206 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
25210 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25211 if (((strategy & REST_INLINE_VRS) == 0
25212 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
25213 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
25215 rtx reg = gen_rtx_REG (V4SImode, i);
25216 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25220 /* Restore VRSAVE if we have not done so already. */
25221 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25222 && info->vrsave_size != 0
25223 && (DEFAULT_ABI == ABI_V4
25224 || !offset_below_red_zone_p (info->vrsave_save_offset)))
25226 rtx reg;
25228 reg = gen_rtx_REG (SImode, 12);
25229 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25230 info->vrsave_save_offset + frame_off));
25232 emit_insn (generate_set_vrsave (reg, info, 1));
25235 /* If we exit by an out-of-line restore function on ABI_V4 then that
25236 function will deallocate the stack, so we don't need to worry
25237 about the unwinder restoring cr from an invalid stack frame
25238 location. */
25239 exit_func = (!restoring_FPRs_inline
25240 || (!restoring_GPRs_inline
25241 && info->first_fp_reg_save == 64));
25243 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
25244 *separate* slots if the routine calls __builtin_eh_return, so
25245 that they can be independently restored by the unwinder. */
25246 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
25248 int i, cr_off = info->ehcr_offset;
25250 for (i = 0; i < 8; i++)
25251 if (!call_used_regs[CR0_REGNO + i])
25253 rtx reg = gen_rtx_REG (SImode, 0);
25254 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25255 cr_off + frame_off));
25257 insn = emit_insn (gen_movsi_to_cr_one
25258 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
25260 if (!exit_func && flag_shrink_wrap)
25262 add_reg_note (insn, REG_CFA_RESTORE,
25263 gen_rtx_REG (SImode, CR0_REGNO + i));
25265 RTX_FRAME_RELATED_P (insn) = 1;
25268 cr_off += reg_size;
25272 /* Get the old lr if we saved it. If we are restoring registers
25273 out-of-line, then the out-of-line routines can do this for us. */
25274 if (restore_lr && restoring_GPRs_inline)
25275 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25277 /* Get the old cr if we saved it. */
25278 if (info->cr_save_p)
25280 unsigned cr_save_regno = 12;
25282 if (!restoring_GPRs_inline)
25284 /* Ensure we don't use the register used by the out-of-line
25285 gpr register restore below. */
25286 bool lr = info->gp_save_offset + info->gp_size == 0;
25287 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
25288 int gpr_ptr_regno = ptr_regno_for_savres (sel);
25290 if (gpr_ptr_regno == 12)
25291 cr_save_regno = 11;
25292 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
25294 else if (REGNO (frame_reg_rtx) == 12)
25295 cr_save_regno = 11;
25297 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
25298 info->cr_save_offset + frame_off,
25299 exit_func);
25302 /* Set LR here to try to overlap restores below. */
25303 if (restore_lr && restoring_GPRs_inline)
25304 restore_saved_lr (0, exit_func);
25306 /* Load exception handler data registers, if needed. */
25307 if (crtl->calls_eh_return)
25309 unsigned int i, regno;
25311 if (TARGET_AIX)
25313 rtx reg = gen_rtx_REG (reg_mode, 2);
25314 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25315 frame_off + RS6000_TOC_SAVE_SLOT));
25318 for (i = 0; ; ++i)
25320 rtx mem;
25322 regno = EH_RETURN_DATA_REGNO (i);
25323 if (regno == INVALID_REGNUM)
25324 break;
25326 /* Note: possible use of r0 here to address SPE regs. */
25327 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
25328 info->ehrd_offset + frame_off
25329 + reg_size * (int) i);
25331 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
25335 /* Restore GPRs. This is done as a PARALLEL if we are using
25336 the load-multiple instructions. */
25337 if (TARGET_SPE_ABI
25338 && info->spe_64bit_regs_used
25339 && info->first_gp_reg_save != 32)
25341 /* Determine whether we can address all of the registers that need
25342 to be saved with an offset from frame_reg_rtx that fits in
25343 the small const field for SPE memory instructions. */
25344 int spe_regs_addressable
25345 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25346 + reg_size * (32 - info->first_gp_reg_save - 1))
25347 && restoring_GPRs_inline);
25349 if (!spe_regs_addressable)
25351 int ool_adjust = 0;
25352 rtx old_frame_reg_rtx = frame_reg_rtx;
25353 /* Make r11 point to the start of the SPE save area. We worried about
25354 not clobbering it when we were saving registers in the prologue.
25355 There's no need to worry here because the static chain is passed
25356 anew to every function. */
25358 if (!restoring_GPRs_inline)
25359 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25360 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25361 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
25362 GEN_INT (info->spe_gp_save_offset
25363 + frame_off
25364 - ool_adjust)));
25365 /* Keep the invariant that frame_reg_rtx + frame_off points
25366 at the top of the stack frame. */
25367 frame_off = -info->spe_gp_save_offset + ool_adjust;
25370 if (restoring_GPRs_inline)
25372 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
25374 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25375 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25377 rtx offset, addr, mem, reg;
25379 /* We're doing all this to ensure that the immediate offset
25380 fits into the immediate field of 'evldd'. */
25381 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
25383 offset = GEN_INT (spe_offset + reg_size * i);
25384 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
25385 mem = gen_rtx_MEM (V2SImode, addr);
25386 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
25388 emit_move_insn (reg, mem);
25391 else
25392 rs6000_emit_savres_rtx (info, frame_reg_rtx,
25393 info->spe_gp_save_offset + frame_off,
25394 info->lr_save_offset + frame_off,
25395 reg_mode,
25396 SAVRES_GPR | SAVRES_LR);
25398 else if (!restoring_GPRs_inline)
25400 /* We are jumping to an out-of-line function. */
25401 rtx ptr_reg;
25402 int end_save = info->gp_save_offset + info->gp_size;
25403 bool can_use_exit = end_save == 0;
25404 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
25405 int ptr_off;
25407 /* Emit stack reset code if we need it. */
25408 ptr_regno = ptr_regno_for_savres (sel);
25409 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25410 if (can_use_exit)
25411 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25412 else if (end_save + frame_off != 0)
25413 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
25414 GEN_INT (end_save + frame_off)));
25415 else if (REGNO (frame_reg_rtx) != ptr_regno)
25416 emit_move_insn (ptr_reg, frame_reg_rtx);
25417 if (REGNO (frame_reg_rtx) == ptr_regno)
25418 frame_off = -end_save;
25420 if (can_use_exit && info->cr_save_p)
25421 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
25423 ptr_off = -end_save;
25424 rs6000_emit_savres_rtx (info, ptr_reg,
25425 info->gp_save_offset + ptr_off,
25426 info->lr_save_offset + ptr_off,
25427 reg_mode, sel);
25429 else if (using_load_multiple)
25431 rtvec p;
25432 p = rtvec_alloc (32 - info->first_gp_reg_save);
25433 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25434 RTVEC_ELT (p, i)
25435 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25436 frame_reg_rtx,
25437 info->gp_save_offset + frame_off + reg_size * i);
25438 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25440 else
25442 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25443 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25444 emit_insn (gen_frame_load
25445 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25446 frame_reg_rtx,
25447 info->gp_save_offset + frame_off + reg_size * i));
25450 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25452 /* If the frame pointer was used then we can't delay emitting
25453 a REG_CFA_DEF_CFA note. This must happen on the insn that
25454 restores the frame pointer, r31. We may have already emitted
25455 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
25456 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
25457 be harmless if emitted. */
25458 if (frame_pointer_needed)
25460 insn = get_last_insn ();
25461 add_reg_note (insn, REG_CFA_DEF_CFA,
25462 plus_constant (Pmode, frame_reg_rtx, frame_off));
25463 RTX_FRAME_RELATED_P (insn) = 1;
25466 /* Set up cfa_restores. We always need these when
25467 shrink-wrapping. If not shrink-wrapping then we only need
25468 the cfa_restore when the stack location is no longer valid.
25469 The cfa_restores must be emitted on or before the insn that
25470 invalidates the stack, and of course must not be emitted
25471 before the insn that actually does the restore. The latter
25472 is why it is a bad idea to emit the cfa_restores as a group
25473 on the last instruction here that actually does a restore:
25474 that insn may be reordered with respect to others doing
25475 restores. */
25476 if (flag_shrink_wrap
25477 && !restoring_GPRs_inline
25478 && info->first_fp_reg_save == 64)
25479 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25481 for (i = info->first_gp_reg_save; i < 32; i++)
25482 if (!restoring_GPRs_inline
25483 || using_load_multiple
25484 || rs6000_reg_live_or_pic_offset_p (i))
25486 rtx reg = gen_rtx_REG (reg_mode, i);
25488 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25492 if (!restoring_GPRs_inline
25493 && info->first_fp_reg_save == 64)
25495 /* We are jumping to an out-of-line function. */
25496 if (cfa_restores)
25497 emit_cfa_restores (cfa_restores);
25498 return;
25501 if (restore_lr && !restoring_GPRs_inline)
25503 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25504 restore_saved_lr (0, exit_func);
25507 /* Restore fpr's if we need to do it without calling a function. */
25508 if (restoring_FPRs_inline)
25509 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25510 if (save_reg_p (info->first_fp_reg_save + i))
25512 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25513 ? DFmode : SFmode),
25514 info->first_fp_reg_save + i);
25515 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25516 info->fp_save_offset + frame_off + 8 * i));
25517 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25518 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25521 /* If we saved cr, restore it here. Just those that were used. */
25522 if (info->cr_save_p)
25523 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
25525 /* If this is V.4, unwind the stack pointer after all of the loads
25526 have been done, or set up r11 if we are restoring fp out of line. */
25527 ptr_regno = 1;
25528 if (!restoring_FPRs_inline)
25530 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25531 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25532 ptr_regno = ptr_regno_for_savres (sel);
25535 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25536 if (REGNO (frame_reg_rtx) == ptr_regno)
25537 frame_off = 0;
25539 if (insn && restoring_FPRs_inline)
25541 if (cfa_restores)
25543 REG_NOTES (insn) = cfa_restores;
25544 cfa_restores = NULL_RTX;
25546 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
25547 RTX_FRAME_RELATED_P (insn) = 1;
25550 if (crtl->calls_eh_return)
25552 rtx sa = EH_RETURN_STACKADJ_RTX;
25553 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
25556 if (!sibcall)
25558 rtvec p;
25559 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25560 if (! restoring_FPRs_inline)
25562 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
25563 RTVEC_ELT (p, 0) = ret_rtx;
25565 else
25567 if (cfa_restores)
25569 /* We can't hang the cfa_restores off a simple return,
25570 since the shrink-wrap code sometimes uses an existing
25571 return. This means there might be a path from
25572 pre-prologue code to this return, and dwarf2cfi code
25573 wants the eh_frame unwinder state to be the same on
25574 all paths to any point. So we need to emit the
25575 cfa_restores before the return. For -m64 we really
25576 don't need epilogue cfa_restores at all, except for
25577 this irritating dwarf2cfi with shrink-wrap
25578 requirement; the stack red-zone means eh_frame info
25579 from the prologue telling the unwinder to restore
25580 from the stack is perfectly good right to the end of
25581 the function. */
25582 emit_insn (gen_blockage ());
25583 emit_cfa_restores (cfa_restores);
25584 cfa_restores = NULL_RTX;
25586 p = rtvec_alloc (2);
25587 RTVEC_ELT (p, 0) = simple_return_rtx;
25590 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
25591 ? gen_rtx_USE (VOIDmode,
25592 gen_rtx_REG (Pmode, LR_REGNO))
25593 : gen_rtx_CLOBBER (VOIDmode,
25594 gen_rtx_REG (Pmode, LR_REGNO)));
25596 /* If we have to restore more than two FP registers, branch to the
25597 restore function. It will return to our caller. */
25598 if (! restoring_FPRs_inline)
25600 int i;
25601 int reg;
25602 rtx sym;
25604 if (flag_shrink_wrap)
25605 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25607 sym = rs6000_savres_routine_sym (info,
25608 SAVRES_FPR | (lr ? SAVRES_LR : 0));
25609 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
25610 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) ? 1 : 11;
25611 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
25613 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25615 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
25617 RTVEC_ELT (p, i + 4)
25618 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
25619 if (flag_shrink_wrap)
25620 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
25621 cfa_restores);
25625 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
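/* When the FPRs are restored out of line, the PARALLEL just emitted
   amounts to, schematically,

       b <out-of-line routine>   # e.g. _restfpr_<n>; naming is ABI-dependent

   annotated with the frame loads of f<first>..f31 and the LR use or
   clobber, so the routine reloads the FPRs and returns to our caller.  */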
25628 if (cfa_restores)
25630 if (sibcall)
25631 /* Ensure the cfa_restores are hung off an insn that won't
25632 be reordered above other restores. */
25633 emit_insn (gen_blockage ());
25635 emit_cfa_restores (cfa_restores);
25639 /* Write function epilogue. */
25641 static void
25642 rs6000_output_function_epilogue (FILE *file,
25643 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
25645 #if TARGET_MACHO
25646 macho_branch_islands ();
25647 /* Mach-O doesn't support labels at the end of objects, so if
25648 it looks like we might want one, insert a NOP. */
25650 rtx_insn *insn = get_last_insn ();
25651 rtx_insn *deleted_debug_label = NULL;
25652 while (insn
25653 && NOTE_P (insn)
25654 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
25656 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
25657 notes only, instead set their CODE_LABEL_NUMBER to -1,
25658 otherwise there would be code generation differences
25659 between -g and -g0. */
25660 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25661 deleted_debug_label = insn;
25662 insn = PREV_INSN (insn);
25664 if (insn
25665 && (LABEL_P (insn)
25666 || (NOTE_P (insn)
25667 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
25668 fputs ("\tnop\n", file);
25669 else if (deleted_debug_label)
25670 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
25671 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25672 CODE_LABEL_NUMBER (insn) = -1;
25674 #endif
25676 /* Output a traceback table here. See /usr/include/sys/debug.h for info
25677 on its format.
25679 We don't output a traceback table if -finhibit-size-directive was
25680 used. The documentation for -finhibit-size-directive reads
25681 ``don't output a @code{.size} assembler directive, or anything
25682 else that would cause trouble if the function is split in the
25683 middle, and the two halves are placed at locations far apart in
25684 memory.'' The traceback table has this property, since it
25685 includes the offset from the start of the function to the
25686 traceback table itself.
25688 System V.4 PowerPC targets (and the embedded ABI derived from it) use a
25689 different traceback table. */
25690 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25691 && ! flag_inhibit_size_directive
25692 && rs6000_traceback != traceback_none && !cfun->is_thunk)
25694 const char *fname = NULL;
25695 const char *language_string = lang_hooks.name;
25696 int fixed_parms = 0, float_parms = 0, parm_info = 0;
25697 int i;
25698 int optional_tbtab;
25699 rs6000_stack_t *info = rs6000_stack_info ();
25701 if (rs6000_traceback == traceback_full)
25702 optional_tbtab = 1;
25703 else if (rs6000_traceback == traceback_part)
25704 optional_tbtab = 0;
25705 else
25706 optional_tbtab = !optimize_size && !TARGET_ELF;
25708 if (optional_tbtab)
25710 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25711 while (*fname == '.') /* V.4 encodes . in the name */
25712 fname++;
25714 /* Need label immediately before tbtab, so we can compute
25715 its offset from the function start. */
25716 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25717 ASM_OUTPUT_LABEL (file, fname);
25720 /* The .tbtab pseudo-op can only be used for the first eight
25721 expressions, since it can't handle the possibly variable
25722 length fields that follow. However, if you omit the optional
25723 fields, the assembler outputs zeros for all optional fields
25724 anyway, giving each variable-length field its minimum length
25725 (as defined in sys/debug.h). Thus we cannot use the .tbtab
25726 pseudo-op at all. */
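/* Illustrative sketch of the mandatory part emitted below (not literal
   output; the exact bytes depend on the fields computed for this
   function):

       .long 0        # all-zero marker word
       .byte 0,0,...  # format 0, language code, then flag and count bytes

   The optional fields follow only when optional_tbtab is set.  */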
25728 /* An all-zero word flags the start of the tbtab, for debuggers
25729 that have to find it by searching forward from the entry
25730 point or from the current pc. */
25731 fputs ("\t.long 0\n", file);
25733 /* Tbtab format type. Use format type 0. */
25734 fputs ("\t.byte 0,", file);
25736 /* Language type. Unfortunately, there does not seem to be any
25737 official way to discover the language being compiled, so we
25738 use language_string.
25739 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
25740 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
25741 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
25742 either, so for now use 0. */
25743 if (lang_GNU_C ()
25744 || ! strcmp (language_string, "GNU GIMPLE")
25745 || ! strcmp (language_string, "GNU Go")
25746 || ! strcmp (language_string, "libgccjit"))
25747 i = 0;
25748 else if (! strcmp (language_string, "GNU F77")
25749 || lang_GNU_Fortran ())
25750 i = 1;
25751 else if (! strcmp (language_string, "GNU Pascal"))
25752 i = 2;
25753 else if (! strcmp (language_string, "GNU Ada"))
25754 i = 3;
25755 else if (lang_GNU_CXX ()
25756 || ! strcmp (language_string, "GNU Objective-C++"))
25757 i = 9;
25758 else if (! strcmp (language_string, "GNU Java"))
25759 i = 13;
25760 else if (! strcmp (language_string, "GNU Objective-C"))
25761 i = 14;
25762 else
25763 gcc_unreachable ();
25764 fprintf (file, "%d,", i);
25766 /* 8 single bit fields: global linkage (not set for C extern linkage,
25767 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
25768 from start of procedure stored in tbtab, internal function, function
25769 has controlled storage, function has no toc, function uses fp,
25770 function logs/aborts fp operations. */
25771 /* Assume that fp operations are used if any fp reg must be saved. */
25772 fprintf (file, "%d,",
25773 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
25775 /* 6 bitfields: function is interrupt handler, name present in
25776 proc table, function calls alloca, on condition directives
25777 (controls stack walks, 3 bits), saves condition reg, saves
25778 link reg. */
25779 /* The `function calls alloca' bit seems to be set whenever reg 31 is
25780 set up as a frame pointer, even when there is no alloca call. */
25781 fprintf (file, "%d,",
25782 ((optional_tbtab << 6)
25783 | ((optional_tbtab & frame_pointer_needed) << 5)
25784 | (info->cr_save_p << 1)
25785 | (info->lr_save_p)));
25787 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
25788 (6 bits). */
25789 fprintf (file, "%d,",
25790 (info->push_p << 7) | (64 - info->first_fp_reg_save));
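/* For example, with push_p set and first_fp_reg_save == 50 (14 FPRs
   saved), the byte just emitted is (1 << 7) | 14 == 142.  */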
25792 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
25793 fprintf (file, "%d,", (32 - first_reg_to_save ()));
25795 if (optional_tbtab)
25797 /* Compute the parameter info from the function decl argument
25798 list. */
25799 tree decl;
25800 int next_parm_info_bit = 31;
25802 for (decl = DECL_ARGUMENTS (current_function_decl);
25803 decl; decl = DECL_CHAIN (decl))
25805 rtx parameter = DECL_INCOMING_RTL (decl);
25806 machine_mode mode = GET_MODE (parameter);
25808 if (GET_CODE (parameter) == REG)
25810 if (SCALAR_FLOAT_MODE_P (mode))
25812 int bits;
25814 float_parms++;
25816 switch (mode)
25818 case SFmode:
25819 case SDmode:
25820 bits = 0x2;
25821 break;
25823 case DFmode:
25824 case DDmode:
25825 case TFmode:
25826 case TDmode:
25827 bits = 0x3;
25828 break;
25830 default:
25831 gcc_unreachable ();
25834 /* If only one bit will fit, don't or in this entry. */
25835 if (next_parm_info_bit > 0)
25836 parm_info |= (bits << (next_parm_info_bit - 1));
25837 next_parm_info_bit -= 2;
25839 else
25841 fixed_parms += ((GET_MODE_SIZE (mode)
25842 + (UNITS_PER_WORD - 1))
25843 / UNITS_PER_WORD);
25844 next_parm_info_bit -= 1;
25850 /* Number of fixed point parameters. */
25851 /* This is actually the number of words of fixed point parameters; thus
25852 an 8 byte struct counts as 2; and thus the maximum value is 8. */
25853 fprintf (file, "%d,", fixed_parms);
25855 /* 2 bitfields: number of floating point parameters (7 bits), parameters
25856 all on stack. */
25857 /* This is actually the number of fp registers that hold parameters;
25858 and thus the maximum value is 13. */
25859 /* Set parameters on stack bit if parameters are not in their original
25860 registers, regardless of whether they are on the stack? Xlc
25861 seems to set the bit when not optimizing. */
25862 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
25864 if (! optional_tbtab)
25865 return;
25867 /* Optional fields follow. Some are variable length. */
25869 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
25870 11 double float. */
25871 /* There is an entry for each parameter in a register, in the order that
25872 they occur in the parameter list. Any intervening arguments on the
25873 stack are ignored. If the list overflows a long (max possible length
25874 34 bits) then completely leave off all elements that don't fit. */
25875 /* Only emit this long if there was at least one parameter. */
25876 if (fixed_parms || float_parms)
25877 fprintf (file, "\t.long %d\n", parm_info);
25879 /* Offset from start of code to tb table. */
25880 fputs ("\t.long ", file);
25881 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25882 RS6000_OUTPUT_BASENAME (file, fname);
25883 putc ('-', file);
25884 rs6000_output_function_entry (file, fname);
25885 putc ('\n', file);
25887 /* Interrupt handler mask. */
25888 /* Omit this long, since we never set the interrupt handler bit
25889 above. */
25891 /* Number of CTL (controlled storage) anchors. */
25892 /* Omit this long, since the has_ctl bit is never set above. */
25894 /* Displacement into stack of each CTL anchor. */
25895 /* Omit this list of longs, because there are no CTL anchors. */
25897 /* Length of function name. */
25898 if (*fname == '*')
25899 ++fname;
25900 fprintf (file, "\t.short %d\n", (int) strlen (fname));
25902 /* Function name. */
25903 assemble_string (fname, strlen (fname));
25905 /* Register for alloca automatic storage; this is always reg 31.
25906 Only emit this if the alloca bit was set above. */
25907 if (frame_pointer_needed)
25908 fputs ("\t.byte 31\n", file);
25910 fputs ("\t.align 2\n", file);
25914 /* -fsplit-stack support. */
25916 /* A SYMBOL_REF for __morestack. */
25917 static GTY(()) rtx morestack_ref;
25919 static rtx
25920 gen_add3_const (rtx rt, rtx ra, long c)
25922 if (TARGET_64BIT)
25923 return gen_adddi3 (rt, ra, GEN_INT (c));
25924 else
25925 return gen_addsi3 (rt, ra, GEN_INT (c));
25928 /* Emit -fsplit-stack prologue, which goes before the regular function
25929 prologue (at the local entry point in the case of ELFv2). */
25931 void
25932 rs6000_expand_split_stack_prologue (void)
25934 rs6000_stack_t *info = rs6000_stack_info ();
25935 unsigned HOST_WIDE_INT allocate;
25936 long alloc_hi, alloc_lo;
25937 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
25938 rtx_insn *insn;
25940 gcc_assert (flag_split_stack && reload_completed);
25942 if (!info->push_p)
25943 return;
25945 allocate = info->total_size;
25946 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
25948 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
25949 return;
25951 if (morestack_ref == NULL_RTX)
25953 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
25954 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
25955 | SYMBOL_FLAG_FUNCTION);
25958 r0 = gen_rtx_REG (Pmode, 0);
25959 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
25960 r12 = gen_rtx_REG (Pmode, 12);
25961 emit_insn (gen_load_split_stack_limit (r0));
25962 /* Always emit two insns here to calculate the requested stack pointer,
25963 so that the linker can edit them when adjusting the size for calling
25964 non-split-stack code. */
25965 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
25966 alloc_lo = -allocate - alloc_hi;
25967 if (alloc_hi != 0)
25969 emit_insn (gen_add3_const (r12, r1, alloc_hi));
25970 if (alloc_lo != 0)
25971 emit_insn (gen_add3_const (r12, r12, alloc_lo));
25972 else
25973 emit_insn (gen_nop ());
25975 else
25977 emit_insn (gen_add3_const (r12, r1, alloc_lo));
25978 emit_insn (gen_nop ());
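/* E.g. for a 0x12340-byte frame the pair above assembles roughly to

       addis r12,r1,-1          # r12 = r1 - 0x10000
       addi  r12,r12,-0x2340    # r12 -= 0x2340; total is -0x12340

   while a small frame gets a single addi plus the nop, so the linker
   always finds exactly two editable insns.  (Sketch only; the operands
   follow from the rounding above.)  */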
25981 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
25982 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
25983 ok_label = gen_label_rtx ();
25984 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
25985 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
25986 gen_rtx_LABEL_REF (VOIDmode, ok_label),
25987 pc_rtx);
25988 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
25989 JUMP_LABEL (jump) = ok_label;
25990 /* Mark the jump as very likely to be taken. */
25991 add_int_reg_note (jump, REG_BR_PROB,
25992 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
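/* That is, the in-bounds path that skips __morestack is given a
   REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 = 99% taken probability.  */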
25994 lr = gen_rtx_REG (Pmode, LR_REGNO);
25995 insn = emit_move_insn (r0, lr);
25996 RTX_FRAME_RELATED_P (insn) = 1;
25997 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
25998 RTX_FRAME_RELATED_P (insn) = 1;
26000 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
26001 const0_rtx, const0_rtx));
26002 call_fusage = NULL_RTX;
26003 use_reg (&call_fusage, r12);
26004 add_function_usage_to (insn, call_fusage);
26005 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
26006 insn = emit_move_insn (lr, r0);
26007 add_reg_note (insn, REG_CFA_RESTORE, lr);
26008 RTX_FRAME_RELATED_P (insn) = 1;
26009 emit_insn (gen_split_stack_return ());
26011 emit_label (ok_label);
26012 LABEL_NUSES (ok_label) = 1;
26015 /* Return the internal arg pointer used for function incoming
26016 arguments. When -fsplit-stack, the arg pointer is r12 so we need
26017 to copy it to a pseudo in order for it to be preserved over calls
26018 and suchlike. We'd really like to use a pseudo here for the
26019 internal arg pointer but data-flow analysis is not prepared to
26020 accept pseudos as live at the beginning of a function. */
26022 static rtx
26023 rs6000_internal_arg_pointer (void)
26025 if (flag_split_stack)
26027 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
26029 rtx pat;
26031 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
26032 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
26034 /* Put the pseudo initialization right after the note at the
26035 beginning of the function. */
26036 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
26037 gen_rtx_REG (Pmode, 12));
26038 push_topmost_sequence ();
26039 emit_insn_after (pat, get_insns ());
26040 pop_topmost_sequence ();
26042 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
26043 FIRST_PARM_OFFSET (current_function_decl));
26045 return virtual_incoming_args_rtx;
26048 /* We may have to tell the dataflow pass that the split stack prologue
26049 is initializing a register. */
26051 static void
26052 rs6000_live_on_entry (bitmap regs)
26054 if (flag_split_stack)
26055 bitmap_set_bit (regs, 12);
26058 /* Emit -fsplit-stack dynamic stack allocation space check. */
26060 void
26061 rs6000_split_stack_space_check (rtx size, rtx label)
26063 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26064 rtx limit = gen_reg_rtx (Pmode);
26065 rtx requested = gen_reg_rtx (Pmode);
26066 rtx cmp = gen_reg_rtx (CCUNSmode);
26067 rtx jump;
26069 emit_insn (gen_load_split_stack_limit (limit));
26070 if (CONST_INT_P (size))
26071 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
26072 else
26074 size = force_reg (Pmode, size);
26075 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
26077 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
26078 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26079 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
26080 gen_rtx_LABEL_REF (VOIDmode, label),
26081 pc_rtx);
26082 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26083 JUMP_LABEL (jump) = label;
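/* Schematically, the emitted check is

       requested = sp - size
       if ((unsigned) requested >= limit)   # CCUNSmode GEU
         goto label                         # enough split-stack space

   and the code emitted after this check handles the short-on-space
   case on fall-through.  */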
26086 /* A C compound statement that outputs the assembler code for a thunk
26087 function, used to implement C++ virtual function calls with
26088 multiple inheritance. The thunk acts as a wrapper around a virtual
26089 function, adjusting the implicit object parameter before handing
26090 control off to the real function.
26092 First, emit code to add the integer DELTA to the location that
26093 contains the incoming first argument. Assume that this argument
26094 contains a pointer, and is the one used to pass the `this' pointer
26095 in C++. This is the incoming argument *before* the function
26096 prologue, e.g. `%o0' on a sparc. The addition must preserve the
26097 values of all other incoming arguments.
26099 After the addition, emit code to jump to FUNCTION, which is a
26100 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
26101 not touch the return address. Hence returning from FUNCTION will
26102 return to whoever called the current `thunk'.
26104 The effect must be as if FUNCTION had been called directly with the
26105 adjusted first argument. This macro is responsible for emitting
26106 all of the code for a thunk function; output_function_prologue()
26107 and output_function_epilogue() are not invoked.
26109 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
26110 been extracted from it.) It might possibly be useful on some
26111 targets, but probably not.
26113 If you do not define this macro, the target-independent code in the
26114 C++ frontend will generate a less efficient heavyweight thunk that
26115 calls FUNCTION instead of jumping to it. The generic approach does
26116 not support varargs. */
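/* A minimal sketch of the output for delta == -8 and vcall_offset == 0,
   assuming a target where `this' arrives in r3:

       addi r3,r3,-8      # adjust the this pointer
       b    <function>    # tail call; LR is untouched

   A nonzero vcall_offset additionally loads the vtable slot through
   r12, as the code below shows.  */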
26118 static void
26119 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
26120 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
26121 tree function)
26123 rtx this_rtx, funexp;
26124 rtx_insn *insn;
26126 reload_completed = 1;
26127 epilogue_completed = 1;
26129 /* Mark the end of the (empty) prologue. */
26130 emit_note (NOTE_INSN_PROLOGUE_END);
26132 /* Find the "this" pointer. If the function returns a structure,
26133 the structure return pointer is in r3. */
26134 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
26135 this_rtx = gen_rtx_REG (Pmode, 4);
26136 else
26137 this_rtx = gen_rtx_REG (Pmode, 3);
26139 /* Apply the constant offset, if required. */
26140 if (delta)
26141 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
26143 /* Apply the offset from the vtable, if required. */
26144 if (vcall_offset)
26146 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
26147 rtx tmp = gen_rtx_REG (Pmode, 12);
26149 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
26150 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
26152 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
26153 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
26155 else
26157 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
26159 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
26161 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
26164 /* Generate a tail call to the target function. */
26165 if (!TREE_USED (function))
26167 assemble_external (function);
26168 TREE_USED (function) = 1;
26170 funexp = XEXP (DECL_RTL (function), 0);
26171 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26173 #if TARGET_MACHO
26174 if (MACHOPIC_INDIRECT)
26175 funexp = machopic_indirect_call_target (funexp);
26176 #endif
26178 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
26179 generate sibcall RTL explicitly. */
26180 insn = emit_call_insn (
26181 gen_rtx_PARALLEL (VOIDmode,
26182 gen_rtvec (4,
26183 gen_rtx_CALL (VOIDmode,
26184 funexp, const0_rtx),
26185 gen_rtx_USE (VOIDmode, const0_rtx),
26186 gen_rtx_USE (VOIDmode,
26187 gen_rtx_REG (SImode,
26188 LR_REGNO)),
26189 simple_return_rtx)));
26190 SIBLING_CALL_P (insn) = 1;
26191 emit_barrier ();
26193 /* Ensure we have a global entry point for the thunk. ??? We could
26194 avoid that if the target routine doesn't need a global entry point,
26195 but we do not know whether this is the case at this point. */
26196 if (DEFAULT_ABI == ABI_ELFv2)
26197 cfun->machine->r2_setup_needed = true;
26199 /* Run just enough of rest_of_compilation to get the insns emitted.
26200 There's not really enough bulk here to make other passes such as
26201 instruction scheduling worth while. Note that use_thunk calls
26202 assemble_start_function and assemble_end_function. */
26203 insn = get_insns ();
26204 shorten_branches (insn);
26205 final_start_function (insn, file, 1);
26206 final (insn, file, 1);
26207 final_end_function ();
26209 reload_completed = 0;
26210 epilogue_completed = 0;
26213 /* A quick summary of the various types of 'constant-pool tables'
26214 under PowerPC:
26216 Target       Flags           Name              One table per
26217 AIX          (none)          AIX TOC           object file
26218 AIX          -mfull-toc      AIX TOC           object file
26219 AIX          -mminimal-toc   AIX minimal TOC   translation unit
26220 SVR4/EABI    (none)          SVR4 SDATA        object file
26221 SVR4/EABI    -fpic           SVR4 pic          object file
26222 SVR4/EABI    -fPIC           SVR4 PIC          translation unit
26223 SVR4/EABI    -mrelocatable   EABI TOC          function
26224 SVR4/EABI    -maix           AIX TOC           object file
26225 SVR4/EABI    -maix -mminimal-toc
26226                              AIX minimal TOC   translation unit
26228 Name             Reg.  Set by  entries  contains:
26229                                made by  addrs?   fp?      sum?
26231 AIX TOC           2    crt0    as       Y        option   option
26232 AIX minimal TOC  30    prolog  gcc      Y        Y        option
26233 SVR4 SDATA       13    crt0    gcc      N        Y        N
26234 SVR4 pic         30    prolog  ld       Y        not yet  N
26235 SVR4 PIC         30    prolog  gcc      Y        option   option
26236 EABI TOC         30    prolog  gcc      Y        option   option
26240 /* Hash functions for the hash table. */
26242 static unsigned
26243 rs6000_hash_constant (rtx k)
26245 enum rtx_code code = GET_CODE (k);
26246 machine_mode mode = GET_MODE (k);
26247 unsigned result = (code << 3) ^ mode;
26248 const char *format;
26249 int flen, fidx;
26251 format = GET_RTX_FORMAT (code);
26252 flen = strlen (format);
26253 fidx = 0;
26255 switch (code)
26257 case LABEL_REF:
26258 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
26260 case CONST_WIDE_INT:
26262 int i;
26263 flen = CONST_WIDE_INT_NUNITS (k);
26264 for (i = 0; i < flen; i++)
26265 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
26266 return result;
26269 case CONST_DOUBLE:
26270 if (mode != VOIDmode)
26271 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
26272 flen = 2;
26273 break;
26275 case CODE_LABEL:
26276 fidx = 3;
26277 break;
26279 default:
26280 break;
26283 for (; fidx < flen; fidx++)
26284 switch (format[fidx])
26286 case 's':
26288 unsigned i, len;
26289 const char *str = XSTR (k, fidx);
26290 len = strlen (str);
26291 result = result * 613 + len;
26292 for (i = 0; i < len; i++)
26293 result = result * 613 + (unsigned) str[i];
26294 break;
26296 case 'u':
26297 case 'e':
26298 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
26299 break;
26300 case 'i':
26301 case 'n':
26302 result = result * 613 + (unsigned) XINT (k, fidx);
26303 break;
26304 case 'w':
26305 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
26306 result = result * 613 + (unsigned) XWINT (k, fidx);
26307 else
26309 size_t i;
26310 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
26311 result = result * 613 + (unsigned) (XWINT (k, fidx)
26312 >> CHAR_BIT * i);
26314 break;
26315 case '0':
26316 break;
26317 default:
26318 gcc_unreachable ();
26321 return result;
26324 hashval_t
26325 toc_hasher::hash (toc_hash_struct *thc)
26327 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
26330 /* Compare H1 and H2 for equivalence. */
26332 bool
26333 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
26335 rtx r1 = h1->key;
26336 rtx r2 = h2->key;
26338 if (h1->key_mode != h2->key_mode)
26339 return 0;
26341 return rtx_equal_p (r1, r2);
26344 /* These are the names given by the C++ front-end to vtables, and
26345 vtable-like objects. Ideally, this logic should not be here;
26346 instead, there should be some programmatic way of inquiring as
26347 to whether or not an object is a vtable. */
26349 #define VTABLE_NAME_P(NAME) \
26350 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
26351 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
26352 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
26353 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
26354 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
26356 #ifdef NO_DOLLAR_IN_LABEL
26357 /* Return a GGC-allocated character string translating dollar signs in
26358 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
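/* For example, "tls$local" becomes "tls_local"; a name that begins with
   '$' or contains none at all is returned unchanged.  */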
26360 const char *
26361 rs6000_xcoff_strip_dollar (const char *name)
26363 char *strip, *p;
26364 const char *q;
26365 size_t len;
26367 q = (const char *) strchr (name, '$');
26369 if (q == 0 || q == name)
26370 return name;
26372 len = strlen (name);
26373 strip = XALLOCAVEC (char, len + 1);
26374 strcpy (strip, name);
26375 p = strip + (q - name);
26376 while (p)
26378 *p = '_';
26379 p = strchr (p + 1, '$');
26382 return ggc_alloc_string (strip, len);
26384 #endif
26386 void
26387 rs6000_output_symbol_ref (FILE *file, rtx x)
26389 /* Currently C++ toc references to vtables can be emitted before it
26390 is decided whether the vtable is public or private. If this is
26391 the case, then the linker will eventually complain that there is
26392 a reference to an unknown section. Thus, for vtables only,
26393 we emit the TOC reference to reference the symbol and not the
26394 section. */
26395 const char *name = XSTR (x, 0);
26397 tree decl = SYMBOL_REF_DECL (x);
26398 if (decl /* sync condition with assemble_external () */
26399 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
26400 && (TREE_CODE (decl) == VAR_DECL
26401 || TREE_CODE (decl) == FUNCTION_DECL)
26402 && name[strlen (name) - 1] != ']')
26404 name = concat (name,
26405 (TREE_CODE (decl) == FUNCTION_DECL
26406 ? "[DS]" : "[UA]"),
26407 NULL);
26408 XSTR (x, 0) = name;
26411 if (VTABLE_NAME_P (name))
26413 RS6000_OUTPUT_BASENAME (file, name);
26415 else
26416 assemble_name (file, name);
26419 /* Output a TOC entry. We derive the entry name from what is being
26420 written. */
26422 void
26423 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
26425 char buf[256];
26426 const char *name = buf;
26427 rtx base = x;
26428 HOST_WIDE_INT offset = 0;
26430 gcc_assert (!TARGET_NO_TOC);
26432 /* When the linker won't eliminate them, don't output duplicate
26433 TOC entries (this happens on AIX if there is any kind of TOC,
26434 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
26435 CODE_LABELs. */
26436 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
26438 struct toc_hash_struct *h;
26440 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
26441 time because GGC is not initialized at that point. */
26442 if (toc_hash_table == NULL)
26443 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
26445 h = ggc_alloc<toc_hash_struct> ();
26446 h->key = x;
26447 h->key_mode = mode;
26448 h->labelno = labelno;
26450 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
26451 if (*found == NULL)
26452 *found = h;
26453 else /* This is indeed a duplicate.
26454 Set this label equal to that label. */
26456 fputs ("\t.set ", file);
26457 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
26458 fprintf (file, "%d,", labelno);
26459 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
26460 fprintf (file, "%d\n", ((*found)->labelno));
26462 #ifdef HAVE_AS_TLS
26463 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
26464 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
26465 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
26467 fputs ("\t.set ", file);
26468 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
26469 fprintf (file, "%d,", labelno);
26470 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
26471 fprintf (file, "%d\n", ((*found)->labelno));
26473 #endif
26474 return;
26478 /* If we're going to put a double constant in the TOC, make sure it's
26479 aligned properly when strict alignment is on. */
26480 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
26481 && STRICT_ALIGNMENT
26482 && GET_MODE_BITSIZE (mode) >= 64
26483 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
26484 ASM_OUTPUT_ALIGN (file, 3);
26487 (*targetm.asm_out.internal_label) (file, "LC", labelno);
26489 /* Handle FP constants specially. Note that if we have a minimal
26490 TOC, things we put here aren't actually in the TOC, so we can allow
26491 FP constants. */
26492 if (GET_CODE (x) == CONST_DOUBLE &&
26493 (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode))
26495 REAL_VALUE_TYPE rv;
26496 long k[4];
26498 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26499 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26500 REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
26501 else
26502 REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
26504 if (TARGET_64BIT)
26506 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26507 fputs (DOUBLE_INT_ASM_OP, file);
26508 else
26509 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26510 k[0] & 0xffffffff, k[1] & 0xffffffff,
26511 k[2] & 0xffffffff, k[3] & 0xffffffff);
26512 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
26513 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26514 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
26515 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
26516 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
26517 return;
26519 else
26521 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26522 fputs ("\t.long ", file);
26523 else
26524 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26525 k[0] & 0xffffffff, k[1] & 0xffffffff,
26526 k[2] & 0xffffffff, k[3] & 0xffffffff);
26527 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
26528 k[0] & 0xffffffff, k[1] & 0xffffffff,
26529 k[2] & 0xffffffff, k[3] & 0xffffffff);
26530 return;
26533 else if (GET_CODE (x) == CONST_DOUBLE &&
26534 (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
26536 REAL_VALUE_TYPE rv;
26537 long k[2];
26539 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26541 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26542 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
26543 else
26544 REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
26546 if (TARGET_64BIT)
26548 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26549 fputs (DOUBLE_INT_ASM_OP, file);
26550 else
26551 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26552 k[0] & 0xffffffff, k[1] & 0xffffffff);
26553 fprintf (file, "0x%lx%08lx\n",
26554 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26555 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
26556 return;
26558 else
26560 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26561 fputs ("\t.long ", file);
26562 else
26563 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26564 k[0] & 0xffffffff, k[1] & 0xffffffff);
26565 fprintf (file, "0x%lx,0x%lx\n",
26566 k[0] & 0xffffffff, k[1] & 0xffffffff);
26567 return;
26570 else if (GET_CODE (x) == CONST_DOUBLE &&
26571 (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
26573 REAL_VALUE_TYPE rv;
26574 long l;
26576 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26577 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26578 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
26579 else
26580 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
26582 if (TARGET_64BIT)
26584 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26585 fputs (DOUBLE_INT_ASM_OP, file);
26586 else
26587 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26588 if (WORDS_BIG_ENDIAN)
26589 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
26590 else
26591 fprintf (file, "0x%lx\n", l & 0xffffffff);
26592 return;
26594 else
26596 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26597 fputs ("\t.long ", file);
26598 else
26599 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26600 fprintf (file, "0x%lx\n", l & 0xffffffff);
26601 return;
26604 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
26606 unsigned HOST_WIDE_INT low;
26607 HOST_WIDE_INT high;
26609 low = INTVAL (x) & 0xffffffff;
26610 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
26612 /* TOC entries are always Pmode-sized, so when big-endian,
26613 smaller integer constants in the TOC need to be padded.
26614 (This is still a win over putting the constants in
26615 a separate constant pool, because then we'd have
26616 to have both a TOC entry _and_ the actual constant.)
26618 For a 32-bit target, CONST_INT values are loaded and shifted
26619 entirely within `low' and can be stored in one TOC entry. */
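/* Worked example, big-endian 64-bit target: (const_int 5) in SImode is
   shifted into the high word, so the entry below is emitted as
   0x0000000500000000 and an SImode load from the entry's address reads
   back 5.  */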
26621 /* It would be easy to make this work, but it doesn't now. */
26622 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
26624 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
26626 low |= high << 32;
26627 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
26628 high = (HOST_WIDE_INT) low >> 32;
26629 low &= 0xffffffff;
26632 if (TARGET_64BIT)
26634 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26635 fputs (DOUBLE_INT_ASM_OP, file);
26636 else
26637 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26638 (long) high & 0xffffffff, (long) low & 0xffffffff);
26639 fprintf (file, "0x%lx%08lx\n",
26640 (long) high & 0xffffffff, (long) low & 0xffffffff);
26641 return;
26643 else
26645 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
26647 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26648 fputs ("\t.long ", file);
26649 else
26650 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26651 (long) high & 0xffffffff, (long) low & 0xffffffff);
26652 fprintf (file, "0x%lx,0x%lx\n",
26653 (long) high & 0xffffffff, (long) low & 0xffffffff);
26655 else
26657 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26658 fputs ("\t.long ", file);
26659 else
26660 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
26661 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
26663 return;
26667 if (GET_CODE (x) == CONST)
26669 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
26670 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
26672 base = XEXP (XEXP (x, 0), 0);
26673 offset = INTVAL (XEXP (XEXP (x, 0), 1));
26676 switch (GET_CODE (base))
26678 case SYMBOL_REF:
26679 name = XSTR (base, 0);
26680 break;
26682 case LABEL_REF:
26683 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
26684 CODE_LABEL_NUMBER (XEXP (base, 0)));
26685 break;
26687 case CODE_LABEL:
26688 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
26689 break;
26691 default:
26692 gcc_unreachable ();
26695 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26696 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
26697 else
26699 fputs ("\t.tc ", file);
26700 RS6000_OUTPUT_BASENAME (file, name);
26702 if (offset < 0)
26703 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
26704 else if (offset)
26705 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
26707 /* Mark large TOC symbols on AIX with [TE] so they are mapped
26708 after other TOC symbols, reducing overflow of small TOC access
26709 to [TC] symbols. */
26710 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
26711 ? "[TE]," : "[TC],", file);
26714 /* Currently C++ toc references to vtables can be emitted before it
26715 is decided whether the vtable is public or private. If this is
26716 the case, then the linker will eventually complain that there is
26717 a TOC reference to an unknown section. Thus, for vtables only,
26718 we emit the TOC reference to reference the symbol and not the
26719 section. */
26720 if (VTABLE_NAME_P (name))
26722 RS6000_OUTPUT_BASENAME (file, name);
26723 if (offset < 0)
26724 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
26725 else if (offset > 0)
26726 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
26728 else
26729 output_addr_const (file, x);
26731 #if HAVE_AS_TLS
26732 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF
26733 && SYMBOL_REF_TLS_MODEL (base) != 0)
26735 if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
26736 fputs ("@le", file);
26737 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_INITIAL_EXEC)
26738 fputs ("@ie", file);
26739 /* Use global-dynamic for local-dynamic. */
26740 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_GLOBAL_DYNAMIC
26741 || SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_DYNAMIC)
26743 putc ('\n', file);
26744 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
26745 fputs ("\t.tc .", file);
26746 RS6000_OUTPUT_BASENAME (file, name);
26747 fputs ("[TC],", file);
26748 output_addr_const (file, x);
26749 fputs ("@m", file);
26752 #endif
26754 putc ('\n', file);
26757 /* Output an assembler pseudo-op to write an ASCII string of N characters
26758 starting at P to FILE.
26760 On the RS/6000, we have to do this using the .byte operation and
26761 write out special characters outside the quoted string.
26762 Also, the assembler is broken; very long strings are truncated,
26763 so we must artificially break them up early. */
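/* For example, output_ascii (file, "Hi\n", 3) writes

       .byte "Hi"
       .byte 10

   doubling any embedded '"' and restarting the quoted string after 512
   characters to stay within the assembler's limits.  */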
26765 void
26766 output_ascii (FILE *file, const char *p, int n)
26768 char c;
26769 int i, count_string;
26770 const char *for_string = "\t.byte \"";
26771 const char *for_decimal = "\t.byte ";
26772 const char *to_close = NULL;
26774 count_string = 0;
26775 for (i = 0; i < n; i++)
26777 c = *p++;
26778 if (c >= ' ' && c < 0177)
26780 if (for_string)
26781 fputs (for_string, file);
26782 putc (c, file);
26784 /* Write two quotes to get one. */
26785 if (c == '"')
26787 putc (c, file);
26788 ++count_string;
26791 for_string = NULL;
26792 for_decimal = "\"\n\t.byte ";
26793 to_close = "\"\n";
26794 ++count_string;
26796 if (count_string >= 512)
26798 fputs (to_close, file);
26800 for_string = "\t.byte \"";
26801 for_decimal = "\t.byte ";
26802 to_close = NULL;
26803 count_string = 0;
26806 else
26808 if (for_decimal)
26809 fputs (for_decimal, file);
26810 fprintf (file, "%d", c);
26812 for_string = "\n\t.byte \"";
26813 for_decimal = ", ";
26814 to_close = "\n";
26815 count_string = 0;
26819 /* Now close the string if we have written one. Then end the line. */
26820 if (to_close)
26821 fputs (to_close, file);
26824 /* Generate a unique section name for FILENAME for a section type
26825 represented by SECTION_DESC. Output goes into BUF.
26827 SECTION_DESC can be any string, as long as it is different for each
26828 possible section type.
26830 We name the section in the same manner as xlc. The name begins with an
26831 underscore followed by the filename (after stripping any leading directory
26832 names) with the last period replaced by the string SECTION_DESC. If
26833 FILENAME does not contain a period, SECTION_DESC is appended to the end of
26834 the name. */
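/* For example, FILENAME "src/my-file.c" with SECTION_DESC "_bss" yields
   "_myfile_bss": the directory is stripped, non-alphanumeric characters
   are dropped, and the final period introduces SECTION_DESC.  */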
26836 void
26837 rs6000_gen_section_name (char **buf, const char *filename,
26838 const char *section_desc)
26840 const char *q, *after_last_slash, *last_period = 0;
26841 char *p;
26842 int len;
26844 after_last_slash = filename;
26845 for (q = filename; *q; q++)
26847 if (*q == '/')
26848 after_last_slash = q + 1;
26849 else if (*q == '.')
26850 last_period = q;
26853 len = strlen (after_last_slash) + strlen (section_desc) + 2;
26854 *buf = (char *) xmalloc (len);
26856 p = *buf;
26857 *p++ = '_';
26859 for (q = after_last_slash; *q; q++)
26861 if (q == last_period)
26863 strcpy (p, section_desc);
26864 p += strlen (section_desc);
26865 break;
26868 else if (ISALNUM (*q))
26869 *p++ = *q;
26872 if (last_period == 0)
26873 strcpy (p, section_desc);
26874 else
26875 *p = '\0';
26878 /* Emit profile function. */
26880 void
26881 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
26883 /* Non-standard profiling for kernels, which just saves LR then calls
26884 _mcount without worrying about arg saves. The idea is to change
26885 the function prologue as little as possible as it isn't easy to
26886 account for arg save/restore code added just for _mcount. */
26887 if (TARGET_PROFILE_KERNEL)
26888 return;
26890 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26892 #ifndef NO_PROFILE_COUNTERS
26893 # define NO_PROFILE_COUNTERS 0
26894 #endif
26895 if (NO_PROFILE_COUNTERS)
26896 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26897 LCT_NORMAL, VOIDmode, 0);
26898 else
26900 char buf[30];
26901 const char *label_name;
26902 rtx fun;
26904 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26905 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
26906 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
26908 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26909 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
26912 else if (DEFAULT_ABI == ABI_DARWIN)
26914 const char *mcount_name = RS6000_MCOUNT;
26915 int caller_addr_regno = LR_REGNO;
26917 /* Be conservative and always set this, at least for now. */
26918 crtl->uses_pic_offset_table = 1;
26920 #if TARGET_MACHO
26921 /* For PIC code, set up a stub and collect the caller's address
26922 from r0, which is where the prologue puts it. */
26923 if (MACHOPIC_INDIRECT
26924 && crtl->uses_pic_offset_table)
26925 caller_addr_regno = 0;
26926 #endif
26927 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
26928 LCT_NORMAL, VOIDmode, 1,
26929 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
26933 /* Write function profiler code. */
26935 void
26936 output_function_profiler (FILE *file, int labelno)
26938 char buf[100];
26940 switch (DEFAULT_ABI)
26942 default:
26943 gcc_unreachable ();
26945 case ABI_V4:
26946 if (!TARGET_32BIT)
26948 warning (0, "no profiling of 64-bit code for this ABI");
26949 return;
26951 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26952 fprintf (file, "\tmflr %s\n", reg_names[0]);
26953 if (NO_PROFILE_COUNTERS)
26955 asm_fprintf (file, "\tstw %s,4(%s)\n",
26956 reg_names[0], reg_names[1]);
26958 else if (TARGET_SECURE_PLT && flag_pic)
26960 if (TARGET_LINK_STACK)
26962 char name[32];
26963 get_ppc476_thunk_name (name);
26964 asm_fprintf (file, "\tbl %s\n", name);
26966 else
26967 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
26968 asm_fprintf (file, "\tstw %s,4(%s)\n",
26969 reg_names[0], reg_names[1]);
26970 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26971 asm_fprintf (file, "\taddis %s,%s,",
26972 reg_names[12], reg_names[12]);
26973 assemble_name (file, buf);
26974 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
26975 assemble_name (file, buf);
26976 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
26978 else if (flag_pic == 1)
26980 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
26981 asm_fprintf (file, "\tstw %s,4(%s)\n",
26982 reg_names[0], reg_names[1]);
26983 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26984 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
26985 assemble_name (file, buf);
26986 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
26988 else if (flag_pic > 1)
26990 asm_fprintf (file, "\tstw %s,4(%s)\n",
26991 reg_names[0], reg_names[1]);
26992 /* Now, we need to get the address of the label. */
26993 if (TARGET_LINK_STACK)
26995 char name[32];
26996 get_ppc476_thunk_name (name);
26997 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
26998 assemble_name (file, buf);
26999 fputs ("-.\n1:", file);
27000 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
27001 asm_fprintf (file, "\taddi %s,%s,4\n",
27002 reg_names[11], reg_names[11]);
27004 else
27006 fputs ("\tbcl 20,31,1f\n\t.long ", file);
27007 assemble_name (file, buf);
27008 fputs ("-.\n1:", file);
27009 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
27011 asm_fprintf (file, "\tlwz %s,0(%s)\n",
27012 reg_names[0], reg_names[11]);
27013 asm_fprintf (file, "\tadd %s,%s,%s\n",
27014 reg_names[0], reg_names[0], reg_names[11]);
27016 else
27018 asm_fprintf (file, "\tlis %s,", reg_names[12]);
27019 assemble_name (file, buf);
27020 fputs ("@ha\n", file);
27021 asm_fprintf (file, "\tstw %s,4(%s)\n",
27022 reg_names[0], reg_names[1]);
27023 asm_fprintf (file, "\tla %s,", reg_names[0]);
27024 assemble_name (file, buf);
27025 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
27028 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
27029 fprintf (file, "\tbl %s%s\n",
27030 RS6000_MCOUNT, flag_pic ? "@plt" : "");
27031 break;
27033 case ABI_AIX:
27034 case ABI_ELFv2:
27035 case ABI_DARWIN:
27036 /* Don't do anything, done in output_profile_hook (). */
27037 break;
27043 /* The following variable holds the last insn issued. */
27045 static rtx last_scheduled_insn;
27047 /* The following variable helps to balance the issuing of load and
27048 store instructions. */
27050 static int load_store_pendulum;
27052 /* Power4 load update and store update instructions are cracked into a
27053 load or store and an integer insn which are executed in the same cycle.
27054 Branches have their own dispatch slot which does not count against the
27055 GCC issue rate, but it changes the program flow so there are no other
27056 instructions to issue in this cycle. */
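/* Consequently, when scheduling for dispatch groups below, a cracked
   insn (e.g. a load with update such as lwzu) is charged two issue
   slots, and a microcoded insn ends the group outright.  */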
27058 static int
27059 rs6000_variable_issue_1 (rtx_insn *insn, int more)
27061 last_scheduled_insn = insn;
27062 if (GET_CODE (PATTERN (insn)) == USE
27063 || GET_CODE (PATTERN (insn)) == CLOBBER)
27065 cached_can_issue_more = more;
27066 return cached_can_issue_more;
27069 if (insn_terminates_group_p (insn, current_group))
27071 cached_can_issue_more = 0;
27072 return cached_can_issue_more;
27075 /* If the insn has no reservation but we reach here anyway, leave the issue count unchanged. */
27076 if (recog_memoized (insn) < 0)
27077 return more;
27079 if (rs6000_sched_groups)
27081 if (is_microcoded_insn (insn))
27082 cached_can_issue_more = 0;
27083 else if (is_cracked_insn (insn))
27084 cached_can_issue_more = more > 2 ? more - 2 : 0;
27085 else
27086 cached_can_issue_more = more - 1;
27088 return cached_can_issue_more;
27091 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
27092 return 0;
27094 cached_can_issue_more = more - 1;
27095 return cached_can_issue_more;
27098 static int
27099 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
27101 int r = rs6000_variable_issue_1 (insn, more);
27102 if (verbose)
27103 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
27104 return r;
27107 /* Adjust the cost of a scheduling dependency. Return the new cost of
27108 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
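/* For example, on the CPUs listed below a compare feeding its dependent
   branch is charged two extra cycles, discouraging the scheduler from
   placing them back to back.  */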
27110 static int
27111 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
27113 enum attr_type attr_type;
27115 if (! recog_memoized (insn))
27116 return 0;
27118 switch (REG_NOTE_KIND (link))
27120 case REG_DEP_TRUE:
27122 /* Data dependency; DEP_INSN writes a register that INSN reads
27123 some cycles later. */
27125 /* Separate a load from a narrower, dependent store. */
27126 if (rs6000_sched_groups
27127 && GET_CODE (PATTERN (insn)) == SET
27128 && GET_CODE (PATTERN (dep_insn)) == SET
27129 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
27130 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
27131 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
27132 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
27133 return cost + 14;
27135 attr_type = get_attr_type (insn);
27137 switch (attr_type)
27139 case TYPE_JMPREG:
27140 /* Tell the first scheduling pass about the latency between
27141 a mtctr and bctr (and mtlr and br/blr). The first
27142 scheduling pass will not know about this latency since
27143 the mtctr instruction, which has the latency associated
27144 to it, will be generated by reload. */
27145 return 4;
27146 case TYPE_BRANCH:
27147 /* Leave some extra cycles between a compare and its
27148 dependent branch, to inhibit expensive mispredicts. */
27149 if ((rs6000_cpu_attr == CPU_PPC603
27150 || rs6000_cpu_attr == CPU_PPC604
27151 || rs6000_cpu_attr == CPU_PPC604E
27152 || rs6000_cpu_attr == CPU_PPC620
27153 || rs6000_cpu_attr == CPU_PPC630
27154 || rs6000_cpu_attr == CPU_PPC750
27155 || rs6000_cpu_attr == CPU_PPC7400
27156 || rs6000_cpu_attr == CPU_PPC7450
27157 || rs6000_cpu_attr == CPU_PPCE5500
27158 || rs6000_cpu_attr == CPU_PPCE6500
27159 || rs6000_cpu_attr == CPU_POWER4
27160 || rs6000_cpu_attr == CPU_POWER5
27161 || rs6000_cpu_attr == CPU_POWER7
27162 || rs6000_cpu_attr == CPU_POWER8
27163 || rs6000_cpu_attr == CPU_CELL)
27164 && recog_memoized (dep_insn)
27165 && (INSN_CODE (dep_insn) >= 0))
27167 switch (get_attr_type (dep_insn))
27169 case TYPE_CMP:
27170 case TYPE_FPCOMPARE:
27171 case TYPE_CR_LOGICAL:
27172 case TYPE_DELAYED_CR:
27173 return cost + 2;
27174 case TYPE_EXTS:
27175 case TYPE_MUL:
27176 if (get_attr_dot (dep_insn) == DOT_YES)
27177 return cost + 2;
27178 else
27179 break;
27180 case TYPE_SHIFT:
27181 if (get_attr_dot (dep_insn) == DOT_YES
27182 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
27183 return cost + 2;
27184 else
27185 break;
27186 default:
27187 break;
27189 break;
27191 case TYPE_STORE:
27192 case TYPE_FPSTORE:
27193 if ((rs6000_cpu == PROCESSOR_POWER6)
27194 && recog_memoized (dep_insn)
27195 && (INSN_CODE (dep_insn) >= 0))
27198 if (GET_CODE (PATTERN (insn)) != SET)
27199 /* If this happens, we have to extend this to schedule
27200 optimally. Return default for now. */
27201 return cost;
27203 /* Adjust the cost for the case where the value written
27204 by a fixed point operation is used as the address
27205 gen value on a store. */
27206 switch (get_attr_type (dep_insn))
27208 case TYPE_LOAD:
27209 case TYPE_CNTLZ:
27211 if (! store_data_bypass_p (dep_insn, insn))
27212 return get_attr_sign_extend (dep_insn)
27213 == SIGN_EXTEND_YES ? 6 : 4;
27214 break;
27216 case TYPE_SHIFT:
27218 if (! store_data_bypass_p (dep_insn, insn))
27219 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
27220 6 : 3;
27221 break;
27223 case TYPE_INTEGER:
27224 case TYPE_ADD:
27225 case TYPE_LOGICAL:
27226 case TYPE_EXTS:
27227 case TYPE_INSERT:
27229 if (! store_data_bypass_p (dep_insn, insn))
27230 return 3;
27231 break;
27233 case TYPE_STORE:
27234 case TYPE_FPLOAD:
27235 case TYPE_FPSTORE:
27237 if (get_attr_update (dep_insn) == UPDATE_YES
27238 && ! store_data_bypass_p (dep_insn, insn))
27239 return 3;
27240 break;
27242 case TYPE_MUL:
27244 if (! store_data_bypass_p (dep_insn, insn))
27245 return 17;
27246 break;
27248 case TYPE_DIV:
27250 if (! store_data_bypass_p (dep_insn, insn))
27251 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
27252 break;
27254 default:
27255 break;
27258 break;
27260 case TYPE_LOAD:
27261 if ((rs6000_cpu == PROCESSOR_POWER6)
27262 && recog_memoized (dep_insn)
27263 && (INSN_CODE (dep_insn) >= 0))
27266 /* Adjust the cost for the case where the value written
27267 by a fixed point instruction is used within the address
27268 gen portion of a subsequent load(u)(x) */
27269 switch (get_attr_type (dep_insn))
27271 case TYPE_LOAD:
27272 case TYPE_CNTLZ:
27274 if (set_to_load_agen (dep_insn, insn))
27275 return get_attr_sign_extend (dep_insn)
27276 == SIGN_EXTEND_YES ? 6 : 4;
27277 break;
27279 case TYPE_SHIFT:
27281 if (set_to_load_agen (dep_insn, insn))
27282 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
27283 6 : 3;
27284 break;
27286 case TYPE_INTEGER:
27287 case TYPE_ADD:
27288 case TYPE_LOGICAL:
27289 case TYPE_EXTS:
27290 case TYPE_INSERT:
27292 if (set_to_load_agen (dep_insn, insn))
27293 return 3;
27294 break;
27296 case TYPE_STORE:
27297 case TYPE_FPLOAD:
27298 case TYPE_FPSTORE:
27300 if (get_attr_update (dep_insn) == UPDATE_YES
27301 && set_to_load_agen (dep_insn, insn))
27302 return 3;
27303 break;
27305 case TYPE_MUL:
27307 if (set_to_load_agen (dep_insn, insn))
27308 return 17;
27309 break;
27311 case TYPE_DIV:
27313 if (set_to_load_agen (dep_insn, insn))
27314 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
27315 break;
27317 default:
27318 break;
27321 break;
27323 case TYPE_FPLOAD:
27324 if ((rs6000_cpu == PROCESSOR_POWER6)
27325 && get_attr_update (insn) == UPDATE_NO
27326 && recog_memoized (dep_insn)
27327 && (INSN_CODE (dep_insn) >= 0)
27328 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
27329 return 2;
27331 default:
27332 break;
27335 /* Fall out to return default cost. */
27337 break;
27339 case REG_DEP_OUTPUT:
27340 /* Output dependency; DEP_INSN writes a register that INSN writes some
27341 cycles later. */
27342 if ((rs6000_cpu == PROCESSOR_POWER6)
27343 && recog_memoized (dep_insn)
27344 && (INSN_CODE (dep_insn) >= 0))
27346 attr_type = get_attr_type (insn);
27348 switch (attr_type)
27350 case TYPE_FP:
27351 if (get_attr_type (dep_insn) == TYPE_FP)
27352 return 1;
27353 break;
27354 case TYPE_FPLOAD:
27355 if (get_attr_update (insn) == UPDATE_NO
27356 && get_attr_type (dep_insn) == TYPE_MFFGPR)
27357 return 2;
27358 break;
27359 default:
27360 break;
break;  /* Do not fall through to the REG_DEP_ANTI case.  */
27363 case REG_DEP_ANTI:
27364 /* Anti dependency; DEP_INSN reads a register that INSN writes some
27365 cycles later. */
27366 return 0;
27368 default:
27369 gcc_unreachable ();
27372 return cost;
27375 /* Debug version of rs6000_adjust_cost. */
27377 static int
27378 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
27379 int cost)
27381 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
27383 if (ret != cost)
27385 const char *dep;
27387 switch (REG_NOTE_KIND (link))
27389 default: dep = "unknown dependency"; break;
27390 case REG_DEP_TRUE: dep = "data dependency"; break;
27391 case REG_DEP_OUTPUT: dep = "output dependency"; break;
27392 case REG_DEP_ANTI: dep = "anti dependency"; break;
27395 fprintf (stderr,
27396 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
27397 "%s, insn:\n", ret, cost, dep);
27399 debug_rtx (insn);
27402 return ret;
27405 /* Return true if INSN is microcoded.
27406 Return false otherwise. */
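/* For example, on POWER4/POWER5 a load with update that is also indexed
   (an lwzux-style insn, for illustration) matches the UPDATE_YES /
   INDEXED_YES test below and is reported as microcoded.  */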
27408 static bool
27409 is_microcoded_insn (rtx_insn *insn)
27411 if (!insn || !NONDEBUG_INSN_P (insn)
27412 || GET_CODE (PATTERN (insn)) == USE
27413 || GET_CODE (PATTERN (insn)) == CLOBBER)
27414 return false;
27416 if (rs6000_cpu_attr == CPU_CELL)
27417 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
27419 if (rs6000_sched_groups
27420 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
27422 enum attr_type type = get_attr_type (insn);
27423 if ((type == TYPE_LOAD
27424 && get_attr_update (insn) == UPDATE_YES
27425 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
27426 || ((type == TYPE_LOAD || type == TYPE_STORE)
27427 && get_attr_update (insn) == UPDATE_YES
27428 && get_attr_indexed (insn) == INDEXED_YES)
27429 || type == TYPE_MFCR)
27430 return true;
27433 return false;
27436 /* The function returns true if INSN is cracked into 2 instructions
27437 by the processor (and therefore occupies 2 issue slots). */
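/* For example, a sign-extending load without update (an lha-style insn,
   for illustration) matches the first test below on POWER4/POWER5, so it
   is cracked and needs two issue slots.  */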
27439 static bool
27440 is_cracked_insn (rtx_insn *insn)
27442 if (!insn || !NONDEBUG_INSN_P (insn)
27443 || GET_CODE (PATTERN (insn)) == USE
27444 || GET_CODE (PATTERN (insn)) == CLOBBER)
27445 return false;
27447 if (rs6000_sched_groups
27448 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
27450 enum attr_type type = get_attr_type (insn);
27451 if ((type == TYPE_LOAD
27452 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27453 && get_attr_update (insn) == UPDATE_NO)
27454 || (type == TYPE_LOAD
27455 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
27456 && get_attr_update (insn) == UPDATE_YES
27457 && get_attr_indexed (insn) == INDEXED_NO)
27458 || (type == TYPE_STORE
27459 && get_attr_update (insn) == UPDATE_YES
27460 && get_attr_indexed (insn) == INDEXED_NO)
27461 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
27462 && get_attr_update (insn) == UPDATE_YES)
27463 || type == TYPE_DELAYED_CR
27464 || (type == TYPE_EXTS
27465 && get_attr_dot (insn) == DOT_YES)
27466 || (type == TYPE_SHIFT
27467 && get_attr_dot (insn) == DOT_YES
27468 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
27469 || (type == TYPE_MUL
27470 && get_attr_dot (insn) == DOT_YES)
27471 || type == TYPE_DIV
27472 || (type == TYPE_INSERT
27473 && get_attr_size (insn) == SIZE_32))
27474 return true;
27477 return false;
27480 /* The function returns true if INSN can be issued only from
27481 the branch slot. */
27483 static bool
27484 is_branch_slot_insn (rtx_insn *insn)
27486 if (!insn || !NONDEBUG_INSN_P (insn)
27487 || GET_CODE (PATTERN (insn)) == USE
27488 || GET_CODE (PATTERN (insn)) == CLOBBER)
27489 return false;
27491 if (rs6000_sched_groups)
27493 enum attr_type type = get_attr_type (insn);
27494 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
27495 return true;
27496 return false;
27499 return false;
27502 /* Return true if OUT_INSN sets a value that is
27503 used in the address generation computation of IN_INSN. */
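/* Sketch of a matching pair (hypothetical RTL):
     OUT_INSN: (set (reg 9) (plus (reg 3) (const_int 8)))
     IN_INSN:  (set (reg 10) (mem (plus (reg 9) (const_int 16))))
   (reg 9) is mentioned in IN_INSN's source, so this returns true.  */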
27504 static bool
27505 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
27507 rtx out_set, in_set;
27509 /* For performance reasons, only handle the simple case where
27510 both insns are a single_set. */
27511 out_set = single_set (out_insn);
27512 if (out_set)
27514 in_set = single_set (in_insn);
27515 if (in_set)
27516 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
27519 return false;
27522 /* Try to determine base/offset/size parts of the given MEM.
27523 Return true if successful, false if the values could not
27524 all be determined.
27526 This function only looks for REG or REG+CONST address forms.
27527 A REG+REG address form returns false. */
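/* Usage sketch (hypothetical RTL and values): for
     (mem:DI (plus:DI (reg:DI 3) (const_int 16)))
   with a known 8-byte MEM_SIZE, this sets *BASE to (reg 3), *OFFSET to 16
   and *SIZE to 8.  An indexed address such as (plus (reg 3) (reg 4))
   makes it return false.  */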
27529 static bool
27530 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
27531 HOST_WIDE_INT *size)
27533 rtx addr_rtx;
27534 if (MEM_SIZE_KNOWN_P (mem))
27535 *size = MEM_SIZE (mem);
27536 else
27537 return false;
27539 addr_rtx = XEXP (mem, 0);
27540 if (GET_CODE (addr_rtx) == PRE_MODIFY)
27541 addr_rtx = XEXP (addr_rtx, 1);
27543 *offset = 0;
27544 while (GET_CODE (addr_rtx) == PLUS
27545 && CONST_INT_P (XEXP (addr_rtx, 1)))
27547 *offset += INTVAL (XEXP (addr_rtx, 1));
27548 addr_rtx = XEXP (addr_rtx, 0);
27550 if (!REG_P (addr_rtx))
27551 return false;
27553 *base = addr_rtx;
27554 return true;
27557 /* Return true if the target storage location of MEM1 is
27558 adjacent to the target storage location of MEM2. */
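/* Worked example (hypothetical): two 4-byte accesses off the same base
   register at offsets 0 and 4 are adjacent (0 + 4 == 4); offsets 0 and 8
   leave a 4-byte gap and are not.  */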
27561 static bool
27562 adjacent_mem_locations (rtx mem1, rtx mem2)
27564 rtx reg1, reg2;
27565 HOST_WIDE_INT off1, size1, off2, size2;
27567 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27568 && get_memref_parts (mem2, &reg2, &off2, &size2))
27569 return ((REGNO (reg1) == REGNO (reg2))
27570 && ((off1 + size1 == off2)
27571 || (off2 + size2 == off1)));
27573 return false;
27576 /* This function returns true if it can be determined that the two MEM
27577 locations overlap by at least 1 byte based on base reg/offset/size. */
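/* Worked example (hypothetical): an 8-byte access at offset 0 overlaps a
   4-byte access at offset 4 from the same base register, since 0 <= 4 and
   0 + 8 > 4.  Accesses at offsets 0 and 8 do not overlap.  */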
27579 static bool
27580 mem_locations_overlap (rtx mem1, rtx mem2)
27582 rtx reg1, reg2;
27583 HOST_WIDE_INT off1, size1, off2, size2;
27585 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27586 && get_memref_parts (mem2, &reg2, &off2, &size2))
27587 return ((REGNO (reg1) == REGNO (reg2))
27588 && (((off1 <= off2) && (off1 + size1 > off2))
27589 || ((off2 <= off1) && (off2 + size2 > off1))));
27591 return false;
27594 /* A C statement (sans semicolon) to update the integer scheduling
27595 priority INSN_PRIORITY (INSN). Increase the priority to execute the
27596 INSN earlier, reduce the priority to execute INSN later. Do not
27597 define this macro if you do not need to adjust the scheduling
27598 priorities of insns. */
27600 static int
27601 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
27603 rtx load_mem, str_mem;
27604 /* On machines (like the 750) which have asymmetric integer units,
27605 where one integer unit can do multiply and divides and the other
27606 can't, reduce the priority of multiply/divide so it is scheduled
27607 before other integer operations. */
27609 #if 0
27610 if (! INSN_P (insn))
27611 return priority;
27613 if (GET_CODE (PATTERN (insn)) == USE)
27614 return priority;
27616 switch (rs6000_cpu_attr) {
27617 case CPU_PPC750:
27618 switch (get_attr_type (insn))
27620 default:
27621 break;
27623 case TYPE_MUL:
27624 case TYPE_DIV:
27625 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
27626 priority, priority);
27627 if (priority >= 0 && priority < 0x01000000)
27628 priority >>= 3;
27629 break;
27632 #endif
27634 if (insn_must_be_first_in_group (insn)
27635 && reload_completed
27636 && current_sched_info->sched_max_insns_priority
27637 && rs6000_sched_restricted_insns_priority)
27640 /* Prioritize insns that can be dispatched only in the first
27641 dispatch slot. */
27642 if (rs6000_sched_restricted_insns_priority == 1)
27643 /* Attach highest priority to insn. This means that in
27644 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
27645 precede 'priority' (critical path) considerations. */
27646 return current_sched_info->sched_max_insns_priority;
27647 else if (rs6000_sched_restricted_insns_priority == 2)
27648 /* Increase priority of insn by a minimal amount. This means that in
27649 haifa-sched.c:ready_sort(), only 'priority' (critical path)
27650 considerations precede dispatch-slot restriction considerations. */
27651 return (priority + 1);
27654 if (rs6000_cpu == PROCESSOR_POWER6
27655 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
27656 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
27657 /* Attach highest priority to insn if the scheduler has just issued two
27658 stores and this instruction is a load, or two loads and this instruction
27659 is a store. Power6 wants loads and stores scheduled alternately
27660 when possible */
27661 return current_sched_info->sched_max_insns_priority;
27663 return priority;
27666 /* Return true if the instruction is nonpipelined on the Cell. */
27667 static bool
27668 is_nonpipeline_insn (rtx_insn *insn)
27670 enum attr_type type;
27671 if (!insn || !NONDEBUG_INSN_P (insn)
27672 || GET_CODE (PATTERN (insn)) == USE
27673 || GET_CODE (PATTERN (insn)) == CLOBBER)
27674 return false;
27676 type = get_attr_type (insn);
27677 if (type == TYPE_MUL
27678 || type == TYPE_DIV
27679 || type == TYPE_SDIV
27680 || type == TYPE_DDIV
27681 || type == TYPE_SSQRT
27682 || type == TYPE_DSQRT
27683 || type == TYPE_MFCR
27684 || type == TYPE_MFCRF
27685 || type == TYPE_MFJMPR)
27687 return true;
27689 return false;
27693 /* Return how many instructions the machine can issue per cycle. */
27695 static int
27696 rs6000_issue_rate (void)
27698 /* Unless scheduling for register pressure, use an issue rate of 1
27699 for the first scheduling pass, to limit degradation. */
27700 if (!reload_completed && !flag_sched_pressure)
27701 return 1;
27703 switch (rs6000_cpu_attr) {
27704 case CPU_RS64A:
27705 case CPU_PPC601: /* ? */
27706 case CPU_PPC7450:
27707 return 3;
27708 case CPU_PPC440:
27709 case CPU_PPC603:
27710 case CPU_PPC750:
27711 case CPU_PPC7400:
27712 case CPU_PPC8540:
27713 case CPU_PPC8548:
27714 case CPU_CELL:
27715 case CPU_PPCE300C2:
27716 case CPU_PPCE300C3:
27717 case CPU_PPCE500MC:
27718 case CPU_PPCE500MC64:
27719 case CPU_PPCE5500:
27720 case CPU_PPCE6500:
27721 case CPU_TITAN:
27722 return 2;
27723 case CPU_PPC476:
27724 case CPU_PPC604:
27725 case CPU_PPC604E:
27726 case CPU_PPC620:
27727 case CPU_PPC630:
27728 return 4;
27729 case CPU_POWER4:
27730 case CPU_POWER5:
27731 case CPU_POWER6:
27732 case CPU_POWER7:
27733 return 5;
27734 case CPU_POWER8:
27735 return 7;
27736 default:
27737 return 1;
27741 /* Return how many instructions to look ahead for better insn
27742 scheduling. */
27744 static int
27745 rs6000_use_sched_lookahead (void)
27747 switch (rs6000_cpu_attr)
27749 case CPU_PPC8540:
27750 case CPU_PPC8548:
27751 return 4;
27753 case CPU_CELL:
27754 return (reload_completed ? 8 : 0);
27756 default:
27757 return 0;
27761 /* We are choosing an insn from the ready queue. Return zero if INSN can be
27762 chosen. */
27763 static int
27764 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
27766 if (ready_index == 0)
27767 return 0;
27769 if (rs6000_cpu_attr != CPU_CELL)
27770 return 0;
27772 gcc_assert (insn != NULL_RTX && INSN_P (insn));
27774 if (!reload_completed
27775 || is_nonpipeline_insn (insn)
27776 || is_microcoded_insn (insn))
27777 return 1;
27779 return 0;
27782 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
27783 and return true. */
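/* For example (hypothetical RTL), given
     (set (reg 3) (plus (mem (reg 4)) (const_int 1)))
   the recursive walk below descends through the operands and returns the
   (mem (reg 4)) reference in *MEM_REF.  */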
27785 static bool
27786 find_mem_ref (rtx pat, rtx *mem_ref)
27788 const char * fmt;
27789 int i, j;
27791 /* stack_tie does not produce any real memory traffic. */
27792 if (tie_operand (pat, VOIDmode))
27793 return false;
27795 if (GET_CODE (pat) == MEM)
27797 *mem_ref = pat;
27798 return true;
27801 /* Recursively process the pattern. */
27802 fmt = GET_RTX_FORMAT (GET_CODE (pat));
27804 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
27806 if (fmt[i] == 'e')
27808 if (find_mem_ref (XEXP (pat, i), mem_ref))
27809 return true;
27811 else if (fmt[i] == 'E')
27812 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
27814 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
27815 return true;
27819 return false;
27822 /* Determine if PAT is a PATTERN of a load insn. */
27824 static bool
27825 is_load_insn1 (rtx pat, rtx *load_mem)
27827 if (!pat)
27828 return false;
27830 if (GET_CODE (pat) == SET)
27831 return find_mem_ref (SET_SRC (pat), load_mem);
27833 if (GET_CODE (pat) == PARALLEL)
27835 int i;
27837 for (i = 0; i < XVECLEN (pat, 0); i++)
27838 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
27839 return true;
27842 return false;
27845 /* Determine if INSN loads from memory. */
27847 static bool
27848 is_load_insn (rtx insn, rtx *load_mem)
27850 if (!insn || !INSN_P (insn))
27851 return false;
27853 if (CALL_P (insn))
27854 return false;
27856 return is_load_insn1 (PATTERN (insn), load_mem);
27859 /* Determine if PAT is a PATTERN of a store insn. */
27861 static bool
27862 is_store_insn1 (rtx pat, rtx *str_mem)
27864 if (!pat)
27865 return false;
27867 if (GET_CODE (pat) == SET)
27868 return find_mem_ref (SET_DEST (pat), str_mem);
27870 if (GET_CODE (pat) == PARALLEL)
27872 int i;
27874 for (i = 0; i < XVECLEN (pat, 0); i++)
27875 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
27876 return true;
27879 return false;
27882 /* Determine if INSN stores to memory. */
27884 static bool
27885 is_store_insn (rtx insn, rtx *str_mem)
27887 if (!insn || !INSN_P (insn))
27888 return false;
27890 return is_store_insn1 (PATTERN (insn), str_mem);
27893 /* Returns whether the dependence between INSN and NEXT is considered
27894 costly by the given target. */
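/* For illustration: with the store-to-load setting of -msched-costly-dep,
   every store feeding a later load is kept out of the load's dispatch
   group; with a numeric setting N, any dependence whose remaining latency
   (cost - distance) is at least N is treated as costly.  */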
27896 static bool
27897 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
27899 rtx insn;
27900 rtx next;
27901 rtx load_mem, str_mem;
27903 /* If the flag is not enabled - no dependence is considered costly;
27904 allow all dependent insns in the same group.
27905 This is the most aggressive option. */
27906 if (rs6000_sched_costly_dep == no_dep_costly)
27907 return false;
27909 /* If the flag is set to 1 - a dependence is always considered costly;
27910 do not allow dependent instructions in the same group.
27911 This is the most conservative option. */
27912 if (rs6000_sched_costly_dep == all_deps_costly)
27913 return true;
27915 insn = DEP_PRO (dep);
27916 next = DEP_CON (dep);
27918 if (rs6000_sched_costly_dep == store_to_load_dep_costly
27919 && is_load_insn (next, &load_mem)
27920 && is_store_insn (insn, &str_mem))
27921 /* Prevent load after store in the same group. */
27922 return true;
27924 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
27925 && is_load_insn (next, &load_mem)
27926 && is_store_insn (insn, &str_mem)
27927 && DEP_TYPE (dep) == REG_DEP_TRUE
27928 && mem_locations_overlap(str_mem, load_mem))
27929 /* Prevent load after store in the same group if it is a true
27930 dependence. */
27931 return true;
27933 /* The flag is set to X; dependences with latency >= X are considered costly,
27934 and will not be scheduled in the same group. */
27935 if (rs6000_sched_costly_dep <= max_dep_latency
27936 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
27937 return true;
27939 return false;
27942 /* Return the next insn after INSN that is found before TAIL is reached,
27943 skipping any "non-active" insns - insns that will not actually occupy
27944 an issue slot. Return NULL_RTX if such an insn is not found. */
27946 static rtx_insn *
27947 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
27949 if (insn == NULL_RTX || insn == tail)
27950 return NULL;
27952 while (1)
27954 insn = NEXT_INSN (insn);
27955 if (insn == NULL_RTX || insn == tail)
27956 return NULL;
27958 if (CALL_P (insn)
27959 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
27960 || (NONJUMP_INSN_P (insn)
27961 && GET_CODE (PATTERN (insn)) != USE
27962 && GET_CODE (PATTERN (insn)) != CLOBBER
27963 && INSN_CODE (insn) != CODE_FOR_stack_tie))
27964 break;
27966 return insn;
27969 /* We are about to begin issuing insns for this clock cycle. */
27971 static int
27972 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
27973 rtx_insn **ready ATTRIBUTE_UNUSED,
27974 int *pn_ready ATTRIBUTE_UNUSED,
27975 int clock_var ATTRIBUTE_UNUSED)
27977 int n_ready = *pn_ready;
27979 if (sched_verbose)
27980 fprintf (dump, "// rs6000_sched_reorder :\n");
27982 /* Reorder the ready list, if the second to last ready insn
27983 is a nonpipeline insn. */
27984 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
27986 if (is_nonpipeline_insn (ready[n_ready - 1])
27987 && (recog_memoized (ready[n_ready - 2]) > 0))
27988 /* Simply swap first two insns. */
27989 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
27992 if (rs6000_cpu == PROCESSOR_POWER6)
27993 load_store_pendulum = 0;
27995 return rs6000_issue_rate ();
27998 /* Like rs6000_sched_reorder, but called after issuing each insn. */
28000 static int
28001 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
28002 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
28004 if (sched_verbose)
28005 fprintf (dump, "// rs6000_sched_reorder2 :\n");
28007 /* For Power6, we need to handle some special cases to try to keep the
28008 store queue from overflowing and triggering expensive flushes.
28010 This code monitors how load and store instructions are being issued
28011 and skews the ready list one way or the other to increase the likelihood
28012 that a desired instruction is issued at the proper time.
28014 A couple of things are done. First, we maintain a "load_store_pendulum"
28015 to track the current state of load/store issue.
28017 - If the pendulum is at zero, then no loads or stores have been
28018 issued in the current cycle so we do nothing.
28020 - If the pendulum is 1, then a single load has been issued in this
28021 cycle and we attempt to locate another load in the ready list to
28022 issue with it.
28024 - If the pendulum is -2, then two stores have already been
28025 issued in this cycle, so we increase the priority of the first load
28026 in the ready list to increase its likelihood of being chosen first
28027 in the next cycle.
28029 - If the pendulum is -1, then a single store has been issued in this
28030 cycle and we attempt to locate another store in the ready list to
28031 issue with it, preferring a store to an adjacent memory location to
28032 facilitate store pairing in the store queue.
28034 - If the pendulum is 2, then two loads have already been
28035 issued in this cycle, so we increase the priority of the first store
28036 in the ready list to increase its likelihood of being chosen first
28037 in the next cycle.
28039 - If the pendulum < -2 or > 2, then do nothing.
28041 Note: This code covers the most common scenarios. There exist
28042 non-load/store instructions which make use of the LSU and which
28043 would need to be accounted for to strictly model the behavior
28044 of the machine. Those instructions are currently unaccounted
28045 for, to help minimize the compile time overhead of this code. */
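/* Illustrative trace (hypothetical cycle): a store issues, so the
   pendulum moves to -1 and the code below hunts for a second, preferably
   adjacent, store; once that issues the pendulum reaches -2 and the first
   load on the ready list gets a priority bump, so the next cycle swings
   back toward loads.  */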
28047 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
28049 int pos;
28050 int i;
28051 rtx_insn *tmp;
28052 rtx load_mem, str_mem;
28054 if (is_store_insn (last_scheduled_insn, &str_mem))
28055 /* Issuing a store, swing the load_store_pendulum to the left */
28056 load_store_pendulum--;
28057 else if (is_load_insn (last_scheduled_insn, &load_mem))
28058 /* Issuing a load, swing the load_store_pendulum to the right */
28059 load_store_pendulum++;
28060 else
28061 return cached_can_issue_more;
28063 /* If the pendulum is balanced, or there is only one instruction on
28064 the ready list, then all is well, so return. */
28065 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
28066 return cached_can_issue_more;
28068 if (load_store_pendulum == 1)
28070 /* A load has been issued in this cycle. Scan the ready list
28071 for another load to issue with it */
28072 pos = *pn_ready-1;
28074 while (pos >= 0)
28076 if (is_load_insn (ready[pos], &load_mem))
28078 /* Found a load. Move it to the head of the ready list,
28079 and adjust its priority so that it is more likely to
28080 stay there */
28081 tmp = ready[pos];
28082 for (i = pos; i < *pn_ready - 1; i++)
28083 ready[i] = ready[i + 1];
28084 ready[*pn_ready-1] = tmp;
28086 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
28087 INSN_PRIORITY (tmp)++;
28088 break;
28090 pos--;
28093 else if (load_store_pendulum == -2)
28095 /* Two stores have been issued in this cycle. Increase the
28096 priority of the first load in the ready list to favor it for
28097 issuing in the next cycle. */
28098 pos = *pn_ready-1;
28100 while (pos >= 0)
28102 if (is_load_insn (ready[pos], &load_mem)
28103 && !sel_sched_p ()
28104 && INSN_PRIORITY_KNOWN (ready[pos]))
28106 INSN_PRIORITY (ready[pos])++;
28108 /* Adjust the pendulum to account for the fact that a load
28109 was found and increased in priority. This is to prevent
28110 increasing the priority of multiple loads */
28111 load_store_pendulum--;
28113 break;
28115 pos--;
28118 else if (load_store_pendulum == -1)
28120 /* A store has been issued in this cycle. Scan the ready list for
28121 another store to issue with it, preferring a store to an adjacent
28122 memory location */
28123 int first_store_pos = -1;
28125 pos = *pn_ready-1;
28127 while (pos >= 0)
28129 if (is_store_insn (ready[pos], &str_mem))
28131 rtx str_mem2;
28132 /* Maintain the index of the first store found on the
28133 list */
28134 if (first_store_pos == -1)
28135 first_store_pos = pos;
28137 if (is_store_insn (last_scheduled_insn, &str_mem2)
28138 && adjacent_mem_locations (str_mem, str_mem2))
28140 /* Found an adjacent store. Move it to the head of the
28141 ready list, and adjust its priority so that it is
28142 more likely to stay there */
28143 tmp = ready[pos];
28144 for (i = pos; i < *pn_ready - 1; i++)
28145 ready[i] = ready[i + 1];
28146 ready[*pn_ready-1] = tmp;
28148 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
28149 INSN_PRIORITY (tmp)++;
28151 first_store_pos = -1;
28153 break;
28156 pos--;
28159 if (first_store_pos >= 0)
28161 /* An adjacent store wasn't found, but a non-adjacent store was,
28162 so move the non-adjacent store to the front of the ready
28163 list, and adjust its priority so that it is more likely to
28164 stay there. */
28165 tmp = ready[first_store_pos];
28166 for (i = first_store_pos; i < *pn_ready - 1; i++)
28167 ready[i] = ready[i + 1];
28168 ready[*pn_ready-1] = tmp;
28169 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
28170 INSN_PRIORITY (tmp)++;
28173 else if (load_store_pendulum == 2)
28175 /* Two loads have been issued in this cycle. Increase the priority
28176 of the first store in the ready list to favor it for issuing in
28177 the next cycle. */
28178 pos = *pn_ready-1;
28180 while (pos >= 0)
28182 if (is_store_insn (ready[pos], &str_mem)
28183 && !sel_sched_p ()
28184 && INSN_PRIORITY_KNOWN (ready[pos]))
28186 INSN_PRIORITY (ready[pos])++;
28188 /* Adjust the pendulum to account for the fact that a store
28189 was found and increased in priority. This is to prevent
28190 increasing the priority of multiple stores */
28191 load_store_pendulum++;
28193 break;
28195 pos--;
28200 return cached_can_issue_more;
28203 /* Return whether the presence of INSN causes a dispatch group termination
28204 of group WHICH_GROUP.
28206 If WHICH_GROUP == current_group, this function will return true if INSN
28207 causes the termination of the current group (i.e., the dispatch group to
28208 which INSN belongs). This means that INSN will be the last insn in the
28209 group it belongs to.
28211 If WHICH_GROUP == previous_group, this function will return true if INSN
28212 causes the termination of the previous group (i.e., the dispatch group that
28213 precedes the group to which INSN belongs). This means that INSN will be
28214 the first insn in the group it belongs to. */
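/* For example, a microcoded insn on POWER4/POWER5 must be both first and
   last in its group, so it terminates the previous group and its own
   group alike, and this returns true for either WHICH_GROUP value.  */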
28216 static bool
28217 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
28219 bool first, last;
28221 if (! insn)
28222 return false;
28224 first = insn_must_be_first_in_group (insn);
28225 last = insn_must_be_last_in_group (insn);
28227 if (first && last)
28228 return true;
28230 if (which_group == current_group)
28231 return last;
28232 else if (which_group == previous_group)
28233 return first;
28235 return false;
28239 static bool
28240 insn_must_be_first_in_group (rtx_insn *insn)
28242 enum attr_type type;
28244 if (!insn
28245 || NOTE_P (insn)
28246 || DEBUG_INSN_P (insn)
28247 || GET_CODE (PATTERN (insn)) == USE
28248 || GET_CODE (PATTERN (insn)) == CLOBBER)
28249 return false;
28251 switch (rs6000_cpu)
28253 case PROCESSOR_POWER5:
28254 if (is_cracked_insn (insn))
28255 return true;
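/* Fall through: POWER5 also applies the POWER4 checks below.  */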
28256 case PROCESSOR_POWER4:
28257 if (is_microcoded_insn (insn))
28258 return true;
28260 if (!rs6000_sched_groups)
28261 return false;
28263 type = get_attr_type (insn);
28265 switch (type)
28267 case TYPE_MFCR:
28268 case TYPE_MFCRF:
28269 case TYPE_MTCR:
28270 case TYPE_DELAYED_CR:
28271 case TYPE_CR_LOGICAL:
28272 case TYPE_MTJMPR:
28273 case TYPE_MFJMPR:
28274 case TYPE_DIV:
28275 case TYPE_LOAD_L:
28276 case TYPE_STORE_C:
28277 case TYPE_ISYNC:
28278 case TYPE_SYNC:
28279 return true;
28280 default:
28281 break;
28283 break;
28284 case PROCESSOR_POWER6:
28285 type = get_attr_type (insn);
28287 switch (type)
28289 case TYPE_EXTS:
28290 case TYPE_CNTLZ:
28291 case TYPE_TRAP:
28292 case TYPE_MUL:
28293 case TYPE_INSERT:
28294 case TYPE_FPCOMPARE:
28295 case TYPE_MFCR:
28296 case TYPE_MTCR:
28297 case TYPE_MFJMPR:
28298 case TYPE_MTJMPR:
28299 case TYPE_ISYNC:
28300 case TYPE_SYNC:
28301 case TYPE_LOAD_L:
28302 case TYPE_STORE_C:
28303 return true;
28304 case TYPE_SHIFT:
28305 if (get_attr_dot (insn) == DOT_NO
28306 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
28307 return true;
28308 else
28309 break;
28310 case TYPE_DIV:
28311 if (get_attr_size (insn) == SIZE_32)
28312 return true;
28313 else
28314 break;
28315 case TYPE_LOAD:
28316 case TYPE_STORE:
28317 case TYPE_FPLOAD:
28318 case TYPE_FPSTORE:
28319 if (get_attr_update (insn) == UPDATE_YES)
28320 return true;
28321 else
28322 break;
28323 default:
28324 break;
28326 break;
28327 case PROCESSOR_POWER7:
28328 type = get_attr_type (insn);
28330 switch (type)
28332 case TYPE_CR_LOGICAL:
28333 case TYPE_MFCR:
28334 case TYPE_MFCRF:
28335 case TYPE_MTCR:
28336 case TYPE_DIV:
28337 case TYPE_ISYNC:
28338 case TYPE_LOAD_L:
28339 case TYPE_STORE_C:
28340 case TYPE_MFJMPR:
28341 case TYPE_MTJMPR:
28342 return true;
28343 case TYPE_MUL:
28344 case TYPE_SHIFT:
28345 case TYPE_EXTS:
28346 if (get_attr_dot (insn) == DOT_YES)
28347 return true;
28348 else
28349 break;
28350 case TYPE_LOAD:
28351 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28352 || get_attr_update (insn) == UPDATE_YES)
28353 return true;
28354 else
28355 break;
28356 case TYPE_STORE:
28357 case TYPE_FPLOAD:
28358 case TYPE_FPSTORE:
28359 if (get_attr_update (insn) == UPDATE_YES)
28360 return true;
28361 else
28362 break;
28363 default:
28364 break;
28366 break;
28367 case PROCESSOR_POWER8:
28368 type = get_attr_type (insn);
28370 switch (type)
28372 case TYPE_CR_LOGICAL:
28373 case TYPE_DELAYED_CR:
28374 case TYPE_MFCR:
28375 case TYPE_MFCRF:
28376 case TYPE_MTCR:
28377 case TYPE_SYNC:
28378 case TYPE_ISYNC:
28379 case TYPE_LOAD_L:
28380 case TYPE_STORE_C:
28381 case TYPE_VECSTORE:
28382 case TYPE_MFJMPR:
28383 case TYPE_MTJMPR:
28384 return true;
28385 case TYPE_SHIFT:
28386 case TYPE_EXTS:
28387 case TYPE_MUL:
28388 if (get_attr_dot (insn) == DOT_YES)
28389 return true;
28390 else
28391 break;
28392 case TYPE_LOAD:
28393 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28394 || get_attr_update (insn) == UPDATE_YES)
28395 return true;
28396 else
28397 break;
28398 case TYPE_STORE:
28399 if (get_attr_update (insn) == UPDATE_YES
28400 && get_attr_indexed (insn) == INDEXED_YES)
28401 return true;
28402 else
28403 break;
28404 default:
28405 break;
28407 break;
28408 default:
28409 break;
28412 return false;
28415 static bool
28416 insn_must_be_last_in_group (rtx_insn *insn)
28418 enum attr_type type;
28420 if (!insn
28421 || NOTE_P (insn)
28422 || DEBUG_INSN_P (insn)
28423 || GET_CODE (PATTERN (insn)) == USE
28424 || GET_CODE (PATTERN (insn)) == CLOBBER)
28425 return false;
28427 switch (rs6000_cpu) {
28428 case PROCESSOR_POWER4:
28429 case PROCESSOR_POWER5:
28430 if (is_microcoded_insn (insn))
28431 return true;
28433 if (is_branch_slot_insn (insn))
28434 return true;
28436 break;
28437 case PROCESSOR_POWER6:
28438 type = get_attr_type (insn);
28440 switch (type)
28442 case TYPE_EXTS:
28443 case TYPE_CNTLZ:
28444 case TYPE_TRAP:
28445 case TYPE_MUL:
28446 case TYPE_FPCOMPARE:
28447 case TYPE_MFCR:
28448 case TYPE_MTCR:
28449 case TYPE_MFJMPR:
28450 case TYPE_MTJMPR:
28451 case TYPE_ISYNC:
28452 case TYPE_SYNC:
28453 case TYPE_LOAD_L:
28454 case TYPE_STORE_C:
28455 return true;
28456 case TYPE_SHIFT:
28457 if (get_attr_dot (insn) == DOT_NO
28458 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
28459 return true;
28460 else
28461 break;
28462 case TYPE_DIV:
28463 if (get_attr_size (insn) == SIZE_32)
28464 return true;
28465 else
28466 break;
28467 default:
28468 break;
28470 break;
28471 case PROCESSOR_POWER7:
28472 type = get_attr_type (insn);
28474 switch (type)
28476 case TYPE_ISYNC:
28477 case TYPE_SYNC:
28478 case TYPE_LOAD_L:
28479 case TYPE_STORE_C:
28480 return true;
28481 case TYPE_LOAD:
28482 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28483 && get_attr_update (insn) == UPDATE_YES)
28484 return true;
28485 else
28486 break;
28487 case TYPE_STORE:
28488 if (get_attr_update (insn) == UPDATE_YES
28489 && get_attr_indexed (insn) == INDEXED_YES)
28490 return true;
28491 else
28492 break;
28493 default:
28494 break;
28496 break;
28497 case PROCESSOR_POWER8:
28498 type = get_attr_type (insn);
28500 switch (type)
28502 case TYPE_MFCR:
28503 case TYPE_MTCR:
28504 case TYPE_ISYNC:
28505 case TYPE_SYNC:
28506 case TYPE_LOAD_L:
28507 case TYPE_STORE_C:
28508 return true;
28509 case TYPE_LOAD:
28510 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28511 && get_attr_update (insn) == UPDATE_YES)
28512 return true;
28513 else
28514 break;
28515 case TYPE_STORE:
28516 if (get_attr_update (insn) == UPDATE_YES
28517 && get_attr_indexed (insn) == INDEXED_YES)
28518 return true;
28519 else
28520 break;
28521 default:
28522 break;
28524 break;
28525 default:
28526 break;
28529 return false;
28532 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
28533 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
28535 static bool
28536 is_costly_group (rtx *group_insns, rtx next_insn)
28538 int i;
28539 int issue_rate = rs6000_issue_rate ();
28541 for (i = 0; i < issue_rate; i++)
28543 sd_iterator_def sd_it;
28544 dep_t dep;
28545 rtx insn = group_insns[i];
28547 if (!insn)
28548 continue;
28550 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
28552 rtx next = DEP_CON (dep);
28554 if (next == next_insn
28555 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
28556 return true;
28560 return false;
28563 /* Utility of the function redefine_groups.
28564 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
28565 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
28566 to keep it "far" (in a separate group) from GROUP_INSNS, following
28567 one of the following schemes, depending on the value of the flag
28568 -minsert-sched-nops = X:
28569 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
28570 in order to force NEXT_INSN into a separate group.
28571 (2) X < sched_finish_regroup_exact: insert exactly X nops.
28572 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
28573 insertion (has a group just ended, how many vacant issue slots remain in the
28574 last group, and how many dispatch groups were encountered so far). */
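/* Worked example (hypothetical): with a numeric -minsert-sched-nops value
   of 2, exactly two nops are emitted before NEXT_INSN; with the
   exact-regrouping setting, either one group-ending nop (POWER6/7/8) or
   enough plain nops are emitted to force NEXT_INSN into a fresh group.  */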
28576 static int
28577 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
28578 rtx_insn *next_insn, bool *group_end, int can_issue_more,
28579 int *group_count)
28581 rtx nop;
28582 bool force;
28583 int issue_rate = rs6000_issue_rate ();
28584 bool end = *group_end;
28585 int i;
28587 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
28588 return can_issue_more;
28590 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
28591 return can_issue_more;
28593 force = is_costly_group (group_insns, next_insn);
28594 if (!force)
28595 return can_issue_more;
28597 if (sched_verbose > 6)
28598 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
28599 *group_count ,can_issue_more);
28601 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
28603 if (*group_end)
28604 can_issue_more = 0;
28606 /* Since only a branch can be issued in the last issue_slot, it is
28607 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
28608 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
28609 in this case the last nop will start a new group and the branch
28610 will be forced to the new group. */
28611 if (can_issue_more && !is_branch_slot_insn (next_insn))
28612 can_issue_more--;
28614 /* Do we have a special group ending nop? */
28615 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
28616 || rs6000_cpu_attr == CPU_POWER8)
28618 nop = gen_group_ending_nop ();
28619 emit_insn_before (nop, next_insn);
28620 can_issue_more = 0;
28622 else
28623 while (can_issue_more > 0)
28625 nop = gen_nop ();
28626 emit_insn_before (nop, next_insn);
28627 can_issue_more--;
28630 *group_end = true;
28631 return 0;
28634 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
28636 int n_nops = rs6000_sched_insert_nops;
28638 /* Nops can't be issued from the branch slot, so the effective
28639 issue_rate for nops is 'issue_rate - 1'. */
28640 if (can_issue_more == 0)
28641 can_issue_more = issue_rate;
28642 can_issue_more--;
28643 if (can_issue_more == 0)
28645 can_issue_more = issue_rate - 1;
28646 (*group_count)++;
28647 end = true;
28648 for (i = 0; i < issue_rate; i++)
28650 group_insns[i] = 0;
28654 while (n_nops > 0)
28656 nop = gen_nop ();
28657 emit_insn_before (nop, next_insn);
28658 if (can_issue_more == issue_rate - 1) /* new group begins */
28659 end = false;
28660 can_issue_more--;
28661 if (can_issue_more == 0)
28663 can_issue_more = issue_rate - 1;
28664 (*group_count)++;
28665 end = true;
28666 for (i = 0; i < issue_rate; i++)
28668 group_insns[i] = 0;
28671 n_nops--;
28674 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
28675 can_issue_more++;
28677 /* Is next_insn going to start a new group? */
28678 *group_end
28679 = (end
28680 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28681 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28682 || (can_issue_more < issue_rate &&
28683 insn_terminates_group_p (next_insn, previous_group)));
28684 if (*group_end && end)
28685 (*group_count)--;
28687 if (sched_verbose > 6)
28688 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
28689 *group_count, can_issue_more);
28690 return can_issue_more;
28693 return can_issue_more;
28696 /* This function tries to synch the dispatch groups that the compiler "sees"
28697 with the dispatch groups that the processor dispatcher is expected to
28698 form in practice. It tries to achieve this synchronization by forcing the
28699 estimated processor grouping on the compiler (as opposed to the function
28700 'pad_groups' which tries to force the scheduler's grouping on the processor).
28702 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
28703 examines the (estimated) dispatch groups that will be formed by the processor
28704 dispatcher. It marks these group boundaries to reflect the estimated
28705 processor grouping, overriding the grouping that the scheduler had marked.
28706 Depending on the value of the flag '-minsert-sched-nops' this function can
28707 force certain insns into separate groups or force a certain distance between
28708 them by inserting nops, for example, if there exists a "costly dependence"
28709 between the insns.
28711 The function estimates the group boundaries that the processor will form as
28712 follows: It keeps track of how many vacant issue slots are available after
28713 each insn. A subsequent insn will start a new group if one of the following
28714 4 cases applies:
28715 - no more vacant issue slots remain in the current dispatch group.
28716 - only the last issue slot, which is the branch slot, is vacant, but the next
28717 insn is not a branch.
28718 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
28719 which means that a cracked insn (which occupies two issue slots) can't be
28720 issued in this group.
28721 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
28722 start a new group. */
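/* Worked example (hypothetical, issue_rate == 4): after three insns
   issue, only the branch slot is vacant, so a non-branch next insn starts
   a new group.  Likewise a cracked insn, which needs two slots, starts a
   new group whenever two or fewer slots (one being the branch slot)
   remain vacant.  */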
28724 static int
28725 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28726 rtx_insn *tail)
28728 rtx_insn *insn, *next_insn;
28729 int issue_rate;
28730 int can_issue_more;
28731 int slot, i;
28732 bool group_end;
28733 int group_count = 0;
28734 rtx *group_insns;
28736 /* Initialize. */
28737 issue_rate = rs6000_issue_rate ();
28738 group_insns = XALLOCAVEC (rtx, issue_rate);
28739 for (i = 0; i < issue_rate; i++)
28741 group_insns[i] = 0;
28743 can_issue_more = issue_rate;
28744 slot = 0;
28745 insn = get_next_active_insn (prev_head_insn, tail);
28746 group_end = false;
28748 while (insn != NULL_RTX)
28750 slot = (issue_rate - can_issue_more);
28751 group_insns[slot] = insn;
28752 can_issue_more =
28753 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28754 if (insn_terminates_group_p (insn, current_group))
28755 can_issue_more = 0;
28757 next_insn = get_next_active_insn (insn, tail);
28758 if (next_insn == NULL_RTX)
28759 return group_count + 1;
28761 /* Is next_insn going to start a new group? */
28762 group_end
28763 = (can_issue_more == 0
28764 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28765 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28766 || (can_issue_more < issue_rate &&
28767 insn_terminates_group_p (next_insn, previous_group)));
28769 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
28770 next_insn, &group_end, can_issue_more,
28771 &group_count);
28773 if (group_end)
28775 group_count++;
28776 can_issue_more = 0;
28777 for (i = 0; i < issue_rate; i++)
28779 group_insns[i] = 0;
28783 if (GET_MODE (next_insn) == TImode && can_issue_more)
28784 PUT_MODE (next_insn, VOIDmode);
28785 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
28786 PUT_MODE (next_insn, TImode);
28788 insn = next_insn;
28789 if (can_issue_more == 0)
28790 can_issue_more = issue_rate;
28791 } /* while */
28793 return group_count;
28796 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
28797 dispatch group boundaries that the scheduler had marked. Pad with nops
28798 any dispatch groups which have vacant issue slots, in order to force the
28799 scheduler's grouping on the processor dispatcher. The function
28800 returns the number of dispatch groups found. */
28802 static int
28803 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28804 rtx_insn *tail)
28806 rtx_insn *insn, *next_insn;
28807 rtx nop;
28808 int issue_rate;
28809 int can_issue_more;
28810 int group_end;
28811 int group_count = 0;
28813 /* Initialize issue_rate. */
28814 issue_rate = rs6000_issue_rate ();
28815 can_issue_more = issue_rate;
28817 insn = get_next_active_insn (prev_head_insn, tail);
28818 next_insn = get_next_active_insn (insn, tail);
28820 while (insn != NULL_RTX)
28822 can_issue_more =
28823 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28825 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
28827 if (next_insn == NULL_RTX)
28828 break;
28830 if (group_end)
28832 /* If the scheduler had marked group termination at this location
28833 (between insn and next_insn), and neither insn nor next_insn will
28834 force group termination, pad the group with nops to force group
28835 termination. */
28836 if (can_issue_more
28837 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
28838 && !insn_terminates_group_p (insn, current_group)
28839 && !insn_terminates_group_p (next_insn, previous_group))
28841 if (!is_branch_slot_insn (next_insn))
28842 can_issue_more--;
28844 while (can_issue_more)
28846 nop = gen_nop ();
28847 emit_insn_before (nop, next_insn);
28848 can_issue_more--;
28852 can_issue_more = issue_rate;
28853 group_count++;
28856 insn = next_insn;
28857 next_insn = get_next_active_insn (insn, tail);
28860 return group_count;
28863 /* We're beginning a new block. Initialize data structures as necessary. */
28865 static void
28866 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
28867 int sched_verbose ATTRIBUTE_UNUSED,
28868 int max_ready ATTRIBUTE_UNUSED)
28870 last_scheduled_insn = NULL_RTX;
28871 load_store_pendulum = 0;
28874 /* The following function is called at the end of scheduling BB.
28875 After reload, it inserts nops to enforce insn group bundling. */
28877 static void
28878 rs6000_sched_finish (FILE *dump, int sched_verbose)
28880 int n_groups;
28882 if (sched_verbose)
28883 fprintf (dump, "=== Finishing schedule.\n");
28885 if (reload_completed && rs6000_sched_groups)
28887 /* Do not run the sched_finish hook when selective scheduling is enabled. */
28888 if (sel_sched_p ())
28889 return;
28891 if (rs6000_sched_insert_nops == sched_finish_none)
28892 return;
28894 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
28895 n_groups = pad_groups (dump, sched_verbose,
28896 current_sched_info->prev_head,
28897 current_sched_info->next_tail);
28898 else
28899 n_groups = redefine_groups (dump, sched_verbose,
28900 current_sched_info->prev_head,
28901 current_sched_info->next_tail);
28903 if (sched_verbose >= 6)
28905 fprintf (dump, "ngroups = %d\n", n_groups);
28906 print_rtl (dump, current_sched_info->prev_head);
28907 fprintf (dump, "Done finish_sched\n");
28912 struct _rs6000_sched_context
28914 short cached_can_issue_more;
28915 rtx last_scheduled_insn;
28916 int load_store_pendulum;
28919 typedef struct _rs6000_sched_context rs6000_sched_context_def;
28920 typedef rs6000_sched_context_def *rs6000_sched_context_t;
28922 /* Allocate storage for a new scheduling context. */
28923 static void *
28924 rs6000_alloc_sched_context (void)
28926 return xmalloc (sizeof (rs6000_sched_context_def));
28929 /* If CLEAN_P is true, initialize _SC with clean data;
28930 otherwise initialize it from the global context. */
28931 static void
28932 rs6000_init_sched_context (void *_sc, bool clean_p)
28934 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28936 if (clean_p)
28938 sc->cached_can_issue_more = 0;
28939 sc->last_scheduled_insn = NULL_RTX;
28940 sc->load_store_pendulum = 0;
28942 else
28944 sc->cached_can_issue_more = cached_can_issue_more;
28945 sc->last_scheduled_insn = last_scheduled_insn;
28946 sc->load_store_pendulum = load_store_pendulum;
28950 /* Sets the global scheduling context to the one pointed to by _SC. */
28951 static void
28952 rs6000_set_sched_context (void *_sc)
28954 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28956 gcc_assert (sc != NULL);
28958 cached_can_issue_more = sc->cached_can_issue_more;
28959 last_scheduled_insn = sc->last_scheduled_insn;
28960 load_store_pendulum = sc->load_store_pendulum;
28963 /* Free _SC. */
28964 static void
28965 rs6000_free_sched_context (void *_sc)
28967 gcc_assert (_sc != NULL);
28969 free (_sc);
28973 /* Length in units (bytes) of the trampoline for entering a nested function. */
28976 rs6000_trampoline_size (void)
28978 int ret = 0;
28980 switch (DEFAULT_ABI)
28982 default:
28983 gcc_unreachable ();
28985 case ABI_AIX:
28986 ret = (TARGET_32BIT) ? 12 : 24;
28987 break;
28989 case ABI_ELFv2:
28990 gcc_assert (!TARGET_32BIT);
28991 ret = 32;
28992 break;
28994 case ABI_DARWIN:
28995 case ABI_V4:
28996 ret = (TARGET_32BIT) ? 40 : 48;
28997 break;
29000 return ret;
29003 /* Emit RTL insns to initialize the variable parts of a trampoline.
29004 FNADDR is an RTX for the address of the function's pure code.
29005 CXT is an RTX for the static chain value for the function. */
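/* Layout sketch for the AIX case below (REGSIZE is 4 or 8 bytes):

     tramp + 0*regsize:  function entry address
     tramp + 1*regsize:  TOC pointer
     tramp + 2*regsize:  static chain (CXT)

   which is the 12- or 24-byte descriptor rs6000_trampoline_size reports;
   the other ABIs delegate to __trampoline_setup instead.  */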
29007 static void
29008 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
29010 int regsize = (TARGET_32BIT) ? 4 : 8;
29011 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
29012 rtx ctx_reg = force_reg (Pmode, cxt);
29013 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
29015 switch (DEFAULT_ABI)
29017 default:
29018 gcc_unreachable ();
29020 /* Under AIX, just build the 3-word function descriptor. */
29021 case ABI_AIX:
29023 rtx fnmem, fn_reg, toc_reg;
29025 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
29026 error ("You cannot take the address of a nested function if you use "
29027 "the -mno-pointers-to-nested-functions option.");
29029 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
29030 fn_reg = gen_reg_rtx (Pmode);
29031 toc_reg = gen_reg_rtx (Pmode);
29033 /* Macro to shorten the code expansions below. */
29034 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
29036 m_tramp = replace_equiv_address (m_tramp, addr);
29038 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
29039 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
29040 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
29041 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
29042 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
29044 # undef MEM_PLUS
29046 break;
29048 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
29049 case ABI_ELFv2:
29050 case ABI_DARWIN:
29051 case ABI_V4:
29052 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
29053 LCT_NORMAL, VOIDmode, 4,
29054 addr, Pmode,
29055 GEN_INT (rs6000_trampoline_size ()), SImode,
29056 fnaddr, Pmode,
29057 ctx_reg, Pmode);
29058 break;
29063 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
29064 identifier as an argument, so the front end shouldn't look it up. */
29066 static bool
29067 rs6000_attribute_takes_identifier_p (const_tree attr_id)
29069 return is_attribute_p ("altivec", attr_id);
29072 /* Handle the "altivec" attribute. The attribute may have
29073 arguments as follows:
29075 __attribute__((altivec(vector__)))
29076 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
29077 __attribute__((altivec(bool__))) (always followed by 'unsigned')
29079 and may appear more than once (e.g., 'vector bool char') in a
29080 given declaration. */
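/* For example (hypothetical declarations after macro expansion):

     __attribute__((altivec(vector__))) unsigned int vui;
       -> unsigned_V4SI_type_node ("vector unsigned int")
     __attribute__((altivec(bool__))) unsigned short vbs;
       -> bool_V8HI_type_node ("vector bool short")

   as selected by the mode switches below.  */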
29082 static tree
29083 rs6000_handle_altivec_attribute (tree *node,
29084 tree name ATTRIBUTE_UNUSED,
29085 tree args,
29086 int flags ATTRIBUTE_UNUSED,
29087 bool *no_add_attrs)
29089 tree type = *node, result = NULL_TREE;
29090 machine_mode mode;
29091 int unsigned_p;
29092 char altivec_type
29093 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
29094 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
29095 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
29096 : '?');
29098 while (POINTER_TYPE_P (type)
29099 || TREE_CODE (type) == FUNCTION_TYPE
29100 || TREE_CODE (type) == METHOD_TYPE
29101 || TREE_CODE (type) == ARRAY_TYPE)
29102 type = TREE_TYPE (type);
29104 mode = TYPE_MODE (type);
29106 /* Check for invalid AltiVec type qualifiers. */
29107 if (type == long_double_type_node)
29108 error ("use of %<long double%> in AltiVec types is invalid");
29109 else if (type == boolean_type_node)
29110 error ("use of boolean types in AltiVec types is invalid");
29111 else if (TREE_CODE (type) == COMPLEX_TYPE)
29112 error ("use of %<complex%> in AltiVec types is invalid");
29113 else if (DECIMAL_FLOAT_MODE_P (mode))
29114 error ("use of decimal floating point types in AltiVec types is invalid");
29115 else if (!TARGET_VSX)
29117 if (type == long_unsigned_type_node || type == long_integer_type_node)
29119 if (TARGET_64BIT)
29120 error ("use of %<long%> in AltiVec types is invalid for "
29121 "64-bit code without -mvsx");
29122 else if (rs6000_warn_altivec_long)
29123 warning (0, "use of %<long%> in AltiVec types is deprecated; "
29124 "use %<int%>");
29126 else if (type == long_long_unsigned_type_node
29127 || type == long_long_integer_type_node)
29128 error ("use of %<long long%> in AltiVec types is invalid without "
29129 "-mvsx");
29130 else if (type == double_type_node)
29131 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
29134 switch (altivec_type)
29136 case 'v':
29137 unsigned_p = TYPE_UNSIGNED (type);
29138 switch (mode)
29140 case TImode:
29141 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
29142 break;
29143 case DImode:
29144 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
29145 break;
29146 case SImode:
29147 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
29148 break;
29149 case HImode:
29150 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
29151 break;
29152 case QImode:
29153 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
29154 break;
29155 case SFmode: result = V4SF_type_node; break;
29156 case DFmode: result = V2DF_type_node; break;
29157 /* If the user says 'vector int bool', we may be handed the 'bool'
29158 attribute _before_ the 'vector' attribute, and so select the
29159 proper type in the 'b' case below. */
29160 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
29161 case V2DImode: case V2DFmode:
29162 result = type;
29163 default: break;
29165 break;
29166 case 'b':
29167 switch (mode)
29169 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
29170 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
29171 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
29172 case QImode: case V16QImode: result = bool_V16QI_type_node;
29173 default: break;
29175 break;
29176 case 'p':
29177 switch (mode)
29179 case V8HImode: result = pixel_V8HI_type_node;
29180 default: break;
29182 default: break;
29185 /* Propagate qualifiers attached to the element type
29186 onto the vector type. */
29187 if (result && result != type && TYPE_QUALS (type))
29188 result = build_qualified_type (result, TYPE_QUALS (type));
29190 *no_add_attrs = true; /* No need to hang on to the attribute. */
29192 if (result)
29193 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
29195 return NULL_TREE;
29198 /* AltiVec defines several built-in scalar types that serve as vector
29199 elements; we must teach the compiler how to mangle them. */
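/* For example, a parameter of type 'vector bool int' has element type
   bool_int_type_node and mangles as "U6__booli" below, while '__pixel'
   mangles as "u7__pixel".  */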
29201 static const char *
29202 rs6000_mangle_type (const_tree type)
29204 type = TYPE_MAIN_VARIANT (type);
29206 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29207 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29208 return NULL;
29210 if (type == bool_char_type_node) return "U6__boolc";
29211 if (type == bool_short_type_node) return "U6__bools";
29212 if (type == pixel_type_node) return "u7__pixel";
29213 if (type == bool_int_type_node) return "U6__booli";
29214 if (type == bool_long_type_node) return "U6__booll";
29216 /* Mangle IBM extended float long double as `g' (__float128) on
29217 powerpc*-linux where long-double-64 previously was the default. */
29218 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
29219 && TARGET_ELF
29220 && TARGET_LONG_DOUBLE_128
29221 && !TARGET_IEEEQUAD)
29222 return "g";
29224 /* For all other types, use normal C++ mangling. */
29225 return NULL;
29228 /* Handle a "longcall" or "shortcall" attribute; arguments as in
29229 struct attribute_spec.handler. */
29231 static tree
29232 rs6000_handle_longcall_attribute (tree *node, tree name,
29233 tree args ATTRIBUTE_UNUSED,
29234 int flags ATTRIBUTE_UNUSED,
29235 bool *no_add_attrs)
29237 if (TREE_CODE (*node) != FUNCTION_TYPE
29238 && TREE_CODE (*node) != FIELD_DECL
29239 && TREE_CODE (*node) != TYPE_DECL)
29241 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29242 name);
29243 *no_add_attrs = true;
29246 return NULL_TREE;
29249 /* Set longcall attributes on all functions declared when
29250 rs6000_default_long_calls is true. */
29251 static void
29252 rs6000_set_default_type_attributes (tree type)
29254 if (rs6000_default_long_calls
29255 && (TREE_CODE (type) == FUNCTION_TYPE
29256 || TREE_CODE (type) == METHOD_TYPE))
29257 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
29258 NULL_TREE,
29259 TYPE_ATTRIBUTES (type));
29261 #if TARGET_MACHO
29262 darwin_set_default_type_attributes (type);
29263 #endif
29266 /* Return a reference suitable for calling a function with the
29267 longcall attribute. */
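/* Sketch (hypothetical): a SYMBOL_REF for ".foo" (the System V
   dot-prefixed internal name) is rewritten to a plain "foo" reference and
   forced into a register, so the longcall can be made indirectly through
   that register.  */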
29270 rs6000_longcall_ref (rtx call_ref)
29272 const char *call_name;
29273 tree node;
29275 if (GET_CODE (call_ref) != SYMBOL_REF)
29276 return call_ref;
29278 /* System V adds '.' to the internal name, so skip any leading dots. */
29279 call_name = XSTR (call_ref, 0);
29280 if (*call_name == '.')
29282 while (*call_name == '.')
29283 call_name++;
29285 node = get_identifier (call_name);
29286 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
29289 return force_reg (Pmode, call_ref);
29292 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
29293 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
29294 #endif
29296 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29297 struct attribute_spec.handler. */
29298 static tree
29299 rs6000_handle_struct_attribute (tree *node, tree name,
29300 tree args ATTRIBUTE_UNUSED,
29301 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29303 tree *type = NULL;
29304 if (DECL_P (*node))
29306 if (TREE_CODE (*node) == TYPE_DECL)
29307 type = &TREE_TYPE (*node);
29309 else
29310 type = node;
29312 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29313 || TREE_CODE (*type) == UNION_TYPE)))
29315 warning (OPT_Wattributes, "%qE attribute ignored", name);
29316 *no_add_attrs = true;
29319 else if ((is_attribute_p ("ms_struct", name)
29320 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29321 || ((is_attribute_p ("gcc_struct", name)
29322 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29324 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29325 name);
29326 *no_add_attrs = true;
29329 return NULL_TREE;
29332 static bool
29333 rs6000_ms_bitfield_layout_p (const_tree record_type)
29335 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
29336 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29337 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
29340 #ifdef USING_ELFOS_H
29342 /* A get_unnamed_section callback, used for switching to toc_section. */
29344 static void
29345 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
29347 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29348 && TARGET_MINIMAL_TOC
29349 && !TARGET_RELOCATABLE)
29351 if (!toc_initialized)
29353 toc_initialized = 1;
29354 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
29355 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
29356 fprintf (asm_out_file, "\t.tc ");
29357 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
29358 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
29359 fprintf (asm_out_file, "\n");
29361 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29362 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
29363 fprintf (asm_out_file, " = .+32768\n");
29365 else
29366 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29368 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29369 && !TARGET_RELOCATABLE)
29370 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
29371 else
29373 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29374 if (!toc_initialized)
29376 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
29377 fprintf (asm_out_file, " = .+32768\n");
29378 toc_initialized = 1;
29383 /* Implement TARGET_ASM_INIT_SECTIONS. */
29385 static void
29386 rs6000_elf_asm_init_sections (void)
29388 toc_section
29389 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
29391 sdata2_section
29392 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
29393 SDATA2_SECTION_ASM_OP);
29396 /* Implement TARGET_SELECT_RTX_SECTION. */
29398 static section *
29399 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
29400 unsigned HOST_WIDE_INT align)
29402 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29403 return toc_section;
29404 else
29405 return default_elf_select_rtx_section (mode, x, align);
29408 /* For a SYMBOL_REF, set generic flags and then perform some
29409 target-specific processing.
29411 When the AIX ABI is requested on a non-AIX system, replace the
29412 function name with the real name (with a leading .) rather than the
29413 function descriptor name. This saves a lot of overriding code to
29414 read the prefixes. */
29416 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
29417 static void
29418 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
29420 default_encode_section_info (decl, rtl, first);
29422 if (first
29423 && TREE_CODE (decl) == FUNCTION_DECL
29424 && !TARGET_AIX
29425 && DEFAULT_ABI == ABI_AIX)
29427 rtx sym_ref = XEXP (rtl, 0);
29428 size_t len = strlen (XSTR (sym_ref, 0));
29429 char *str = XALLOCAVEC (char, len + 2);
29430 str[0] = '.';
29431 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
29432 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
29436 static inline bool
29437 compare_section_name (const char *section, const char *templ)
29439 int len;
29441 len = strlen (templ);
29442 return (strncmp (section, templ, len) == 0
29443 && (section[len] == 0 || section[len] == '.'));
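/* Usage sketch, illustrative and guarded out of the build: the
   template matches either exactly or as a dot-separated prefix, so
   ".sdata" accepts ".sdata" and ".sdata.foo" but rejects ".sdata2"
   (the character after the prefix is '2', not NUL or '.').  */
#ifdef COMPARE_SECTION_NAME_SKETCH
static void
compare_section_name_sketch (void)
{
  gcc_assert (compare_section_name (".sdata", ".sdata"));
  gcc_assert (compare_section_name (".sdata.foo", ".sdata"));
  gcc_assert (!compare_section_name (".sdata2", ".sdata"));
}
#endif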
29446 bool
29447 rs6000_elf_in_small_data_p (const_tree decl)
29449 if (rs6000_sdata == SDATA_NONE)
29450 return false;
29452 /* We want to merge strings, so we never consider them small data. */
29453 if (TREE_CODE (decl) == STRING_CST)
29454 return false;
29456 /* Functions are never in the small data area. */
29457 if (TREE_CODE (decl) == FUNCTION_DECL)
29458 return false;
29460 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
29462 const char *section = DECL_SECTION_NAME (decl);
29463 if (compare_section_name (section, ".sdata")
29464 || compare_section_name (section, ".sdata2")
29465 || compare_section_name (section, ".gnu.linkonce.s")
29466 || compare_section_name (section, ".sbss")
29467 || compare_section_name (section, ".sbss2")
29468 || compare_section_name (section, ".gnu.linkonce.sb")
29469 || strcmp (section, ".PPC.EMB.sdata0") == 0
29470 || strcmp (section, ".PPC.EMB.sbss0") == 0)
29471 return true;
29473 else
29475 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
29477 if (size > 0
29478 && size <= g_switch_value
29479 /* If it's not public, and we're not going to reference it via the
29480 small data area, there's no need to put it in the small data section. */
29481 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
29482 return true;
29485 return false;
29488 #endif /* USING_ELFOS_H */
29490 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
29492 static bool
29493 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
29495 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
29498 /* Do not place thread-local symbols refs in the object blocks. */
29500 static bool
29501 rs6000_use_blocks_for_decl_p (const_tree decl)
29503 return !DECL_THREAD_LOCAL_P (decl);
29506 /* Return a REG that occurs in ADDR with coefficient 1.
29507 ADDR can be effectively incremented by incrementing REG.
29509 r0 is special and we must not select it as an address
29510 register by this routine since our caller will try to
29511 increment the returned register via an "la" instruction. */
29514 find_addr_reg (rtx addr)
29516 while (GET_CODE (addr) == PLUS)
29518 if (GET_CODE (XEXP (addr, 0)) == REG
29519 && REGNO (XEXP (addr, 0)) != 0)
29520 addr = XEXP (addr, 0);
29521 else if (GET_CODE (XEXP (addr, 1)) == REG
29522 && REGNO (XEXP (addr, 1)) != 0)
29523 addr = XEXP (addr, 1);
29524 else if (CONSTANT_P (XEXP (addr, 0)))
29525 addr = XEXP (addr, 1);
29526 else if (CONSTANT_P (XEXP (addr, 1)))
29527 addr = XEXP (addr, 0);
29528 else
29529 gcc_unreachable ();
29531 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
29532 return addr;
29535 void
29536 rs6000_fatal_bad_address (rtx op)
29538 fatal_insn ("bad address", op);
29541 #if TARGET_MACHO
29543 typedef struct branch_island_d {
29544 tree function_name;
29545 tree label_name;
29546 int line_number;
29547 } branch_island;
29550 static vec<branch_island, va_gc> *branch_islands;
29552 /* Remember to generate a branch island for far calls to the given
29553 function. */
29555 static void
29556 add_compiler_branch_island (tree label_name, tree function_name,
29557 int line_number)
29559 branch_island bi = {function_name, label_name, line_number};
29560 vec_safe_push (branch_islands, bi);
29563 /* Generate far-jump branch islands for everything recorded in
29564 branch_islands. Invoked immediately after the last instruction of
29565 the epilogue has been emitted; the branch islands must be appended
29566 to, and contiguous with, the function body. Mach-O stubs are
29567 generated in machopic_output_stub(). */
29569 static void
29570 macho_branch_islands (void)
29572 char tmp_buf[512];
29574 while (!vec_safe_is_empty (branch_islands))
29576 branch_island *bi = &branch_islands->last ();
29577 const char *label = IDENTIFIER_POINTER (bi->label_name);
29578 const char *name = IDENTIFIER_POINTER (bi->function_name);
29579 char name_buf[512];
29580 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
29581 if (name[0] == '*' || name[0] == '&')
29582 strcpy (name_buf, name+1);
29583 else
29585 name_buf[0] = '_';
29586 strcpy (name_buf+1, name);
29588 strcpy (tmp_buf, "\n");
29589 strcat (tmp_buf, label);
29590 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29591 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29592 dbxout_stabd (N_SLINE, bi->line_number);
29593 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29594 if (flag_pic)
29596 if (TARGET_LINK_STACK)
29598 char name[32];
29599 get_ppc476_thunk_name (name);
29600 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
29601 strcat (tmp_buf, name);
29602 strcat (tmp_buf, "\n");
29603 strcat (tmp_buf, label);
29604 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29606 else
29608 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
29609 strcat (tmp_buf, label);
29610 strcat (tmp_buf, "_pic\n");
29611 strcat (tmp_buf, label);
29612 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29615 strcat (tmp_buf, "\taddis r11,r11,ha16(");
29616 strcat (tmp_buf, name_buf);
29617 strcat (tmp_buf, " - ");
29618 strcat (tmp_buf, label);
29619 strcat (tmp_buf, "_pic)\n");
29621 strcat (tmp_buf, "\tmtlr r0\n");
29623 strcat (tmp_buf, "\taddi r12,r11,lo16(");
29624 strcat (tmp_buf, name_buf);
29625 strcat (tmp_buf, " - ");
29626 strcat (tmp_buf, label);
29627 strcat (tmp_buf, "_pic)\n");
29629 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
29631 else
29633 strcat (tmp_buf, ":\nlis r12,hi16(");
29634 strcat (tmp_buf, name_buf);
29635 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
29636 strcat (tmp_buf, name_buf);
29637 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
29639 output_asm_insn (tmp_buf, 0);
29640 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29641 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29642 dbxout_stabd (N_SLINE, bi->line_number);
29643 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29644 branch_islands->pop ();
29648 /* no_previous_def checks whether the given function name has already
29649 been recorded in the branch_islands vector. */
29651 static int
29652 no_previous_def (tree function_name)
29654 branch_island *bi;
29655 unsigned ix;
29657 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29658 if (function_name == bi->function_name)
29659 return 0;
29660 return 1;
29663 /* get_prev_label returns the branch-island label recorded for the
29664 given function name, or NULL_TREE if there is none. */
29666 static tree
29667 get_prev_label (tree function_name)
29669 branch_island *bi;
29670 unsigned ix;
29672 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29673 if (function_name == bi->function_name)
29674 return bi->label_name;
29675 return NULL_TREE;
29678 /* INSN is either a function call or a millicode call. It may have an
29679 unconditional jump in its delay slot.
29681 CALL_DEST is the routine we are calling. */
29683 char *
29684 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
29685 int cookie_operand_number)
29687 static char buf[256];
29688 if (darwin_emit_branch_islands
29689 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
29690 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
29692 tree labelname;
29693 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
29695 if (no_previous_def (funname))
29697 rtx label_rtx = gen_label_rtx ();
29698 char *label_buf, temp_buf[256];
29699 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
29700 CODE_LABEL_NUMBER (label_rtx));
29701 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
29702 labelname = get_identifier (label_buf);
29703 add_compiler_branch_island (labelname, funname, insn_line (insn));
29705 else
29706 labelname = get_prev_label (funname);
29708 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
29709 instruction will reach 'foo', otherwise link as 'bl L42'".
29710 "L42" should be a 'branch island', that will do a far jump to
29711 'foo'. Branch islands are generated in
29712 macho_branch_islands(). */
29713 sprintf (buf, "jbsr %%z%d,%.246s",
29714 dest_operand_number, IDENTIFIER_POINTER (labelname));
29716 else
29717 sprintf (buf, "bl %%z%d", dest_operand_number);
29718 return buf;
29721 /* Generate PIC and indirect symbol stubs. */
29723 void
29724 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29726 unsigned int length;
29727 char *symbol_name, *lazy_ptr_name;
29728 char *local_label_0;
29729 static int label = 0;
29731 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29732 symb = (*targetm.strip_name_encoding) (symb);
29735 length = strlen (symb);
29736 symbol_name = XALLOCAVEC (char, length + 32);
29737 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29739 lazy_ptr_name = XALLOCAVEC (char, length + 32);
29740 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
29742 if (flag_pic == 2)
29743 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
29744 else
29745 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
29747 if (flag_pic == 2)
29749 fprintf (file, "\t.align 5\n");
29751 fprintf (file, "%s:\n", stub);
29752 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29754 label++;
29755 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
29756 sprintf (local_label_0, "\"L%011d$spb\"", label);
29758 fprintf (file, "\tmflr r0\n");
29759 if (TARGET_LINK_STACK)
29761 char name[32];
29762 get_ppc476_thunk_name (name);
29763 fprintf (file, "\tbl %s\n", name);
29764 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29766 else
29768 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
29769 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29771 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
29772 lazy_ptr_name, local_label_0);
29773 fprintf (file, "\tmtlr r0\n");
29774 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
29775 (TARGET_64BIT ? "ldu" : "lwzu"),
29776 lazy_ptr_name, local_label_0);
29777 fprintf (file, "\tmtctr r12\n");
29778 fprintf (file, "\tbctr\n");
29780 else
29782 fprintf (file, "\t.align 4\n");
29784 fprintf (file, "%s:\n", stub);
29785 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29787 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
29788 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
29789 (TARGET_64BIT ? "ldu" : "lwzu"),
29790 lazy_ptr_name);
29791 fprintf (file, "\tmtctr r12\n");
29792 fprintf (file, "\tbctr\n");
29795 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29796 fprintf (file, "%s:\n", lazy_ptr_name);
29797 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29798 fprintf (file, "%sdyld_stub_binding_helper\n",
29799 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
29802 /* Legitimize PIC addresses. If the address is already
29803 position-independent, we return ORIG. Newly generated
29804 position-independent addresses go into a reg. This is REG if
29805 nonzero; otherwise we allocate register(s) as necessary. */
29807 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
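/* Worked example (illustrative): SMALL_INT tests for a signed 16-bit
   constant by biasing into unsigned range.  X = -0x8000 gives
   0x0 < 0x10000 (accepted), X = 0x7fff gives 0xffff < 0x10000
   (accepted), and X = 0x8000 gives exactly 0x10000 (rejected); more
   negative values wrap to huge unsigned values and are likewise
   rejected, so a single unsigned compare covers both bounds.  */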
29810 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
29811 rtx reg)
29813 rtx base, offset;
29815 if (reg == NULL && ! reload_in_progress && ! reload_completed)
29816 reg = gen_reg_rtx (Pmode);
29818 if (GET_CODE (orig) == CONST)
29820 rtx reg_temp;
29822 if (GET_CODE (XEXP (orig, 0)) == PLUS
29823 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
29824 return orig;
29826 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
29828 /* Use a different reg for the intermediate value, as
29829 it will be marked UNCHANGING. */
29830 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
29831 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
29832 Pmode, reg_temp);
29833 offset =
29834 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
29835 Pmode, reg);
29837 if (GET_CODE (offset) == CONST_INT)
29839 if (SMALL_INT (offset))
29840 return plus_constant (Pmode, base, INTVAL (offset));
29841 else if (! reload_in_progress && ! reload_completed)
29842 offset = force_reg (Pmode, offset);
29843 else
29845 rtx mem = force_const_mem (Pmode, orig);
29846 return machopic_legitimize_pic_address (mem, Pmode, reg);
29849 return gen_rtx_PLUS (Pmode, base, offset);
29852 /* Fall back on generic machopic code. */
29853 return machopic_legitimize_pic_address (orig, mode, reg);
29856 /* Output a .machine directive for the Darwin assembler, and call
29857 the generic start_file routine. */
29859 static void
29860 rs6000_darwin_file_start (void)
29862 static const struct
29864 const char *arg;
29865 const char *name;
29866 HOST_WIDE_INT if_set;
29867 } mapping[] = {
29868 { "ppc64", "ppc64", MASK_64BIT },
29869 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
29870 { "power4", "ppc970", 0 },
29871 { "G5", "ppc970", 0 },
29872 { "7450", "ppc7450", 0 },
29873 { "7400", "ppc7400", MASK_ALTIVEC },
29874 { "G4", "ppc7400", 0 },
29875 { "750", "ppc750", 0 },
29876 { "740", "ppc750", 0 },
29877 { "G3", "ppc750", 0 },
29878 { "604e", "ppc604e", 0 },
29879 { "604", "ppc604", 0 },
29880 { "603e", "ppc603", 0 },
29881 { "603", "ppc603", 0 },
29882 { "601", "ppc601", 0 },
29883 { NULL, "ppc", 0 } };
29884 const char *cpu_id = "";
29885 size_t i;
29887 rs6000_file_start ();
29888 darwin_file_start ();
29890 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
29892 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
29893 cpu_id = rs6000_default_cpu;
29895 if (global_options_set.x_rs6000_cpu_index)
29896 cpu_id = processor_target_table[rs6000_cpu_index].name;
29898 /* Look through the mapping array. Pick the first name that either
29899 matches the argument, has a bit set in IF_SET that is also set
29900 in the target flags, or has a NULL name. */
29902 i = 0;
29903 while (mapping[i].arg != NULL
29904 && strcmp (mapping[i].arg, cpu_id) != 0
29905 && (mapping[i].if_set & rs6000_isa_flags) == 0)
29906 i++;
29908 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
29911 #endif /* TARGET_MACHO */
29913 #if TARGET_ELF
29914 static int
29915 rs6000_elf_reloc_rw_mask (void)
29917 if (flag_pic)
29918 return 3;
29919 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29920 return 2;
29921 else
29922 return 0;
29925 /* Record an element in the table of global constructors. SYMBOL is
29926 a SYMBOL_REF of the function to be called; PRIORITY is a number
29927 between 0 and MAX_INIT_PRIORITY.
29929 This differs from default_named_section_asm_out_constructor in
29930 that we have special handling for -mrelocatable. */
29932 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
29933 static void
29934 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
29936 const char *section = ".ctors";
29937 char buf[16];
29939 if (priority != DEFAULT_INIT_PRIORITY)
29941 sprintf (buf, ".ctors.%.5u",
29942 /* Invert the numbering so the linker puts us in the proper
29943 order; constructors are run from right to left, and the
29944 linker sorts in increasing order. */
29945 MAX_INIT_PRIORITY - priority);
29946 section = buf;
29949 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29950 assemble_align (POINTER_SIZE);
29952 if (TARGET_RELOCATABLE)
29954 fputs ("\t.long (", asm_out_file);
29955 output_addr_const (asm_out_file, symbol);
29956 fputs (")@fixup\n", asm_out_file);
29958 else
29959 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
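/* Worked example (illustrative): with MAX_INIT_PRIORITY of 65535, a
   constructor of priority 101 lands in ".ctors.65434" and one of
   priority 65534 in ".ctors.00001".  The linker sorts these sections
   by name in increasing order and the table is run from the end
   backwards, so the priority-101 constructor, which must run
   earlier, does.  */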
29962 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
29963 static void
29964 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
29966 const char *section = ".dtors";
29967 char buf[16];
29969 if (priority != DEFAULT_INIT_PRIORITY)
29971 sprintf (buf, ".dtors.%.5u",
29972 /* Invert the numbering so the linker puts us in the proper
29973 order; constructors are run from right to left, and the
29974 linker sorts in increasing order. */
29975 MAX_INIT_PRIORITY - priority);
29976 section = buf;
29979 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29980 assemble_align (POINTER_SIZE);
29982 if (TARGET_RELOCATABLE)
29984 fputs ("\t.long (", asm_out_file);
29985 output_addr_const (asm_out_file, symbol);
29986 fputs (")@fixup\n", asm_out_file);
29988 else
29989 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29992 void
29993 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
29995 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
29997 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
29998 ASM_OUTPUT_LABEL (file, name);
29999 fputs (DOUBLE_INT_ASM_OP, file);
30000 rs6000_output_function_entry (file, name);
30001 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
30002 if (DOT_SYMBOLS)
30004 fputs ("\t.size\t", file);
30005 assemble_name (file, name);
30006 fputs (",24\n\t.type\t.", file);
30007 assemble_name (file, name);
30008 fputs (",@function\n", file);
30009 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
30011 fputs ("\t.globl\t.", file);
30012 assemble_name (file, name);
30013 putc ('\n', file);
30016 else
30017 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
30018 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
30019 rs6000_output_function_entry (file, name);
30020 fputs (":\n", file);
30021 return;
30024 if (TARGET_RELOCATABLE
30025 && !TARGET_SECURE_PLT
30026 && (get_pool_size () != 0 || crtl->profile)
30027 && uses_TOC ())
30029 char buf[256];
30031 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
30033 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
30034 fprintf (file, "\t.long ");
30035 assemble_name (file, buf);
30036 putc ('-', file);
30037 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30038 assemble_name (file, buf);
30039 putc ('\n', file);
30042 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
30043 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
30045 if (DEFAULT_ABI == ABI_AIX)
30047 const char *desc_name, *orig_name;
30049 orig_name = (*targetm.strip_name_encoding) (name);
30050 desc_name = orig_name;
30051 while (*desc_name == '.')
30052 desc_name++;
30054 if (TREE_PUBLIC (decl))
30055 fprintf (file, "\t.globl %s\n", desc_name);
30057 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
30058 fprintf (file, "%s:\n", desc_name);
30059 fprintf (file, "\t.long %s\n", orig_name);
30060 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
30061 fputs ("\t.long 0\n", file);
30062 fprintf (file, "\t.previous\n");
30064 ASM_OUTPUT_LABEL (file, name);
30067 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
30068 static void
30069 rs6000_elf_file_end (void)
30071 #ifdef HAVE_AS_GNU_ATTRIBUTE
30072 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
30074 if (rs6000_passes_float)
30075 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
30076 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
30077 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
30078 : 2));
30079 if (rs6000_passes_vector)
30080 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
30081 (TARGET_ALTIVEC_ABI ? 2
30082 : TARGET_SPE_ABI ? 3
30083 : 1));
30084 if (rs6000_returns_struct)
30085 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
30086 aix_struct_return ? 2 : 1);
30088 #endif
30089 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
30090 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
30091 file_end_indicate_exec_stack ();
30092 #endif
30094 if (flag_split_stack)
30095 file_end_indicate_split_stack ();
30097 #endif
30099 #if TARGET_XCOFF
30100 static void
30101 rs6000_xcoff_asm_output_anchor (rtx symbol)
30103 char buffer[100];
30105 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
30106 SYMBOL_REF_BLOCK_OFFSET (symbol));
30107 fprintf (asm_out_file, "%s", SET_ASM_OP);
30108 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
30109 fprintf (asm_out_file, ",");
30110 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
30111 fprintf (asm_out_file, "\n");
30114 static void
30115 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
30117 fputs (GLOBAL_ASM_OP, stream);
30118 RS6000_OUTPUT_BASENAME (stream, name);
30119 putc ('\n', stream);
30122 /* A get_unnamed_decl callback, used for read-only sections. PTR
30123 points to the section string variable. */
30125 static void
30126 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
30128 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
30129 *(const char *const *) directive,
30130 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
30133 /* Likewise for read-write sections. */
30135 static void
30136 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
30138 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
30139 *(const char *const *) directive,
30140 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
30143 static void
30144 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
30146 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
30147 *(const char *const *) directive,
30148 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
30151 /* A get_unnamed_section callback, used for switching to toc_section. */
30153 static void
30154 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
30156 if (TARGET_MINIMAL_TOC)
30158 /* toc_section is always selected at least once from
30159 rs6000_xcoff_file_start, so this is guaranteed to
30160 always be defined once and only once in each file. */
30161 if (!toc_initialized)
30163 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
30164 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
30165 toc_initialized = 1;
30167 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
30168 (TARGET_32BIT ? "" : ",3"));
30170 else
30171 fputs ("\t.toc\n", asm_out_file);
30174 /* Implement TARGET_ASM_INIT_SECTIONS. */
30176 static void
30177 rs6000_xcoff_asm_init_sections (void)
30179 read_only_data_section
30180 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
30181 &xcoff_read_only_section_name);
30183 private_data_section
30184 = get_unnamed_section (SECTION_WRITE,
30185 rs6000_xcoff_output_readwrite_section_asm_op,
30186 &xcoff_private_data_section_name);
30188 tls_data_section
30189 = get_unnamed_section (SECTION_TLS,
30190 rs6000_xcoff_output_tls_section_asm_op,
30191 &xcoff_tls_data_section_name);
30193 tls_private_data_section
30194 = get_unnamed_section (SECTION_TLS,
30195 rs6000_xcoff_output_tls_section_asm_op,
30196 &xcoff_private_data_section_name);
30198 read_only_private_data_section
30199 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
30200 &xcoff_private_data_section_name);
30202 toc_section
30203 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
30205 readonly_data_section = read_only_data_section;
30206 exception_section = data_section;
30209 static int
30210 rs6000_xcoff_reloc_rw_mask (void)
30212 return 3;
30215 static void
30216 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
30217 tree decl ATTRIBUTE_UNUSED)
30219 int smclass;
30220 static const char * const suffix[4] = { "PR", "RO", "RW", "TL" };
30222 if (flags & SECTION_CODE)
30223 smclass = 0;
30224 else if (flags & SECTION_TLS)
30225 smclass = 3;
30226 else if (flags & SECTION_WRITE)
30227 smclass = 2;
30228 else
30229 smclass = 1;
30231 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
30232 (flags & SECTION_CODE) ? "." : "",
30233 name, suffix[smclass], flags & SECTION_ENTSIZE);
30236 #define IN_NAMED_SECTION(DECL) \
30237 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
30238 && DECL_SECTION_NAME (DECL) != NULL)
30240 static section *
30241 rs6000_xcoff_select_section (tree decl, int reloc,
30242 unsigned HOST_WIDE_INT align)
30244 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
30245 named section. */
30246 if (align > BIGGEST_ALIGNMENT)
30248 resolve_unique_section (decl, reloc, true);
30249 if (IN_NAMED_SECTION (decl))
30250 return get_named_section (decl, NULL, reloc);
30253 if (decl_readonly_section (decl, reloc))
30255 if (TREE_PUBLIC (decl))
30256 return read_only_data_section;
30257 else
30258 return read_only_private_data_section;
30260 else
30262 #if HAVE_AS_TLS
30263 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
30265 if (TREE_PUBLIC (decl))
30266 return tls_data_section;
30267 else if (bss_initializer_p (decl))
30269 /* Convert to COMMON to emit in BSS. */
30270 DECL_COMMON (decl) = 1;
30271 return tls_comm_section;
30273 else
30274 return tls_private_data_section;
30276 else
30277 #endif
30278 if (TREE_PUBLIC (decl))
30279 return data_section;
30280 else
30281 return private_data_section;
30285 static void
30286 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
30288 const char *name;
30290 /* Use select_section for private data and uninitialized data with
30291 alignment <= BIGGEST_ALIGNMENT. */
30292 if (!TREE_PUBLIC (decl)
30293 || DECL_COMMON (decl)
30294 || (DECL_INITIAL (decl) == NULL_TREE
30295 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
30296 || DECL_INITIAL (decl) == error_mark_node
30297 || (flag_zero_initialized_in_bss
30298 && initializer_zerop (DECL_INITIAL (decl))))
30299 return;
30301 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
30302 name = (*targetm.strip_name_encoding) (name);
30303 set_decl_section_name (decl, name);
30306 /* Select section for constant in constant pool.
30308 On RS/6000, all constants are in the private read-only data area.
30309 However, if this is being placed in the TOC it must be output as a
30310 toc entry. */
30312 static section *
30313 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
30314 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
30316 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
30317 return toc_section;
30318 else
30319 return read_only_private_data_section;
30322 /* Remove any trailing [DS] or the like from the symbol name. */
30324 static const char *
30325 rs6000_xcoff_strip_name_encoding (const char *name)
30327 size_t len;
30328 if (*name == '*')
30329 name++;
30330 len = strlen (name);
30331 if (name[len - 1] == ']')
30332 return ggc_alloc_string (name, len - 4);
30333 else
30334 return name;
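/* Usage sketch (illustrative): every XCOFF storage mapping class
   suffix is two characters inside brackets, so a trailing ']' always
   means exactly four characters to drop: "foo[DS]" becomes "foo",
   "*bar[RW]" becomes "bar" (the '*' is skipped first), and a plain
   "baz" is returned unchanged.  */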
30337 /* Section attributes. AIX is always PIC. */
30339 static unsigned int
30340 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
30342 unsigned int align;
30343 unsigned int flags = default_section_type_flags (decl, name, reloc);
30345 /* Align to at least UNIT size. */
30346 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
30347 align = MIN_UNITS_PER_WORD;
30348 else
30349 /* Increase alignment of large objects if not already stricter. */
30350 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
30351 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
30352 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
30354 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
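/* Worked example (illustrative): a decl aligned to 16 bytes yields
   align = 16, and exact_log2 (16) = 4 is packed into the low
   SECTION_ENTSIZE bits of the flags; rs6000_xcoff_asm_named_section
   later prints that field as the .csect alignment operand.  */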
30357 /* Output at beginning of assembler file.
30359 Initialize the section names for the RS/6000 at this point.
30361 Specify filename, including full path, to assembler.
30363 We want to go into the TOC section so at least one .toc will be emitted.
30364 Also, in order to output proper .bs/.es pairs, we need at least one static
30365 [RW] section emitted.
30367 Finally, declare mcount when profiling to make the assembler happy. */
30369 static void
30370 rs6000_xcoff_file_start (void)
30372 rs6000_gen_section_name (&xcoff_bss_section_name,
30373 main_input_filename, ".bss_");
30374 rs6000_gen_section_name (&xcoff_private_data_section_name,
30375 main_input_filename, ".rw_");
30376 rs6000_gen_section_name (&xcoff_read_only_section_name,
30377 main_input_filename, ".ro_");
30378 rs6000_gen_section_name (&xcoff_tls_data_section_name,
30379 main_input_filename, ".tls_");
30380 rs6000_gen_section_name (&xcoff_tbss_section_name,
30381 main_input_filename, ".tbss_[UL]");
30383 fputs ("\t.file\t", asm_out_file);
30384 output_quoted_string (asm_out_file, main_input_filename);
30385 fputc ('\n', asm_out_file);
30386 if (write_symbols != NO_DEBUG)
30387 switch_to_section (private_data_section);
30388 switch_to_section (text_section);
30389 if (profile_flag)
30390 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
30391 rs6000_file_start ();
30394 /* Output at end of assembler file.
30395 On the RS/6000, referencing data should automatically pull in text. */
30397 static void
30398 rs6000_xcoff_file_end (void)
30400 switch_to_section (text_section);
30401 fputs ("_section_.text:\n", asm_out_file);
30402 switch_to_section (data_section);
30403 fputs (TARGET_32BIT
30404 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
30405 asm_out_file);
30408 struct declare_alias_data
30410 FILE *file;
30411 bool function_descriptor;
30414 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
30416 static bool
30417 rs6000_declare_alias (struct symtab_node *n, void *d)
30419 struct declare_alias_data *data = (struct declare_alias_data *)d;
30420 /* The main symbol is output specially, because the varasm machinery does
30421 part of the job for us - we do not need to emit .globl/.lglobl and such. */
30422 if (!n->alias || n->weakref)
30423 return false;
30425 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
30426 return false;
30428 /* Prevent assemble_alias from trying to use .set pseudo operation
30429 that does not behave as expected by the middle-end. */
30430 TREE_ASM_WRITTEN (n->decl) = true;
30432 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
30433 char *buffer = (char *) alloca (strlen (name) + 2);
30434 char *p;
30435 int dollar_inside = 0;
30437 strcpy (buffer, name);
30438 p = strchr (buffer, '$');
30439 while (p) {
30440 *p = '_';
30441 dollar_inside++;
30442 p = strchr (p + 1, '$');
30444 if (TREE_PUBLIC (n->decl))
30446 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
30448 if (dollar_inside) {
30449 if (data->function_descriptor)
30450 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
30451 else
30452 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
30454 if (data->function_descriptor)
30455 fputs ("\t.globl .", data->file);
30456 else
30457 fputs ("\t.globl ", data->file);
30458 RS6000_OUTPUT_BASENAME (data->file, buffer);
30459 putc ('\n', data->file);
30461 #ifdef ASM_WEAKEN_DECL
30462 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
30463 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
30464 #endif
30466 else
30468 if (dollar_inside)
30470 if (data->function_descriptor)
30471 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
30472 else
30473 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
30475 if (data->function_descriptor)
30476 fputs ("\t.lglobl .", data->file);
30477 else
30478 fputs ("\t.lglobl ", data->file);
30479 RS6000_OUTPUT_BASENAME (data->file, buffer);
30480 putc ('\n', data->file);
30482 if (data->function_descriptor)
30483 fputs (".", data->file);
30484 RS6000_OUTPUT_BASENAME (data->file, buffer);
30485 fputs (":\n", data->file);
30486 return false;
30489 /* This macro produces the initial definition of a function name.
30490 On the RS/6000, we need to place an extra '.' in the function name and
30491 output the function descriptor.
30492 Dollar signs are converted to underscores.
30494 The csect for the function will have already been created when
30495 text_section was selected. We do have to go back to that csect, however.
30497 The third and fourth parameters to the .function pseudo-op (16 and 044)
30498 are placeholders which no longer have any use.
30500 Because AIX assembler's .set command has unexpected semantics, we output
30501 all aliases as alternative labels in front of the definition. */
30503 void
30504 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
30506 char *buffer = (char *) alloca (strlen (name) + 1);
30507 char *p;
30508 int dollar_inside = 0;
30509 struct declare_alias_data data = {file, false};
30511 strcpy (buffer, name);
30512 p = strchr (buffer, '$');
30513 while (p) {
30514 *p = '_';
30515 dollar_inside++;
30516 p = strchr (p + 1, '$');
30518 if (TREE_PUBLIC (decl))
30520 if (!RS6000_WEAK || !DECL_WEAK (decl))
30522 if (dollar_inside) {
30523 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30524 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30526 fputs ("\t.globl .", file);
30527 RS6000_OUTPUT_BASENAME (file, buffer);
30528 putc ('\n', file);
30531 else
30533 if (dollar_inside) {
30534 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30535 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30537 fputs ("\t.lglobl .", file);
30538 RS6000_OUTPUT_BASENAME (file, buffer);
30539 putc ('\n', file);
30541 fputs ("\t.csect ", file);
30542 RS6000_OUTPUT_BASENAME (file, buffer);
30543 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
30544 RS6000_OUTPUT_BASENAME (file, buffer);
30545 fputs (":\n", file);
30546 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30547 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
30548 RS6000_OUTPUT_BASENAME (file, buffer);
30549 fputs (", TOC[tc0], 0\n", file);
30550 in_section = NULL;
30551 switch_to_section (function_section (decl));
30552 putc ('.', file);
30553 RS6000_OUTPUT_BASENAME (file, buffer);
30554 fputs (":\n", file);
30555 data.function_descriptor = true;
30556 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30557 if (write_symbols != NO_DEBUG && !DECL_IGNORED_P (decl))
30558 xcoffout_declare_function (file, decl, buffer);
30559 return;
30562 /* This macro produces the initial definition of an object (variable) name.
30563 Because AIX assembler's .set command has unexpected semantics, we output
30564 all aliases as alternative labels in front of the definition. */
30566 void
30567 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
30569 struct declare_alias_data data = {file, false};
30570 RS6000_OUTPUT_BASENAME (file, name);
30571 fputs (":\n", file);
30572 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30575 #ifdef HAVE_AS_TLS
30576 static void
30577 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
30579 rtx symbol;
30580 int flags;
30582 default_encode_section_info (decl, rtl, first);
30584 /* Careful not to prod global register variables. */
30585 if (!MEM_P (rtl))
30586 return;
30587 symbol = XEXP (rtl, 0);
30588 if (GET_CODE (symbol) != SYMBOL_REF)
30589 return;
30591 flags = SYMBOL_REF_FLAGS (symbol);
30593 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
30594 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
30596 SYMBOL_REF_FLAGS (symbol) = flags;
30598 #endif /* HAVE_AS_TLS */
30599 #endif /* TARGET_XCOFF */
30601 /* Compute a (partial) cost for rtx X. Return true if the complete
30602 cost has been computed, and false if subexpressions should be
30603 scanned. In either case, *TOTAL contains the cost result. */
30605 static bool
30606 rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
30607 int *total, bool speed)
30609 machine_mode mode = GET_MODE (x);
30611 switch (code)
30613 /* On the RS/6000, if it is valid in the insn, it is free. */
30614 case CONST_INT:
30615 if (((outer_code == SET
30616 || outer_code == PLUS
30617 || outer_code == MINUS)
30618 && (satisfies_constraint_I (x)
30619 || satisfies_constraint_L (x)))
30620 || (outer_code == AND
30621 && (satisfies_constraint_K (x)
30622 || (mode == SImode
30623 ? satisfies_constraint_L (x)
30624 : satisfies_constraint_J (x))
30625 || mask_operand (x, mode)
30626 || (mode == DImode
30627 && mask64_operand (x, DImode))))
30628 || ((outer_code == IOR || outer_code == XOR)
30629 && (satisfies_constraint_K (x)
30630 || (mode == SImode
30631 ? satisfies_constraint_L (x)
30632 : satisfies_constraint_J (x))))
30633 || outer_code == ASHIFT
30634 || outer_code == ASHIFTRT
30635 || outer_code == LSHIFTRT
30636 || outer_code == ROTATE
30637 || outer_code == ROTATERT
30638 || outer_code == ZERO_EXTRACT
30639 || (outer_code == MULT
30640 && satisfies_constraint_I (x))
30641 || ((outer_code == DIV || outer_code == UDIV
30642 || outer_code == MOD || outer_code == UMOD)
30643 && exact_log2 (INTVAL (x)) >= 0)
30644 || (outer_code == COMPARE
30645 && (satisfies_constraint_I (x)
30646 || satisfies_constraint_K (x)))
30647 || ((outer_code == EQ || outer_code == NE)
30648 && (satisfies_constraint_I (x)
30649 || satisfies_constraint_K (x)
30650 || (mode == SImode
30651 ? satisfies_constraint_L (x)
30652 : satisfies_constraint_J (x))))
30653 || (outer_code == GTU
30654 && satisfies_constraint_I (x))
30655 || (outer_code == LTU
30656 && satisfies_constraint_P (x)))
30658 *total = 0;
30659 return true;
30661 else if ((outer_code == PLUS
30662 && reg_or_add_cint_operand (x, VOIDmode))
30663 || (outer_code == MINUS
30664 && reg_or_sub_cint_operand (x, VOIDmode))
30665 || ((outer_code == SET
30666 || outer_code == IOR
30667 || outer_code == XOR)
30668 && (INTVAL (x)
30669 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
30671 *total = COSTS_N_INSNS (1);
30672 return true;
30674 /* FALLTHRU */
30676 case CONST_DOUBLE:
30677 case CONST_WIDE_INT:
30678 case CONST:
30679 case HIGH:
30680 case SYMBOL_REF:
30681 case MEM:
30682 /* When optimizing for size, a MEM should be slightly more expensive
30683 than generating the address, e.g., (plus (reg) (const)).
30684 L1 cache latency is about two instructions. */
30685 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
30686 return true;
30688 case LABEL_REF:
30689 *total = 0;
30690 return true;
30692 case PLUS:
30693 case MINUS:
30694 if (FLOAT_MODE_P (mode))
30695 *total = rs6000_cost->fp;
30696 else
30697 *total = COSTS_N_INSNS (1);
30698 return false;
30700 case MULT:
30701 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30702 && satisfies_constraint_I (XEXP (x, 1)))
30704 if (INTVAL (XEXP (x, 1)) >= -256
30705 && INTVAL (XEXP (x, 1)) <= 255)
30706 *total = rs6000_cost->mulsi_const9;
30707 else
30708 *total = rs6000_cost->mulsi_const;
30710 else if (mode == SFmode)
30711 *total = rs6000_cost->fp;
30712 else if (FLOAT_MODE_P (mode))
30713 *total = rs6000_cost->dmul;
30714 else if (mode == DImode)
30715 *total = rs6000_cost->muldi;
30716 else
30717 *total = rs6000_cost->mulsi;
30718 return false;
30720 case FMA:
30721 if (mode == SFmode)
30722 *total = rs6000_cost->fp;
30723 else
30724 *total = rs6000_cost->dmul;
30725 break;
30727 case DIV:
30728 case MOD:
30729 if (FLOAT_MODE_P (mode))
30731 *total = mode == DFmode ? rs6000_cost->ddiv
30732 : rs6000_cost->sdiv;
30733 return false;
30735 /* FALLTHRU */
30737 case UDIV:
30738 case UMOD:
30739 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30740 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
30742 if (code == DIV || code == MOD)
30743 /* Shift, addze */
30744 *total = COSTS_N_INSNS (2);
30745 else
30746 /* Shift */
30747 *total = COSTS_N_INSNS (1);
30749 else
30751 if (GET_MODE (XEXP (x, 1)) == DImode)
30752 *total = rs6000_cost->divdi;
30753 else
30754 *total = rs6000_cost->divsi;
30756 /* Add in shift and subtract for MOD. */
30757 if (code == MOD || code == UMOD)
30758 *total += COSTS_N_INSNS (2);
30759 return false;
30761 case CTZ:
30762 case FFS:
30763 *total = COSTS_N_INSNS (4);
30764 return false;
30766 case POPCOUNT:
30767 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
30768 return false;
30770 case PARITY:
30771 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
30772 return false;
30774 case NOT:
30775 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
30777 *total = 0;
30778 return false;
30780 /* FALLTHRU */
30782 case AND:
30783 case CLZ:
30784 case IOR:
30785 case XOR:
30786 case ZERO_EXTRACT:
30787 *total = COSTS_N_INSNS (1);
30788 return false;
30790 case ASHIFT:
30791 case ASHIFTRT:
30792 case LSHIFTRT:
30793 case ROTATE:
30794 case ROTATERT:
30795 /* Handle mul_highpart. */
30796 if (outer_code == TRUNCATE
30797 && GET_CODE (XEXP (x, 0)) == MULT)
30799 if (mode == DImode)
30800 *total = rs6000_cost->muldi;
30801 else
30802 *total = rs6000_cost->mulsi;
30803 return true;
30805 else if (outer_code == AND)
30806 *total = 0;
30807 else
30808 *total = COSTS_N_INSNS (1);
30809 return false;
30811 case SIGN_EXTEND:
30812 case ZERO_EXTEND:
30813 if (GET_CODE (XEXP (x, 0)) == MEM)
30814 *total = 0;
30815 else
30816 *total = COSTS_N_INSNS (1);
30817 return false;
30819 case COMPARE:
30820 case NEG:
30821 case ABS:
30822 if (!FLOAT_MODE_P (mode))
30824 *total = COSTS_N_INSNS (1);
30825 return false;
30827 /* FALLTHRU */
30829 case FLOAT:
30830 case UNSIGNED_FLOAT:
30831 case FIX:
30832 case UNSIGNED_FIX:
30833 case FLOAT_TRUNCATE:
30834 *total = rs6000_cost->fp;
30835 return false;
30837 case FLOAT_EXTEND:
30838 if (mode == DFmode)
30839 *total = rs6000_cost->sfdf_convert;
30840 else
30841 *total = rs6000_cost->fp;
30842 return false;
30844 case UNSPEC:
30845 switch (XINT (x, 1))
30847 case UNSPEC_FRSP:
30848 *total = rs6000_cost->fp;
30849 return true;
30851 default:
30852 break;
30854 break;
30856 case CALL:
30857 case IF_THEN_ELSE:
30858 if (!speed)
30860 *total = COSTS_N_INSNS (1);
30861 return true;
30863 else if (FLOAT_MODE_P (mode)
30864 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
30866 *total = rs6000_cost->fp;
30867 return false;
30869 break;
30871 case NE:
30872 case EQ:
30873 case GTU:
30874 case LTU:
30875 /* Carry bit requires mode == Pmode.
30876 NEG or PLUS already counted so only add one. */
30877 if (mode == Pmode
30878 && (outer_code == NEG || outer_code == PLUS))
30880 *total = COSTS_N_INSNS (1);
30881 return true;
30883 if (outer_code == SET)
30885 if (XEXP (x, 1) == const0_rtx)
30887 if (TARGET_ISEL && !TARGET_MFCRF)
30888 *total = COSTS_N_INSNS (8);
30889 else
30890 *total = COSTS_N_INSNS (2);
30891 return true;
30893 else
30895 *total = COSTS_N_INSNS (3);
30896 return false;
30899 /* FALLTHRU */
30901 case GT:
30902 case LT:
30903 case UNORDERED:
30904 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
30906 if (TARGET_ISEL && !TARGET_MFCRF)
30907 *total = COSTS_N_INSNS (8);
30908 else
30909 *total = COSTS_N_INSNS (2);
30910 return true;
30912 /* CC COMPARE. */
30913 if (outer_code == COMPARE)
30915 *total = 0;
30916 return true;
30918 break;
30920 default:
30921 break;
30924 return false;
30927 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
30929 static bool
30930 rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
30931 bool speed)
30933 bool ret = rs6000_rtx_costs (x, code, outer_code, opno, total, speed);
30935 fprintf (stderr,
30936 "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
30937 "opno = %d, total = %d, speed = %s, x:\n",
30938 ret ? "complete" : "scan inner",
30939 GET_RTX_NAME (code),
30940 GET_RTX_NAME (outer_code),
30941 opno,
30942 *total,
30943 speed ? "true" : "false");
30945 debug_rtx (x);
30947 return ret;
30950 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
30952 static int
30953 rs6000_debug_address_cost (rtx x, machine_mode mode,
30954 addr_space_t as, bool speed)
30956 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
30958 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
30959 ret, speed ? "true" : "false");
30960 debug_rtx (x);
30962 return ret;
30966 /* A C expression returning the cost of moving data of MODE from a
30967 register of class FROM to one of class TO. */
30969 static int
30970 rs6000_register_move_cost (machine_mode mode,
30971 reg_class_t from, reg_class_t to)
30973 int ret;
30975 if (TARGET_DEBUG_COST)
30976 dbg_cost_ctrl++;
30978 /* Moves from/to GENERAL_REGS. */
30979 if (reg_classes_intersect_p (to, GENERAL_REGS)
30980 || reg_classes_intersect_p (from, GENERAL_REGS))
30982 reg_class_t rclass = from;
30984 if (! reg_classes_intersect_p (to, GENERAL_REGS))
30985 rclass = to;
30987 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
30988 ret = (rs6000_memory_move_cost (mode, rclass, false)
30989 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
30991 /* It's more expensive to move CR_REGS than CR0_REGS because of the
30992 shift. */
30993 else if (rclass == CR_REGS)
30994 ret = 4;
30996 /* For those processors that have slow LR/CTR moves, make them more
30997 expensive than memory in order to bias spills to memory. */
30998 else if ((rs6000_cpu == PROCESSOR_POWER6
30999 || rs6000_cpu == PROCESSOR_POWER7
31000 || rs6000_cpu == PROCESSOR_POWER8)
31001 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
31002 ret = 6 * hard_regno_nregs[0][mode];
31004 else
31005 /* A move will cost one instruction per GPR moved. */
31006 ret = 2 * hard_regno_nregs[0][mode];
31009 /* If we have VSX, we can easily move between FPR or Altivec registers. */
31010 else if (VECTOR_MEM_VSX_P (mode)
31011 && reg_classes_intersect_p (to, VSX_REGS)
31012 && reg_classes_intersect_p (from, VSX_REGS))
31013 ret = 2 * hard_regno_nregs[32][mode];
31015 /* Moving between two similar registers is just one instruction. */
31016 else if (reg_classes_intersect_p (to, from))
31017 ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
31019 /* Everything else has to go through GENERAL_REGS. */
31020 else
31021 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
31022 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
31024 if (TARGET_DEBUG_COST)
31026 if (dbg_cost_ctrl == 1)
31027 fprintf (stderr,
31028 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
31029 ret, GET_MODE_NAME (mode), reg_class_names[from],
31030 reg_class_names[to]);
31031 dbg_cost_ctrl--;
31034 return ret;
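/* Worked example (illustrative): without VSX, FLOAT_REGS and
   ALTIVEC_REGS intersect neither GENERAL_REGS nor each other, so a
   move between them falls through to the final case and is priced as
   a round trip through the GPRs,
     rs6000_register_move_cost (mode, GENERAL_REGS, ALTIVEC_REGS)
       + rs6000_register_move_cost (mode, FLOAT_REGS, GENERAL_REGS),
   where each leg is itself memory-cost based, steering the register
   allocator away from such moves.  */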
31037 /* A C expression returning the cost of moving data of MODE from a
31038 register to or from memory. */
31040 static int
31041 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
31042 bool in ATTRIBUTE_UNUSED)
31044 int ret;
31046 if (TARGET_DEBUG_COST)
31047 dbg_cost_ctrl++;
31049 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
31050 ret = 4 * hard_regno_nregs[0][mode];
31051 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
31052 || reg_classes_intersect_p (rclass, VSX_REGS)))
31053 ret = 4 * hard_regno_nregs[32][mode];
31054 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
31055 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
31056 else
31057 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
31059 if (TARGET_DEBUG_COST)
31061 if (dbg_cost_ctrl == 1)
31062 fprintf (stderr,
31063 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
31064 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
31065 dbg_cost_ctrl--;
31068 return ret;
31071 /* Returns the decl of a target-specific builtin that implements the
31072 reciprocal of the function FN, or NULL_TREE if not available. */
31074 static tree
31075 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
31076 bool sqrt ATTRIBUTE_UNUSED)
31078 if (optimize_insn_for_size_p ())
31079 return NULL_TREE;
31081 if (md_fn)
31082 switch (fn)
31084 case VSX_BUILTIN_XVSQRTDP:
31085 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
31086 return NULL_TREE;
31088 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
31090 case VSX_BUILTIN_XVSQRTSP:
31091 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
31092 return NULL_TREE;
31094 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
31096 default:
31097 return NULL_TREE;
31100 else
31101 switch (fn)
31103 case BUILT_IN_SQRT:
31104 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
31105 return NULL_TREE;
31107 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
31109 case BUILT_IN_SQRTF:
31110 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
31111 return NULL_TREE;
31113 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
31115 default:
31116 return NULL_TREE;
31120 /* Load up a constant. If the mode is a vector mode, splat the value across
31121 all of the vector elements. */
31123 static rtx
31124 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
31126 rtx reg;
31128 if (mode == SFmode || mode == DFmode)
31130 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
31131 reg = force_reg (mode, d);
31133 else if (mode == V4SFmode)
31135 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
31136 rtvec v = gen_rtvec (4, d, d, d, d);
31137 reg = gen_reg_rtx (mode);
31138 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
31140 else if (mode == V2DFmode)
31142 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
31143 rtvec v = gen_rtvec (2, d, d);
31144 reg = gen_reg_rtx (mode);
31145 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
31147 else
31148 gcc_unreachable ();
31150 return reg;
31153 /* Generate an FMA instruction. */
31155 static void
31156 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
31158 machine_mode mode = GET_MODE (target);
31159 rtx dst;
31161 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
31162 gcc_assert (dst != NULL);
31164 if (dst != target)
31165 emit_move_insn (target, dst);
31168 /* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
31170 static void
31171 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
31173 machine_mode mode = GET_MODE (target);
31174 rtx dst;
31176 /* Altivec does not support fms directly;
31177 generate in terms of fma in that case. */
31178 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
31179 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
31180 else
31182 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
31183 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
31185 gcc_assert (dst != NULL);
31187 if (dst != target)
31188 emit_move_insn (target, dst);
31191 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
31193 static void
31194 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
31196 machine_mode mode = GET_MODE (dst);
31197 rtx r;
31199 /* This is a tad more complicated, since the fnma_optab is for
31200 a different expression: fma(-m1, m2, a), which is the same
31201 thing except in the case of signed zeros.
31203 Fortunately we know that if FMA is supported that FNMSUB is
31204 also supported in the ISA. Just expand it directly. */
31206 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
31208 r = gen_rtx_NEG (mode, a);
31209 r = gen_rtx_FMA (mode, m1, m2, r);
31210 r = gen_rtx_NEG (mode, r);
31211 emit_insn (gen_rtx_SET (dst, r));
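/* Worked example (illustrative) of the signed-zero point above: when
   m1 * m2 == a exactly (all finite), round-to-nearest gives
     -fma (m1, m2, -a) = -(m1*m2 - a) = -(+0.0) = -0.0
   while the fnma form gives
     fma (-m1, m2, a) = a - m1*m2 = +0.0
   so the two differ in the sign of an exact zero, which is why
   FNMSUB is expanded directly rather than through fnma_optab.  */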
31214 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
31215 add a reg_note saying that this was a division. Support both scalar and
31216 vector divide. Assumes no trapping math and finite arguments. */
31218 void
31219 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
31221 machine_mode mode = GET_MODE (dst);
31222 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
31223 int i;
31225 /* Low precision estimates guarantee 5 bits of accuracy. High
31226 precision estimates guarantee 14 bits of accuracy. SFmode
31227 requires 23 bits of accuracy. DFmode requires 52 bits of
31228 accuracy. Each pass at least doubles the accuracy, leading
31229 to the following. */
31230 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
31231 if (mode == DFmode || mode == V2DFmode)
31232 passes++;
31234 enum insn_code code = optab_handler (smul_optab, mode);
31235 insn_gen_fn gen_mul = GEN_FCN (code);
31237 gcc_assert (code != CODE_FOR_nothing);
31239 one = rs6000_load_constant_and_splat (mode, dconst1);
31241 /* x0 = 1./d estimate */
31242 x0 = gen_reg_rtx (mode);
31243 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
31244 UNSPEC_FRES)));
31246 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
31247 if (passes > 1) {
31249 /* e0 = 1. - d * x0 */
31250 e0 = gen_reg_rtx (mode);
31251 rs6000_emit_nmsub (e0, d, x0, one);
31253 /* x1 = x0 + e0 * x0 */
31254 x1 = gen_reg_rtx (mode);
31255 rs6000_emit_madd (x1, e0, x0, x0);
31257 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
31258 ++i, xprev = xnext, eprev = enext) {
31260 /* enext = eprev * eprev */
31261 enext = gen_reg_rtx (mode);
31262 emit_insn (gen_mul (enext, eprev, eprev));
31264 /* xnext = xprev + enext * xprev */
31265 xnext = gen_reg_rtx (mode);
31266 rs6000_emit_madd (xnext, enext, xprev, xprev);
31269 } else
31270 xprev = x0;
31272 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
31274 /* u = n * xprev */
31275 u = gen_reg_rtx (mode);
31276 emit_insn (gen_mul (u, n, xprev));
31278 /* v = n - (d * u) */
31279 v = gen_reg_rtx (mode);
31280 rs6000_emit_nmsub (v, d, u, n);
31282 /* dst = (v * xprev) + u */
31283 rs6000_emit_madd (dst, v, xprev, u);
31285 if (note_p)
31286 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
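/* Illustrative plain-C sketch (a model under assumptions, not GCC code) of
   the sequence emitted above when passes > 1; recip_est () is an assumed
   stand-in for the fres-style estimate:

     double x = recip_est (d);        // x0, at least 5 bits accurate
     double e = 1.0 - d * x;          // e0, the relative error
     x = x + e * x;                   // x1 = x0 * (2 - d * x0)
     for (int i = 0; i < passes - 2; i++)
       {
         e = e * e;                   // the error squares every pass
         x = x + e * x;
       }
     double u = n * x;                // last pass folds in the numerator
     double v = n - d * u;
     return u + v * x;                // ~= n / d to nearly full precision
*/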
31289 /* Newton-Raphson approximation of single/double-precision floating point
31290 rsqrt. Assumes no trapping math and finite arguments. */
31292 void
31293 rs6000_emit_swrsqrt (rtx dst, rtx src)
31295 machine_mode mode = GET_MODE (src);
31296 rtx x0 = gen_reg_rtx (mode);
31297 rtx y = gen_reg_rtx (mode);
31299 /* Low precision estimates guarantee 5 bits of accuracy. High
31300 precision estimates guarantee 14 bits of accuracy. SFmode
31301 requires 23 bits of accuracy. DFmode requires 52 bits of
31302 accuracy. Each pass at least doubles the accuracy, leading
31303 to the following. */
31304 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
31305 if (mode == DFmode || mode == V2DFmode)
31306 passes++;
31308 REAL_VALUE_TYPE dconst3_2;
31309 int i;
31310 rtx halfthree;
31311 enum insn_code code = optab_handler (smul_optab, mode);
31312 insn_gen_fn gen_mul = GEN_FCN (code);
31314 gcc_assert (code != CODE_FOR_nothing);
31316 /* Load up the constant 1.5 either as a scalar, or as a vector. */
31317 real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
31318 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
31320 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
31322 /* x0 = rsqrt estimate */
31323 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
31324 UNSPEC_RSQRT)));
31326 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
31327 rs6000_emit_msub (y, src, halfthree, src);
31329 for (i = 0; i < passes; i++)
31331 rtx x1 = gen_reg_rtx (mode);
31332 rtx u = gen_reg_rtx (mode);
31333 rtx v = gen_reg_rtx (mode);
31335 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
31336 emit_insn (gen_mul (u, x0, x0));
31337 rs6000_emit_nmsub (v, y, u, halfthree);
31338 emit_insn (gen_mul (x1, x0, v));
31339 x0 = x1;
31342 emit_move_insn (dst, x0);
31343 return;
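/* Illustrative plain-C sketch (a model, not GCC code) of the rsqrt
   refinement above; rsqrt_est () is an assumed stand-in for the
   frsqrte-style estimate, and a finite positive input is assumed:

     double x = rsqrt_est (src);
     double y = src * 1.5 - src;          // y = 0.5 * src, reusing 1.5
     for (int i = 0; i < passes; i++)
       x = x * (1.5 - y * (x * x));       // Newton-Raphson rsqrt step
     return x;
*/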
31346 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
31347 (Power7) targets. DST is the target, and SRC is the argument operand. */
31349 void
31350 rs6000_emit_popcount (rtx dst, rtx src)
31352 machine_mode mode = GET_MODE (dst);
31353 rtx tmp1, tmp2;
31355 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
31356 if (TARGET_POPCNTD)
31358 if (mode == SImode)
31359 emit_insn (gen_popcntdsi2 (dst, src));
31360 else
31361 emit_insn (gen_popcntddi2 (dst, src));
31362 return;
31365 tmp1 = gen_reg_rtx (mode);
31367 if (mode == SImode)
31369 emit_insn (gen_popcntbsi2 (tmp1, src));
31370 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
31371 NULL_RTX, 0);
31372 tmp2 = force_reg (SImode, tmp2);
31373 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
31375 else
31377 emit_insn (gen_popcntbdi2 (tmp1, src));
31378 tmp2 = expand_mult (DImode, tmp1,
31379 GEN_INT ((HOST_WIDE_INT)
31380 0x01010101 << 32 | 0x01010101),
31381 NULL_RTX, 0);
31382 tmp2 = force_reg (DImode, tmp2);
31383 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
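/* Illustrative model (not GCC code) of the popcntb fallback above for the
   DImode case, assuming a byte-wise popcount primitive:

     uint64_t b = popcntb (src);               // each byte holds 0..8
     return (b * 0x0101010101010101ULL) >> 56; // multiply sums all bytes
                                               // into the top byte

   The per-byte counts total at most 64, so no byte of the product
   overflows; the SImode variant uses 0x01010101 and a shift of 24.  */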
31388 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
31389 target, and SRC is the argument operand. */
31391 void
31392 rs6000_emit_parity (rtx dst, rtx src)
31394 machine_mode mode = GET_MODE (dst);
31395 rtx tmp;
31397 tmp = gen_reg_rtx (mode);
31399 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
31400 if (TARGET_CMPB)
31402 if (mode == SImode)
31404 emit_insn (gen_popcntbsi2 (tmp, src));
31405 emit_insn (gen_paritysi2_cmpb (dst, tmp));
31407 else
31409 emit_insn (gen_popcntbdi2 (tmp, src));
31410 emit_insn (gen_paritydi2_cmpb (dst, tmp));
31412 return;
31415 if (mode == SImode)
31417 /* Is mult+shift >= shift+xor+shift+xor? */
31418 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
31420 rtx tmp1, tmp2, tmp3, tmp4;
31422 tmp1 = gen_reg_rtx (SImode);
31423 emit_insn (gen_popcntbsi2 (tmp1, src));
31425 tmp2 = gen_reg_rtx (SImode);
31426 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
31427 tmp3 = gen_reg_rtx (SImode);
31428 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
31430 tmp4 = gen_reg_rtx (SImode);
31431 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
31432 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
31434 else
31435 rs6000_emit_popcount (tmp, src);
31436 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
31438 else
31440 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
31441 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
31443 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
31445 tmp1 = gen_reg_rtx (DImode);
31446 emit_insn (gen_popcntbdi2 (tmp1, src));
31448 tmp2 = gen_reg_rtx (DImode);
31449 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
31450 tmp3 = gen_reg_rtx (DImode);
31451 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
31453 tmp4 = gen_reg_rtx (DImode);
31454 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
31455 tmp5 = gen_reg_rtx (DImode);
31456 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
31458 tmp6 = gen_reg_rtx (DImode);
31459 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
31460 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
31462 else
31463 rs6000_emit_popcount (tmp, src);
31464 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
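/* Illustrative model (not GCC code) of the shift/xor fallback above,
   DImode case, with popcntb as before:

     uint64_t t = popcntb (src);   // per-byte popcounts
     t ^= t >> 32;
     t ^= t >> 16;
     t ^= t >> 8;                  // low byte = xor of all eight bytes
     return t & 1;                 // its low bit is the overall parity
*/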
31468 /* Expand an Altivec constant permutation for little endian mode.
31469 There are two issues: First, the two input operands must be
31470 swapped so that together they form a double-wide array in LE
31471 order. Second, the vperm instruction has surprising behavior
31472 in LE mode: it interprets the elements of the source vectors
31473 in BE mode ("left to right") and interprets the elements of
31474 the destination vector in LE mode ("right to left"). To
31475 correct for this, we must subtract each element of the permute
31476 control vector from 31.
31478 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
31479 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
31480 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
31481 serve as the permute control vector. Then, in BE mode,
31483 vperm 9,10,11,12
31485 places the desired result in vr9. However, in LE mode the
31486 vector contents will be
31488 vr10 = 00000003 00000002 00000001 00000000
31489 vr11 = 00000007 00000006 00000005 00000004
31491 The result of the vperm using the same permute control vector is
31493 vr9 = 05000000 07000000 01000000 03000000
31495 That is, the leftmost 4 bytes of vr10 are interpreted as the
31496 source for the rightmost 4 bytes of vr9, and so on.
31498 If we change the permute control vector to
31500 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
31502 and issue
31504 vperm 9,11,10,12
31506 we get the desired
31508 vr9 = 00000006 00000004 00000002 00000000. */
31510 void
31511 altivec_expand_vec_perm_const_le (rtx operands[4])
31513 unsigned int i;
31514 rtx perm[16];
31515 rtx constv, unspec;
31516 rtx target = operands[0];
31517 rtx op0 = operands[1];
31518 rtx op1 = operands[2];
31519 rtx sel = operands[3];
31521 /* Unpack and adjust the constant selector. */
31522 for (i = 0; i < 16; ++i)
31524 rtx e = XVECEXP (sel, 0, i);
31525 unsigned int elt = 31 - (INTVAL (e) & 31);
31526 perm[i] = GEN_INT (elt);
31529 /* Expand to a permute, swapping the inputs and using the
31530 adjusted selector. */
31531 if (!REG_P (op0))
31532 op0 = force_reg (V16QImode, op0);
31533 if (!REG_P (op1))
31534 op1 = force_reg (V16QImode, op1);
31536 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
31537 constv = force_reg (V16QImode, constv);
31538 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
31539 UNSPEC_VPERM);
31540 if (!REG_P (target))
31542 rtx tmp = gen_reg_rtx (V16QImode);
31543 emit_move_insn (tmp, unspec);
31544 unspec = tmp;
31547 emit_move_insn (target, unspec);
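/* Illustrative check (not GCC code) of the selector adjustment above,
   using the worked example from the comment:

     unsigned char sel[16] = { 0,1,2,3, 8,9,10,11, 16,17,18,19, 24,25,26,27 };
     for (int i = 0; i < 16; i++)
       sel[i] = 31 - (sel[i] & 31);   // -> 31,30,29,28, 23,22,21,20, ...
*/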
31550 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
31551 permute control vector. But here it's not a constant, so we must
31552 generate a vector NAND or NOR to do the adjustment. */
31554 void
31555 altivec_expand_vec_perm_le (rtx operands[4])
31557 rtx notx, iorx, unspec;
31558 rtx target = operands[0];
31559 rtx op0 = operands[1];
31560 rtx op1 = operands[2];
31561 rtx sel = operands[3];
31562 rtx tmp = target;
31563 rtx norreg = gen_reg_rtx (V16QImode);
31564 machine_mode mode = GET_MODE (target);
31566 /* Get everything in regs so the pattern matches. */
31567 if (!REG_P (op0))
31568 op0 = force_reg (mode, op0);
31569 if (!REG_P (op1))
31570 op1 = force_reg (mode, op1);
31571 if (!REG_P (sel))
31572 sel = force_reg (V16QImode, sel);
31573 if (!REG_P (target))
31574 tmp = gen_reg_rtx (mode);
31576 /* Invert the selector with a VNAND if available, else a VNOR.
31577 The VNAND is preferred for future fusion opportunities. */
31578 notx = gen_rtx_NOT (V16QImode, sel);
31579 iorx = (TARGET_P8_VECTOR
31580 ? gen_rtx_IOR (V16QImode, notx, notx)
31581 : gen_rtx_AND (V16QImode, notx, notx));
31582 emit_insn (gen_rtx_SET (norreg, iorx));
31584 /* Permute with operands reversed and adjusted selector. */
31585 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
31586 UNSPEC_VPERM);
31588 /* Copy into target, possibly by way of a register. */
31589 if (!REG_P (target))
31591 emit_move_insn (tmp, unspec);
31592 unspec = tmp;
31595 emit_move_insn (target, unspec);
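/* Why one vnand/vnor suffices here (illustrative note): with both
   operands equal, NAND and NOR each compute the bitwise complement, and
   for the low five bits that vperm consults, ~sel equals 31 - (sel & 31),
   which is exactly the little-endian adjustment described above.  */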
31598 /* Expand an Altivec constant permutation. Return true if we match
31599 an efficient implementation; false to fall back to VPERM. */
31601 bool
31602 altivec_expand_vec_perm_const (rtx operands[4])
31604 struct altivec_perm_insn {
31605 HOST_WIDE_INT mask;
31606 enum insn_code impl;
31607 unsigned char perm[16];
31609 static const struct altivec_perm_insn patterns[] = {
31610 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
31611 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
31612 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
31613 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
31614 { OPTION_MASK_ALTIVEC,
31615 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
31616 : CODE_FOR_altivec_vmrglb_direct),
31617 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
31618 { OPTION_MASK_ALTIVEC,
31619 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
31620 : CODE_FOR_altivec_vmrglh_direct),
31621 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
31622 { OPTION_MASK_ALTIVEC,
31623 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
31624 : CODE_FOR_altivec_vmrglw_direct),
31625 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
31626 { OPTION_MASK_ALTIVEC,
31627 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
31628 : CODE_FOR_altivec_vmrghb_direct),
31629 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
31630 { OPTION_MASK_ALTIVEC,
31631 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
31632 : CODE_FOR_altivec_vmrghh_direct),
31633 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
31634 { OPTION_MASK_ALTIVEC,
31635 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
31636 : CODE_FOR_altivec_vmrghw_direct),
31637 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
31638 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
31639 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
31640 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
31641 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
31644 unsigned int i, j, elt, which;
31645 unsigned char perm[16];
31646 rtx target, op0, op1, sel, x;
31647 bool one_vec;
31649 target = operands[0];
31650 op0 = operands[1];
31651 op1 = operands[2];
31652 sel = operands[3];
31654 /* Unpack the constant selector. */
31655 for (i = which = 0; i < 16; ++i)
31657 rtx e = XVECEXP (sel, 0, i);
31658 elt = INTVAL (e) & 31;
31659 which |= (elt < 16 ? 1 : 2);
31660 perm[i] = elt;
31663 /* Simplify the constant selector based on operands. */
31664 switch (which)
31666 default:
31667 gcc_unreachable ();
31669 case 3:
31670 one_vec = false;
31671 if (!rtx_equal_p (op0, op1))
31672 break;
31673 /* FALLTHRU */
31675 case 2:
31676 for (i = 0; i < 16; ++i)
31677 perm[i] &= 15;
31678 op0 = op1;
31679 one_vec = true;
31680 break;
31682 case 1:
31683 op1 = op0;
31684 one_vec = true;
31685 break;
31688 /* Look for splat patterns. */
31689 if (one_vec)
31691 elt = perm[0];
31693 for (i = 0; i < 16; ++i)
31694 if (perm[i] != elt)
31695 break;
31696 if (i == 16)
31698 if (!BYTES_BIG_ENDIAN)
31699 elt = 15 - elt;
31700 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
31701 return true;
31704 if (elt % 2 == 0)
31706 for (i = 0; i < 16; i += 2)
31707 if (perm[i] != elt || perm[i + 1] != elt + 1)
31708 break;
31709 if (i == 16)
31711 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
31712 x = gen_reg_rtx (V8HImode);
31713 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
31714 GEN_INT (field)));
31715 emit_move_insn (target, gen_lowpart (V16QImode, x));
31716 return true;
31720 if (elt % 4 == 0)
31722 for (i = 0; i < 16; i += 4)
31723 if (perm[i] != elt
31724 || perm[i + 1] != elt + 1
31725 || perm[i + 2] != elt + 2
31726 || perm[i + 3] != elt + 3)
31727 break;
31728 if (i == 16)
31730 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
31731 x = gen_reg_rtx (V4SImode);
31732 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
31733 GEN_INT (field)));
31734 emit_move_insn (target, gen_lowpart (V16QImode, x));
31735 return true;
31740 /* Look for merge and pack patterns. */
31741 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
31743 bool swapped;
31745 if ((patterns[j].mask & rs6000_isa_flags) == 0)
31746 continue;
31748 elt = patterns[j].perm[0];
31749 if (perm[0] == elt)
31750 swapped = false;
31751 else if (perm[0] == elt + 16)
31752 swapped = true;
31753 else
31754 continue;
31755 for (i = 1; i < 16; ++i)
31757 elt = patterns[j].perm[i];
31758 if (swapped)
31759 elt = (elt >= 16 ? elt - 16 : elt + 16);
31760 else if (one_vec && elt >= 16)
31761 elt -= 16;
31762 if (perm[i] != elt)
31763 break;
31765 if (i == 16)
31767 enum insn_code icode = patterns[j].impl;
31768 machine_mode omode = insn_data[icode].operand[0].mode;
31769 machine_mode imode = insn_data[icode].operand[1].mode;
31771 /* For little-endian, don't use vpkuwum and vpkuhum if the
31772 underlying vector type is not V4SI and V8HI, respectively.
31773 For example, using vpkuwum with a V8HI picks up the even
31774 halfwords (BE numbering) when the even halfwords (LE
31775 numbering) are what we need. */
31776 if (!BYTES_BIG_ENDIAN
31777 && icode == CODE_FOR_altivec_vpkuwum_direct
31778 && ((GET_CODE (op0) == REG
31779 && GET_MODE (op0) != V4SImode)
31780 || (GET_CODE (op0) == SUBREG
31781 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
31782 continue;
31783 if (!BYTES_BIG_ENDIAN
31784 && icode == CODE_FOR_altivec_vpkuhum_direct
31785 && ((GET_CODE (op0) == REG
31786 && GET_MODE (op0) != V8HImode)
31787 || (GET_CODE (op0) == SUBREG
31788 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
31789 continue;
31791 /* For little-endian, the two input operands must be swapped
31792 (or swapped back) to ensure proper right-to-left numbering
31793 from 0 to 2N-1. */
31794 if (swapped ^ !BYTES_BIG_ENDIAN)
31795 std::swap (op0, op1);
31796 if (imode != V16QImode)
31798 op0 = gen_lowpart (imode, op0);
31799 op1 = gen_lowpart (imode, op1);
31801 if (omode == V16QImode)
31802 x = target;
31803 else
31804 x = gen_reg_rtx (omode);
31805 emit_insn (GEN_FCN (icode) (x, op0, op1));
31806 if (omode != V16QImode)
31807 emit_move_insn (target, gen_lowpart (V16QImode, x));
31808 return true;
31812 if (!BYTES_BIG_ENDIAN)
31814 altivec_expand_vec_perm_const_le (operands);
31815 return true;
31818 return false;
31821 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
31822 Return true if we match an efficient implementation. */
31824 static bool
31825 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
31826 unsigned char perm0, unsigned char perm1)
31828 rtx x;
31830 /* If both selectors come from the same operand, fold to single op. */
31831 if ((perm0 & 2) == (perm1 & 2))
31833 if (perm0 & 2)
31834 op0 = op1;
31835 else
31836 op1 = op0;
31838 /* If both operands are equal, fold to simpler permutation. */
31839 if (rtx_equal_p (op0, op1))
31841 perm0 = perm0 & 1;
31842 perm1 = (perm1 & 1) + 2;
31844 /* If the first selector comes from the second operand, swap. */
31845 else if (perm0 & 2)
31847 if (perm1 & 2)
31848 return false;
31849 perm0 -= 2;
31850 perm1 += 2;
31851 std::swap (op0, op1);
31853 /* If the second selector does not come from the second operand, fail. */
31854 else if ((perm1 & 2) == 0)
31855 return false;
31857 /* Success! */
31858 if (target != NULL)
31860 machine_mode vmode, dmode;
31861 rtvec v;
31863 vmode = GET_MODE (target);
31864 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
31865 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
31866 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
31867 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
31868 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
31869 emit_insn (gen_rtx_SET (target, x));
31871 return true;
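/* Worked example (illustrative): selectors { 2, 1 } ask for op1[0] then
   op0[1].  The swap branch above rewrites them to { 0, 3 } and exchanges
   op0 with op1, selecting the same elements in the canonical "first
   element from the first operand" form.  */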
31874 bool
31875 rs6000_expand_vec_perm_const (rtx operands[4])
31877 rtx target, op0, op1, sel;
31878 unsigned char perm0, perm1;
31880 target = operands[0];
31881 op0 = operands[1];
31882 op1 = operands[2];
31883 sel = operands[3];
31885 /* Unpack the constant selector. */
31886 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
31887 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
31889 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
31892 /* Test whether a constant permutation is supported. */
31894 static bool
31895 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
31896 const unsigned char *sel)
31898 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
31899 if (TARGET_ALTIVEC)
31900 return true;
31902 /* Check for ps_merge* or evmerge* insns. */
31903 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
31904 || (TARGET_SPE && vmode == V2SImode))
31906 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
31907 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
31908 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
31911 return false;
31914 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
31916 static void
31917 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
31918 machine_mode vmode, unsigned nelt, rtx perm[])
31920 machine_mode imode;
31921 rtx x;
31923 imode = vmode;
31924 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
31926 imode = GET_MODE_INNER (vmode);
31927 imode = mode_for_size (GET_MODE_BITSIZE (imode), MODE_INT, 0);
31928 imode = mode_for_vector (imode, nelt);
31931 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
31932 x = expand_vec_perm (vmode, op0, op1, x, target);
31933 if (x != target)
31934 emit_move_insn (target, x);
31937 /* Expand an extract even operation. */
31939 void
31940 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
31942 machine_mode vmode = GET_MODE (target);
31943 unsigned i, nelt = GET_MODE_NUNITS (vmode);
31944 rtx perm[16];
31946 for (i = 0; i < nelt; i++)
31947 perm[i] = GEN_INT (i * 2);
31949 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
31952 /* Expand a vector interleave operation. */
31954 void
31955 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
31957 machine_mode vmode = GET_MODE (target);
31958 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
31959 rtx perm[16];
31961 high = (highp ? 0 : nelt / 2);
31962 for (i = 0; i < nelt / 2; i++)
31964 perm[i * 2] = GEN_INT (i + high);
31965 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
31968 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
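/* Example (illustrative): for nelt = 4 and highp, high = 0 and the
   selector built above is { 0, 4, 1, 5 }, i.e. op0[0], op1[0], op0[1],
   op1[1]; with !highp it is { 2, 6, 3, 7 }.  */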
31971 /* Scale a V2DF vector SRC by 2**SCALE and place the result in TGT.  */
31972 void
31973 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
31975 HOST_WIDE_INT hwi_scale (scale);
31976 REAL_VALUE_TYPE r_pow;
31977 rtvec v = rtvec_alloc (2);
31978 rtx elt;
31979 rtx scale_vec = gen_reg_rtx (V2DFmode);
31980 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
31981 elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
31982 RTVEC_ELT (v, 0) = elt;
31983 RTVEC_ELT (v, 1) = elt;
31984 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
31985 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
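/* Example (illustrative): scale = 3 splats { 8.0, 8.0 } and emits a
   single V2DF multiply, so each lane is scaled by 2**3; powers of two
   keep the result exact short of overflow or underflow.  */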
31988 /* Return an RTX representing where to find the function value of a
31989 function returning MODE. */
31990 static rtx
31991 rs6000_complex_function_value (machine_mode mode)
31993 unsigned int regno;
31994 rtx r1, r2;
31995 machine_mode inner = GET_MODE_INNER (mode);
31996 unsigned int inner_bytes = GET_MODE_SIZE (inner);
31998 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31999 regno = FP_ARG_RETURN;
32000 else
32002 regno = GP_ARG_RETURN;
32004 /* 32-bit is OK since it'll go in r3/r4. */
32005 if (TARGET_32BIT && inner_bytes >= 4)
32006 return gen_rtx_REG (mode, regno);
32009 if (inner_bytes >= 8)
32010 return gen_rtx_REG (mode, regno);
32012 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
32013 const0_rtx);
32014 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
32015 GEN_INT (inner_bytes));
32016 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
32019 /* Return an rtx describing a return value of MODE as a PARALLEL
32020 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
32021 stride REG_STRIDE. */
32023 static rtx
32024 rs6000_parallel_return (machine_mode mode,
32025 int n_elts, machine_mode elt_mode,
32026 unsigned int regno, unsigned int reg_stride)
32028 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
32030 int i;
32031 for (i = 0; i < n_elts; i++)
32033 rtx r = gen_rtx_REG (elt_mode, regno);
32034 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
32035 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
32036 regno += reg_stride;
32039 return par;
32042 /* Target hook for TARGET_FUNCTION_VALUE.
32044 On the SPE, both FPs and vectors are returned in r3.
32046 On RS/6000 an integer value is in r3 and a floating-point value is in
32047 fp1, unless -msoft-float. */
32049 static rtx
32050 rs6000_function_value (const_tree valtype,
32051 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
32052 bool outgoing ATTRIBUTE_UNUSED)
32054 machine_mode mode;
32055 unsigned int regno;
32056 machine_mode elt_mode;
32057 int n_elts;
32059 /* Special handling for structs in darwin64. */
32060 if (TARGET_MACHO
32061 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
32063 CUMULATIVE_ARGS valcum;
32064 rtx valret;
32066 valcum.words = 0;
32067 valcum.fregno = FP_ARG_MIN_REG;
32068 valcum.vregno = ALTIVEC_ARG_MIN_REG;
32069 /* Do a trial code generation as if this were going to be passed as
32070 an argument; if any part goes in memory, we return NULL. */
32071 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
32072 if (valret)
32073 return valret;
32074 /* Otherwise fall through to standard ABI rules. */
32077 mode = TYPE_MODE (valtype);
32079 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
32080 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
32082 int first_reg, n_regs;
32084 if (SCALAR_FLOAT_MODE_P (elt_mode))
32086 /* _Decimal128 must use even/odd register pairs. */
32087 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
32088 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
32090 else
32092 first_reg = ALTIVEC_ARG_RETURN;
32093 n_regs = 1;
32096 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
32099 /* Some return value types need to be split in the 32-bit ABI with -mpowerpc64.  */
32100 if (TARGET_32BIT && TARGET_POWERPC64)
32101 switch (mode)
32103 default:
32104 break;
32105 case DImode:
32106 case SCmode:
32107 case DCmode:
32108 case TCmode:
32109 int count = GET_MODE_SIZE (mode) / 4;
32110 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
32113 if ((INTEGRAL_TYPE_P (valtype)
32114 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
32115 || POINTER_TYPE_P (valtype))
32116 mode = TARGET_32BIT ? SImode : DImode;
32118 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
32119 /* _Decimal128 must use an even/odd register pair. */
32120 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
32121 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
32122 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
32123 regno = FP_ARG_RETURN;
32124 else if (TREE_CODE (valtype) == COMPLEX_TYPE
32125 && targetm.calls.split_complex_arg)
32126 return rs6000_complex_function_value (mode);
32127 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
32128 return register is used in both cases, and we won't see V2DImode/V2DFmode
32129 for pure altivec, combine the two cases. */
32130 else if (TREE_CODE (valtype) == VECTOR_TYPE
32131 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
32132 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
32133 regno = ALTIVEC_ARG_RETURN;
32134 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
32135 && (mode == DFmode || mode == DCmode
32136 || mode == TFmode || mode == TCmode))
32137 return spe_build_register_parallel (mode, GP_ARG_RETURN);
32138 else
32139 regno = GP_ARG_RETURN;
32141 return gen_rtx_REG (mode, regno);
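/* Example (illustrative): under the ELFv2 ABI,

     struct { float a, b, c, d; }

   is discovered as a homogeneous aggregate with n_elts = 4 and
   elt_mode = SFmode, so it comes back from rs6000_parallel_return as a
   PARALLEL of f1..f4 at byte offsets 0, 4, 8 and 12.  */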
32144 /* Define how to find the value returned by a library function
32145 assuming the value has mode MODE. */
32146 rtx
32147 rs6000_libcall_value (machine_mode mode)
32149 unsigned int regno;
32151 /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64.  */
32152 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
32153 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
32155 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
32156 /* _Decimal128 must use an even/odd register pair. */
32157 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
32158 else if (SCALAR_FLOAT_MODE_P (mode)
32159 && TARGET_HARD_FLOAT && TARGET_FPRS
32160 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
32161 regno = FP_ARG_RETURN;
32162 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
32163 return register is used in both cases, and we won't see V2DImode/V2DFmode
32164 for pure altivec, combine the two cases. */
32165 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
32166 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
32167 regno = ALTIVEC_ARG_RETURN;
32168 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
32169 return rs6000_complex_function_value (mode);
32170 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
32171 && (mode == DFmode || mode == DCmode
32172 || mode == TFmode || mode == TCmode))
32173 return spe_build_register_parallel (mode, GP_ARG_RETURN);
32174 else
32175 regno = GP_ARG_RETURN;
32177 return gen_rtx_REG (mode, regno);
32181 /* Return true if we use LRA instead of the reload pass.  */
32182 static bool
32183 rs6000_lra_p (void)
32185 return rs6000_lra_flag;
32188 /* Given FROM and TO register numbers, say whether this elimination is allowed.
32189 Frame pointer elimination is automatically handled.
32191 For the RS/6000, if frame pointer elimination is being done, we would like
32192 to convert ap into fp, not sp.
32194 We need r30 if -mminimal-toc was specified, and there are constant pool
32195 references. */
32197 static bool
32198 rs6000_can_eliminate (const int from, const int to)
32200 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
32201 ? ! frame_pointer_needed
32202 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
32203 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
32204 : true);
32207 /* Define the offset between two registers, FROM to be eliminated and its
32208 replacement TO, at the start of a routine. */
32209 HOST_WIDE_INT
32210 rs6000_initial_elimination_offset (int from, int to)
32212 rs6000_stack_t *info = rs6000_stack_info ();
32213 HOST_WIDE_INT offset;
32215 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
32216 offset = info->push_p ? 0 : -info->total_size;
32217 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
32219 offset = info->push_p ? 0 : -info->total_size;
32220 if (FRAME_GROWS_DOWNWARD)
32221 offset += info->fixed_size + info->vars_size + info->parm_size;
32223 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
32224 offset = FRAME_GROWS_DOWNWARD
32225 ? info->fixed_size + info->vars_size + info->parm_size
32226 : 0;
32227 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
32228 offset = info->total_size;
32229 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
32230 offset = info->push_p ? info->total_size : 0;
32231 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
32232 offset = 0;
32233 else
32234 gcc_unreachable ();
32236 return offset;
32239 static rtx
32240 rs6000_dwarf_register_span (rtx reg)
32242 rtx parts[8];
32243 int i, words;
32244 unsigned regno = REGNO (reg);
32245 machine_mode mode = GET_MODE (reg);
32247 if (TARGET_SPE
32248 && regno < 32
32249 && (SPE_VECTOR_MODE (GET_MODE (reg))
32250 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
32251 && mode != SFmode && mode != SDmode && mode != SCmode)))
32252 ;
32253 else
32254 return NULL_RTX;
32256 regno = REGNO (reg);
32258 /* The duality of the SPE register size wreaks all kinds of havoc.
32259 This is a way of distinguishing r0 in 32-bits from r0 in
32260 64-bits. */
32261 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
32262 gcc_assert (words <= 4);
32263 for (i = 0; i < words; i++, regno++)
32265 if (BYTES_BIG_ENDIAN)
32267 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
32268 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
32270 else
32272 parts[2 * i] = gen_rtx_REG (SImode, regno);
32273 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
32277 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
32280 /* Fill in sizes for SPE register high parts in table used by unwinder. */
32282 static void
32283 rs6000_init_dwarf_reg_sizes_extra (tree address)
32285 if (TARGET_SPE)
32287 int i;
32288 machine_mode mode = TYPE_MODE (char_type_node);
32289 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
32290 rtx mem = gen_rtx_MEM (BLKmode, addr);
32291 rtx value = gen_int_mode (4, mode);
32293 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
32295 int column = DWARF_REG_TO_UNWIND_COLUMN
32296 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
32297 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
32299 emit_move_insn (adjust_address (mem, mode, offset), value);
32303 if (TARGET_MACHO && ! TARGET_ALTIVEC)
32305 int i;
32306 machine_mode mode = TYPE_MODE (char_type_node);
32307 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
32308 rtx mem = gen_rtx_MEM (BLKmode, addr);
32309 rtx value = gen_int_mode (16, mode);
32311 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
32312 The unwinder still needs to know the size of Altivec registers. */
32314 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
32316 int column = DWARF_REG_TO_UNWIND_COLUMN
32317 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
32318 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
32320 emit_move_insn (adjust_address (mem, mode, offset), value);
32325 /* Map internal gcc register numbers to debug format register numbers.
32326 FORMAT specifies the type of debug register number to use:
32327 0 -- debug information, except for frame-related sections
32328 1 -- DWARF .debug_frame section
32329 2 -- DWARF .eh_frame section */
32331 unsigned int
32332 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
32334 /* We never use the GCC internal number for SPE high registers.
32335 Those are mapped to the 1200..1231 range for all debug formats. */
32336 if (SPE_HIGH_REGNO_P (regno))
32337 return regno - FIRST_SPE_HIGH_REGNO + 1200;
32339 /* Except for the above, we use the internal number for non-DWARF
32340 debug information, and also for .eh_frame. */
32341 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
32342 return regno;
32344 /* On some platforms, we use the standard DWARF register
32345 numbering for .debug_info and .debug_frame. */
32346 #ifdef RS6000_USE_DWARF_NUMBERING
32347 if (regno <= 63)
32348 return regno;
32349 if (regno == LR_REGNO)
32350 return 108;
32351 if (regno == CTR_REGNO)
32352 return 109;
32353 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
32354 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
32355 The actual code emitted saves the whole of CR, so we map CR2_REGNO
32356 to the DWARF reg for CR. */
32357 if (format == 1 && regno == CR2_REGNO)
32358 return 64;
32359 if (CR_REGNO_P (regno))
32360 return regno - CR0_REGNO + 86;
32361 if (regno == CA_REGNO)
32362 return 101; /* XER */
32363 if (ALTIVEC_REGNO_P (regno))
32364 return regno - FIRST_ALTIVEC_REGNO + 1124;
32365 if (regno == VRSAVE_REGNO)
32366 return 356;
32367 if (regno == VSCR_REGNO)
32368 return 67;
32369 if (regno == SPE_ACC_REGNO)
32370 return 99;
32371 if (regno == SPEFSCR_REGNO)
32372 return 612;
32373 #endif
32374 return regno;
32377 /* Target hook for eh_return_filter_mode.  */
32378 static machine_mode
32379 rs6000_eh_return_filter_mode (void)
32381 return TARGET_32BIT ? SImode : word_mode;
32384 /* Target hook for scalar_mode_supported_p. */
32385 static bool
32386 rs6000_scalar_mode_supported_p (machine_mode mode)
32388 /* -m32 does not support TImode. This is the default, from
32389 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
32390 same ABI as for -m32. But default_scalar_mode_supported_p allows
32391 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
32392 for -mpowerpc64. */
32393 if (TARGET_32BIT && mode == TImode)
32394 return false;
32396 if (DECIMAL_FLOAT_MODE_P (mode))
32397 return default_decimal_float_supported_p ();
32398 else
32399 return default_scalar_mode_supported_p (mode);
32402 /* Target hook for vector_mode_supported_p. */
32403 static bool
32404 rs6000_vector_mode_supported_p (machine_mode mode)
32407 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
32408 return true;
32410 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
32411 return true;
32413 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
32414 return true;
32416 else
32417 return false;
32420 /* Target hook for invalid_arg_for_unprototyped_fn. */
32421 static const char *
32422 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
32424 return (!rs6000_darwin64_abi
32425 && typelist == 0
32426 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
32427 && (funcdecl == NULL_TREE
32428 || (TREE_CODE (funcdecl) == FUNCTION_DECL
32429 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
32430 ? N_("AltiVec argument passed to unprototyped function")
32431 : NULL;
32434 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
32435 setup by using the __stack_chk_fail_local hidden function instead of
32436 calling __stack_chk_fail directly. Otherwise it is better to call
32437 __stack_chk_fail directly. */
32439 static tree ATTRIBUTE_UNUSED
32440 rs6000_stack_protect_fail (void)
32442 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
32443 ? default_hidden_stack_protect_fail ()
32444 : default_external_stack_protect_fail ();
32447 void
32448 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
32449 int num_operands ATTRIBUTE_UNUSED)
32451 if (rs6000_warn_cell_microcode)
32453 const char *temp;
32454 int insn_code_number = recog_memoized (insn);
32455 location_t location = INSN_LOCATION (insn);
32457 /* Punt on insns we cannot recognize. */
32458 if (insn_code_number < 0)
32459 return;
32461 temp = get_insn_template (insn_code_number, insn);
32463 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
32464 warning_at (location, OPT_mwarn_cell_microcode,
32465 "emitting microcode insn %s\t[%s] #%d",
32466 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
32467 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
32468 warning_at (location, OPT_mwarn_cell_microcode,
32469 "emitting conditional microcode insn %s\t[%s] #%d",
32470 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
32474 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
32476 #if TARGET_ELF
32477 static unsigned HOST_WIDE_INT
32478 rs6000_asan_shadow_offset (void)
32480 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
32482 #endif
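/* Illustrative note: AddressSanitizer maps application memory as

     shadow = (addr >> 3) + offset,

   so the values above place the shadow region at 1 << 41 on 64-bit and
   1 << 29 on 32-bit PowerPC ELF targets.  */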
32484 /* Mask options that we want to support inside of attribute((target)) and
32485 #pragma GCC target operations. Note, we do not include things like
32486 64/32-bit, endianness, hard/soft floating point, etc. that would have
32487 different calling sequences. */
32489 struct rs6000_opt_mask {
32490 const char *name; /* option name */
32491 HOST_WIDE_INT mask; /* mask to set */
32492 bool invert; /* invert sense of mask */
32493 bool valid_target; /* option is a target option */
32496 static struct rs6000_opt_mask const rs6000_opt_masks[] =
32498 { "altivec", OPTION_MASK_ALTIVEC, false, true },
32499 { "cmpb", OPTION_MASK_CMPB, false, true },
32500 { "crypto", OPTION_MASK_CRYPTO, false, true },
32501 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
32502 { "dlmzb", OPTION_MASK_DLMZB, false, true },
32503 { "fprnd", OPTION_MASK_FPRND, false, true },
32504 { "hard-dfp", OPTION_MASK_DFP, false, true },
32505 { "htm", OPTION_MASK_HTM, false, true },
32506 { "isel", OPTION_MASK_ISEL, false, true },
32507 { "mfcrf", OPTION_MASK_MFCRF, false, true },
32508 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
32509 { "mulhw", OPTION_MASK_MULHW, false, true },
32510 { "multiple", OPTION_MASK_MULTIPLE, false, true },
32511 { "popcntb", OPTION_MASK_POPCNTB, false, true },
32512 { "popcntd", OPTION_MASK_POPCNTD, false, true },
32513 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
32514 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
32515 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
32516 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
32517 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
32518 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
32519 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
32520 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
32521 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
32522 { "string", OPTION_MASK_STRING, false, true },
32523 { "update", OPTION_MASK_NO_UPDATE, true , true },
32524 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
32525 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
32526 { "vsx", OPTION_MASK_VSX, false, true },
32527 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
32528 #ifdef OPTION_MASK_64BIT
32529 #if TARGET_AIX_OS
32530 { "aix64", OPTION_MASK_64BIT, false, false },
32531 { "aix32", OPTION_MASK_64BIT, true, false },
32532 #else
32533 { "64", OPTION_MASK_64BIT, false, false },
32534 { "32", OPTION_MASK_64BIT, true, false },
32535 #endif
32536 #endif
32537 #ifdef OPTION_MASK_EABI
32538 { "eabi", OPTION_MASK_EABI, false, false },
32539 #endif
32540 #ifdef OPTION_MASK_LITTLE_ENDIAN
32541 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
32542 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
32543 #endif
32544 #ifdef OPTION_MASK_RELOCATABLE
32545 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
32546 #endif
32547 #ifdef OPTION_MASK_STRICT_ALIGN
32548 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
32549 #endif
32550 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
32551 { "string", OPTION_MASK_STRING, false, false },
32554 /* Builtin mask mapping for printing the flags. */
32555 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
32557 { "altivec", RS6000_BTM_ALTIVEC, false, false },
32558 { "vsx", RS6000_BTM_VSX, false, false },
32559 { "spe", RS6000_BTM_SPE, false, false },
32560 { "paired", RS6000_BTM_PAIRED, false, false },
32561 { "fre", RS6000_BTM_FRE, false, false },
32562 { "fres", RS6000_BTM_FRES, false, false },
32563 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
32564 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
32565 { "popcntd", RS6000_BTM_POPCNTD, false, false },
32566 { "cell", RS6000_BTM_CELL, false, false },
32567 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
32568 { "crypto", RS6000_BTM_CRYPTO, false, false },
32569 { "htm", RS6000_BTM_HTM, false, false },
32570 { "hard-dfp", RS6000_BTM_DFP, false, false },
32571 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
32572 { "long-double-128", RS6000_BTM_LDBL128, false, false },
32575 /* Option variables that we want to support inside attribute((target)) and
32576 #pragma GCC target operations. */
32578 struct rs6000_opt_var {
32579 const char *name; /* option name */
32580 size_t global_offset; /* offset of the option in global_options. */
32581 size_t target_offset; /* offset of the option in target options. */
32584 static struct rs6000_opt_var const rs6000_opt_vars[] =
32586 { "friz",
32587 offsetof (struct gcc_options, x_TARGET_FRIZ),
32588 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
32589 { "avoid-indexed-addresses",
32590 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
32591 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
32592 { "paired",
32593 offsetof (struct gcc_options, x_rs6000_paired_float),
32594 offsetof (struct cl_target_option, x_rs6000_paired_float), },
32595 { "longcall",
32596 offsetof (struct gcc_options, x_rs6000_default_long_calls),
32597 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
32598 { "optimize-swaps",
32599 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
32600 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
32601 { "allow-movmisalign",
32602 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
32603 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
32604 { "allow-df-permute",
32605 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
32606 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
32607 { "sched-groups",
32608 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
32609 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
32610 { "always-hint",
32611 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
32612 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
32613 { "align-branch-targets",
32614 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
32615 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
32616 { "vectorize-builtins",
32617 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
32618 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
32619 { "tls-markers",
32620 offsetof (struct gcc_options, x_tls_markers),
32621 offsetof (struct cl_target_option, x_tls_markers), },
32622 { "sched-prolog",
32623 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
32624 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
32625 { "sched-epilog",
32626 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
32627 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
32628 { "gen-cell-microcode",
32629 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
32630 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
32631 { "warn-cell-microcode",
32632 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
32633 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
32636 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
32637 parsing. Return true if there were no errors. */
32639 static bool
32640 rs6000_inner_target_options (tree args, bool attr_p)
32642 bool ret = true;
32644 if (args == NULL_TREE)
32645 ;
32647 else if (TREE_CODE (args) == STRING_CST)
32649 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32650 char *q;
32652 while ((q = strtok (p, ",")) != NULL)
32654 bool error_p = false;
32655 bool not_valid_p = false;
32656 const char *cpu_opt = NULL;
32658 p = NULL;
32659 if (strncmp (q, "cpu=", 4) == 0)
32661 int cpu_index = rs6000_cpu_name_lookup (q+4);
32662 if (cpu_index >= 0)
32663 rs6000_cpu_index = cpu_index;
32664 else
32666 error_p = true;
32667 cpu_opt = q+4;
32670 else if (strncmp (q, "tune=", 5) == 0)
32672 int tune_index = rs6000_cpu_name_lookup (q+5);
32673 if (tune_index >= 0)
32674 rs6000_tune_index = tune_index;
32675 else
32677 error_p = true;
32678 cpu_opt = q+5;
32681 else
32683 size_t i;
32684 bool invert = false;
32685 char *r = q;
32687 error_p = true;
32688 if (strncmp (r, "no-", 3) == 0)
32690 invert = true;
32691 r += 3;
32694 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
32695 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
32697 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
32699 if (!rs6000_opt_masks[i].valid_target)
32700 not_valid_p = true;
32701 else
32703 error_p = false;
32704 rs6000_isa_flags_explicit |= mask;
32706 /* VSX needs altivec, so -mvsx automagically sets
32707 altivec and disables -mavoid-indexed-addresses. */
32708 if (!invert)
32710 if (mask == OPTION_MASK_VSX)
32712 mask |= OPTION_MASK_ALTIVEC;
32713 TARGET_AVOID_XFORM = 0;
32717 if (rs6000_opt_masks[i].invert)
32718 invert = !invert;
32720 if (invert)
32721 rs6000_isa_flags &= ~mask;
32722 else
32723 rs6000_isa_flags |= mask;
32725 break;
32728 if (error_p && !not_valid_p)
32730 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
32731 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
32733 size_t j = rs6000_opt_vars[i].global_offset;
32734 *((int *) ((char *)&global_options + j)) = !invert;
32735 error_p = false;
32736 not_valid_p = false;
32737 break;
32742 if (error_p)
32744 const char *eprefix, *esuffix;
32746 ret = false;
32747 if (attr_p)
32749 eprefix = "__attribute__((__target__(";
32750 esuffix = ")))";
32752 else
32754 eprefix = "#pragma GCC target ";
32755 esuffix = "";
32758 if (cpu_opt)
32759 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
32760 q, esuffix);
32761 else if (not_valid_p)
32762 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
32763 else
32764 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
32769 else if (TREE_CODE (args) == TREE_LIST)
32773 tree value = TREE_VALUE (args);
32774 if (value)
32776 bool ret2 = rs6000_inner_target_options (value, attr_p);
32777 if (!ret2)
32778 ret = false;
32780 args = TREE_CHAIN (args);
32782 while (args != NULL_TREE);
32785 else
32786 gcc_unreachable ();
32788 return ret;
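/* Examples of the strings accepted here (illustrative):

     #pragma GCC target ("cpu=power8,no-vsx")
     int f (void) __attribute__ ((__target__ ("vsx,popcntd")));

   Each comma-separated token is "cpu=" or "tune=" followed by a CPU
   name, or an option from rs6000_opt_masks / rs6000_opt_vars,
   optionally prefixed with "no-" to invert it.  */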
32791 /* Print out the target options as a list for -mdebug=target. */
32793 static void
32794 rs6000_debug_target_options (tree args, const char *prefix)
32796 if (args == NULL_TREE)
32797 fprintf (stderr, "%s<NULL>", prefix);
32799 else if (TREE_CODE (args) == STRING_CST)
32801 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32802 char *q;
32804 while ((q = strtok (p, ",")) != NULL)
32806 p = NULL;
32807 fprintf (stderr, "%s\"%s\"", prefix, q);
32808 prefix = ", ";
32812 else if (TREE_CODE (args) == TREE_LIST)
32816 tree value = TREE_VALUE (args);
32817 if (value)
32819 rs6000_debug_target_options (value, prefix);
32820 prefix = ", ";
32822 args = TREE_CHAIN (args);
32824 while (args != NULL_TREE);
32827 else
32828 gcc_unreachable ();
32830 return;
32834 /* Hook to validate attribute((target("..."))). */
32836 static bool
32837 rs6000_valid_attribute_p (tree fndecl,
32838 tree ARG_UNUSED (name),
32839 tree args,
32840 int flags)
32842 struct cl_target_option cur_target;
32843 bool ret;
32844 tree old_optimize = build_optimization_node (&global_options);
32845 tree new_target, new_optimize;
32846 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32848 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32850 if (TARGET_DEBUG_TARGET)
32852 tree tname = DECL_NAME (fndecl);
32853 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
32854 if (tname)
32855 fprintf (stderr, "function: %.*s\n",
32856 (int) IDENTIFIER_LENGTH (tname),
32857 IDENTIFIER_POINTER (tname));
32858 else
32859 fprintf (stderr, "function: unknown\n");
32861 fprintf (stderr, "args:");
32862 rs6000_debug_target_options (args, " ");
32863 fprintf (stderr, "\n");
32865 if (flags)
32866 fprintf (stderr, "flags: 0x%x\n", flags);
32868 fprintf (stderr, "--------------------\n");
32871 old_optimize = build_optimization_node (&global_options);
32872 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32874 /* If the function changed the optimization levels as well as setting target
32875 options, start with the optimizations specified. */
32876 if (func_optimize && func_optimize != old_optimize)
32877 cl_optimization_restore (&global_options,
32878 TREE_OPTIMIZATION (func_optimize));
32880 /* The target attributes may also change some optimization flags, so update
32881 the optimization options if necessary. */
32882 cl_target_option_save (&cur_target, &global_options);
32883 rs6000_cpu_index = rs6000_tune_index = -1;
32884 ret = rs6000_inner_target_options (args, true);
32886 /* Set up any additional state. */
32887 if (ret)
32889 ret = rs6000_option_override_internal (false);
32890 new_target = build_target_option_node (&global_options);
32892 else
32893 new_target = NULL;
32895 new_optimize = build_optimization_node (&global_options);
32897 if (!new_target)
32898 ret = false;
32900 else if (fndecl)
32902 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
32904 if (old_optimize != new_optimize)
32905 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32908 cl_target_option_restore (&global_options, &cur_target);
32910 if (old_optimize != new_optimize)
32911 cl_optimization_restore (&global_options,
32912 TREE_OPTIMIZATION (old_optimize));
32914 return ret;
32918 /* Hook to validate the current #pragma GCC target and set the state, and
32919 update the macros based on what was changed. If ARGS is NULL, then
32920 POP_TARGET is used to reset the options. */
32922 bool
32923 rs6000_pragma_target_parse (tree args, tree pop_target)
32925 tree prev_tree = build_target_option_node (&global_options);
32926 tree cur_tree;
32927 struct cl_target_option *prev_opt, *cur_opt;
32928 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
32929 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
32931 if (TARGET_DEBUG_TARGET)
32933 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
32934 fprintf (stderr, "args:");
32935 rs6000_debug_target_options (args, " ");
32936 fprintf (stderr, "\n");
32938 if (pop_target)
32940 fprintf (stderr, "pop_target:\n");
32941 debug_tree (pop_target);
32943 else
32944 fprintf (stderr, "pop_target: <NULL>\n");
32946 fprintf (stderr, "--------------------\n");
32949 if (! args)
32951 cur_tree = ((pop_target)
32952 ? pop_target
32953 : target_option_default_node);
32954 cl_target_option_restore (&global_options,
32955 TREE_TARGET_OPTION (cur_tree));
32957 else
32959 rs6000_cpu_index = rs6000_tune_index = -1;
32960 if (!rs6000_inner_target_options (args, false)
32961 || !rs6000_option_override_internal (false)
32962 || (cur_tree = build_target_option_node (&global_options))
32963 == NULL_TREE)
32965 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
32966 fprintf (stderr, "invalid pragma\n");
32968 return false;
32972 target_option_current_node = cur_tree;
32974 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
32975 change the macros that are defined. */
32976 if (rs6000_target_modify_macros_ptr)
32978 prev_opt = TREE_TARGET_OPTION (prev_tree);
32979 prev_bumask = prev_opt->x_rs6000_builtin_mask;
32980 prev_flags = prev_opt->x_rs6000_isa_flags;
32982 cur_opt = TREE_TARGET_OPTION (cur_tree);
32983 cur_flags = cur_opt->x_rs6000_isa_flags;
32984 cur_bumask = cur_opt->x_rs6000_builtin_mask;
32986 diff_bumask = (prev_bumask ^ cur_bumask);
32987 diff_flags = (prev_flags ^ cur_flags);
32989 if ((diff_flags != 0) || (diff_bumask != 0))
32991 /* Delete old macros. */
32992 rs6000_target_modify_macros_ptr (false,
32993 prev_flags & diff_flags,
32994 prev_bumask & diff_bumask);
32996 /* Define new macros. */
32997 rs6000_target_modify_macros_ptr (true,
32998 cur_flags & diff_flags,
32999 cur_bumask & diff_bumask);
33003 return true;
33007 /* Remember the last target of rs6000_set_current_function. */
33008 static GTY(()) tree rs6000_previous_fndecl;
33010 /* Establish appropriate back-end context for processing the function
33011 FNDECL. The argument might be NULL to indicate processing at top
33012 level, outside of any function scope. */
33013 static void
33014 rs6000_set_current_function (tree fndecl)
33016 tree old_tree = (rs6000_previous_fndecl
33017 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
33018 : NULL_TREE);
33020 tree new_tree = (fndecl
33021 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
33022 : NULL_TREE);
33024 if (TARGET_DEBUG_TARGET)
33026 bool print_final = false;
33027 fprintf (stderr, "\n==================== rs6000_set_current_function");
33029 if (fndecl)
33030 fprintf (stderr, ", fndecl %s (%p)",
33031 (DECL_NAME (fndecl)
33032 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
33033 : "<unknown>"), (void *)fndecl);
33035 if (rs6000_previous_fndecl)
33036 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
33038 fprintf (stderr, "\n");
33039 if (new_tree)
33041 fprintf (stderr, "\nnew fndecl target specific options:\n");
33042 debug_tree (new_tree);
33043 print_final = true;
33046 if (old_tree)
33048 fprintf (stderr, "\nold fndecl target specific options:\n");
33049 debug_tree (old_tree);
33050 print_final = true;
33053 if (print_final)
33054 fprintf (stderr, "--------------------\n");
33057 /* Only change the context if the function changes. This hook is called
33058 several times in the course of compiling a function, and we don't want to
33059 slow things down too much or call target_reinit when it isn't safe. */
33060 if (fndecl && fndecl != rs6000_previous_fndecl)
33062 rs6000_previous_fndecl = fndecl;
33063 if (old_tree == new_tree)
33064 ;
33066 else if (new_tree && new_tree != target_option_default_node)
33068 cl_target_option_restore (&global_options,
33069 TREE_TARGET_OPTION (new_tree));
33070 if (TREE_TARGET_GLOBALS (new_tree))
33071 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33072 else
33073 TREE_TARGET_GLOBALS (new_tree)
33074 = save_target_globals_default_opts ();
33077 else if (old_tree && old_tree != target_option_default_node)
33079 new_tree = target_option_current_node;
33080 cl_target_option_restore (&global_options,
33081 TREE_TARGET_OPTION (new_tree));
33082 if (TREE_TARGET_GLOBALS (new_tree))
33083 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33084 else if (new_tree == target_option_default_node)
33085 restore_target_globals (&default_target_globals);
33086 else
33087 TREE_TARGET_GLOBALS (new_tree)
33088 = save_target_globals_default_opts ();
33094 /* Save the current options.  */
33096 static void
33097 rs6000_function_specific_save (struct cl_target_option *ptr,
33098 struct gcc_options *opts)
33100 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
33101 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
33104 /* Restore the current options.  */
33106 static void
33107 rs6000_function_specific_restore (struct gcc_options *opts,
33108 struct cl_target_option *ptr)
33111 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
33112 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
33113 (void) rs6000_option_override_internal (false);
33116 /* Print the current options.  */
33118 static void
33119 rs6000_function_specific_print (FILE *file, int indent,
33120 struct cl_target_option *ptr)
33122 rs6000_print_isa_options (file, indent, "Isa options set",
33123 ptr->x_rs6000_isa_flags);
33125 rs6000_print_isa_options (file, indent, "Isa options explicit",
33126 ptr->x_rs6000_isa_flags_explicit);
33129 /* Helper function to print the current isa or misc options on a line. */
33131 static void
33132 rs6000_print_options_internal (FILE *file,
33133 int indent,
33134 const char *string,
33135 HOST_WIDE_INT flags,
33136 const char *prefix,
33137 const struct rs6000_opt_mask *opts,
33138 size_t num_elements)
33140 size_t i;
33141 size_t start_column = 0;
33142 size_t cur_column;
33143 size_t max_column = 76;
33144 const char *comma = "";
33146 if (indent)
33147 start_column += fprintf (file, "%*s", indent, "");
33149 if (!flags)
33151 fprintf (file, DEBUG_FMT_S, string, "<none>");
33152 return;
33155 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
33157 /* Print the various mask options. */
33158 cur_column = start_column;
33159 for (i = 0; i < num_elements; i++)
33161 if ((flags & opts[i].mask) != 0)
33163 const char *no_str = opts[i].invert ? "no-" : "";
33164 size_t len = (strlen (comma)
33165 + strlen (prefix)
33166 + strlen (no_str)
33167 + strlen (opts[i].name));
33169 cur_column += len;
33170 if (cur_column > max_column)
33172 fprintf (file, ", \\\n%*s", (int)start_column, "");
33173 cur_column = start_column + len;
33174 comma = "";
33177 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
33178 opts[i].name);
33179 flags &= ~ opts[i].mask;
33180 comma = ", ";
33184 fputs ("\n", file);
33187 /* Helper function to print the current isa options on a line. */
33189 static void
33190 rs6000_print_isa_options (FILE *file, int indent, const char *string,
33191 HOST_WIDE_INT flags)
33193 rs6000_print_options_internal (file, indent, string, flags, "-m",
33194 &rs6000_opt_masks[0],
33195 ARRAY_SIZE (rs6000_opt_masks));
33198 static void
33199 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
33200 HOST_WIDE_INT flags)
33202 rs6000_print_options_internal (file, indent, string, flags, "",
33203 &rs6000_builtin_mask_names[0],
33204 ARRAY_SIZE (rs6000_builtin_mask_names));
33208 /* Hook to determine if one function can safely inline another. */
33210 static bool
33211 rs6000_can_inline_p (tree caller, tree callee)
33213 bool ret = false;
33214 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33215 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33217 /* If callee has no option attributes, then it is ok to inline. */
33218 if (!callee_tree)
33219 ret = true;
33221 /* If caller has no option attributes, but callee does then it is not ok to
33222 inline. */
33223 else if (!caller_tree)
33224 ret = false;
33226 else
33228 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
33229 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
33231 /* Callee's options should be a subset of the caller's, i.e. a vsx function
33232 can inline an altivec function but a non-vsx function can't inline a
33233 vsx function. */
33234 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
33235 == callee_opts->x_rs6000_isa_flags)
33236 ret = true;
33239 if (TARGET_DEBUG_TARGET)
33240 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
33241 (DECL_NAME (caller)
33242 ? IDENTIFIER_POINTER (DECL_NAME (caller))
33243 : "<unknown>"),
33244 (DECL_NAME (callee)
33245 ? IDENTIFIER_POINTER (DECL_NAME (callee))
33246 : "<unknown>"),
33247 (ret ? "can" : "cannot"));
33249 return ret;
33252 /* Allocate a stack temp and fix up the address so it meets the particular
33253 memory requirements (either offsettable or REG+REG addressing). */
33255 rtx
33256 rs6000_allocate_stack_temp (machine_mode mode,
33257 bool offsettable_p,
33258 bool reg_reg_p)
33260 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
33261 rtx addr = XEXP (stack, 0);
33262 int strict_p = (reload_in_progress || reload_completed);
33264 if (!legitimate_indirect_address_p (addr, strict_p))
33266 if (offsettable_p
33267 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
33268 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
33270 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
33271 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
33274 return stack;
33277 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
33278 to such a form to deal with memory reference instructions like STFIWX that
33279 only take reg+reg addressing. */
33281 rtx
33282 rs6000_address_for_fpconvert (rtx x)
33284 int strict_p = (reload_in_progress || reload_completed);
33285 rtx addr;
33287 gcc_assert (MEM_P (x));
33288 addr = XEXP (x, 0);
33289 if (! legitimate_indirect_address_p (addr, strict_p)
33290 && ! legitimate_indexed_address_p (addr, strict_p))
33292 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
33294 rtx reg = XEXP (addr, 0);
33295 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
33296 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
33297 gcc_assert (REG_P (reg));
33298 emit_insn (gen_add3_insn (reg, reg, size_rtx));
33299 addr = reg;
33301 else if (GET_CODE (addr) == PRE_MODIFY)
33303 rtx reg = XEXP (addr, 0);
33304 rtx expr = XEXP (addr, 1);
33305 gcc_assert (REG_P (reg));
33306 gcc_assert (GET_CODE (expr) == PLUS);
33307 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
33308 addr = reg;
33311 x = replace_equiv_address (x, copy_addr_to_reg (addr));
33314 return x;
33317 /* Given a memory reference, if it is not in the form for altivec memory
33318 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
33319 convert to the altivec format. */
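/* The AND with -16 below mirrors what the hardware does: lvx/stvx
   ignore the low four address bits, so masking them off makes the
   16-byte-aligned access explicit in the RTL.  */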
33321 rtx
33322 rs6000_address_for_altivec (rtx x)
33324 gcc_assert (MEM_P (x));
33325 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
33327 rtx addr = XEXP (x, 0);
33328 int strict_p = (reload_in_progress || reload_completed);
33330 if (!legitimate_indexed_address_p (addr, strict_p)
33331 && !legitimate_indirect_address_p (addr, strict_p))
33332 addr = copy_to_mode_reg (Pmode, addr);
33334 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
33335 x = change_address (x, GET_MODE (x), addr);
33338 return x;
33341 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
33343 On the RS/6000, all integer constants are acceptable, though most won't
33344 be valid for particular insns. Only easy FP constants are acceptable. */
33346 static bool
33347 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
33349 if (TARGET_ELF && tls_referenced_p (x))
33350 return false;
33352 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
33353 || GET_MODE (x) == VOIDmode
33354 || (TARGET_POWERPC64 && mode == DImode)
33355 || easy_fp_constant (x, mode)
33356 || easy_vector_constant (x, mode));
33360 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
33362 static bool
33363 chain_already_loaded (rtx_insn *last)
33365 for (; last != NULL; last = PREV_INSN (last))
33367 if (NONJUMP_INSN_P (last))
33369 rtx patt = PATTERN (last);
33371 if (GET_CODE (patt) == SET)
33373 rtx lhs = XEXP (patt, 0);
33375 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
33376 return true;
33380 return false;
33383 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
33385 void
33386 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
33388 const bool direct_call_p
33389 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
33390 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
33391 rtx toc_load = NULL_RTX;
33392 rtx toc_restore = NULL_RTX;
33393 rtx func_addr;
33394 rtx abi_reg = NULL_RTX;
33395 rtx call[4];
33396 int n_call;
33397 rtx insn;
33399 /* Handle longcall attributes. */
33400 if (INTVAL (cookie) & CALL_LONG)
33401 func_desc = rs6000_longcall_ref (func_desc);
33403 /* Handle indirect calls. */
33404 if (GET_CODE (func_desc) != SYMBOL_REF
33405 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
33407 /* Save the TOC into its reserved slot before the call,
33408 and prepare to restore it after the call. */
33409 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
33410 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
33411 rtx stack_toc_mem = gen_frame_mem (Pmode,
33412 gen_rtx_PLUS (Pmode, stack_ptr,
33413 stack_toc_offset));
33414 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
33415 gen_rtvec (1, stack_toc_offset),
33416 UNSPEC_TOCSLOT);
33417 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
33419 /* Can we optimize saving the TOC in the prologue or
33420 do we need to do it at every call? */
33421 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
33422 cfun->machine->save_toc_in_prologue = true;
33423 else
33425 MEM_VOLATILE_P (stack_toc_mem) = 1;
33426 emit_move_insn (stack_toc_mem, toc_reg);
33429 if (DEFAULT_ABI == ABI_ELFv2)
33431 /* A function pointer in the ELFv2 ABI is just a plain address, but
33432 the ABI requires it to be loaded into r12 before the call. */
33433 func_addr = gen_rtx_REG (Pmode, 12);
33434 emit_move_insn (func_addr, func_desc);
33435 abi_reg = func_addr;
33437 else
33439 /* A function pointer under AIX is a pointer to a data area whose
33440 first word contains the actual address of the function, whose
33441 second word contains a pointer to its TOC, and whose third word
33442 contains a value to place in the static chain register (r11).
33443 Note that if we load the static chain, our "trampoline" need
33444 not have any executable code. */
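/* For illustration, the descriptor layout (word size is
   GET_MODE_SIZE (Pmode), i.e. 8 bytes for 64-bit):

     0(desc)   entry-point address of the function
     8(desc)   TOC pointer for the function
     16(desc)  static chain value, if any.  */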
33446 /* Load up address of the actual function. */
33447 func_desc = force_reg (Pmode, func_desc);
33448 func_addr = gen_reg_rtx (Pmode);
33449 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
33451 /* Prepare to load the TOC of the called function. Note that the
33452 TOC load must happen immediately before the actual call so
33453 that unwinding the TOC registers works correctly. See the
33454 comment in frob_update_context. */
33455 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
33456 rtx func_toc_mem = gen_rtx_MEM (Pmode,
33457 gen_rtx_PLUS (Pmode, func_desc,
33458 func_toc_offset));
33459 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
33461 /* If we have a static chain, load it up. But, if the call was
33462 originally direct, the 3rd word has not been written since no
33463 trampoline has been built, so we ought not to load it, lest we
33464 override a static chain value. */
33465 if (!direct_call_p
33466 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
33467 && !chain_already_loaded (get_current_sequence ()->next->last))
33469 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
33470 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
33471 rtx func_sc_mem = gen_rtx_MEM (Pmode,
33472 gen_rtx_PLUS (Pmode, func_desc,
33473 func_sc_offset));
33474 emit_move_insn (sc_reg, func_sc_mem);
33475 abi_reg = sc_reg;
33479 else
33481 /* Direct calls use the TOC: for local calls, the callee will
33482 assume the TOC register is set; for non-local calls, the
33483 PLT stub needs the TOC register. */
33484 abi_reg = toc_reg;
33485 func_addr = func_desc;
33488 /* Create the call. */
33489 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
33490 if (value != NULL_RTX)
33491 call[0] = gen_rtx_SET (value, call[0]);
33492 n_call = 1;
33494 if (toc_load)
33495 call[n_call++] = toc_load;
33496 if (toc_restore)
33497 call[n_call++] = toc_restore;
33499 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
33501 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
33502 insn = emit_call_insn (insn);
33504 /* Mention all registers defined by the ABI to hold information
33505 as uses in CALL_INSN_FUNCTION_USAGE. */
33506 if (abi_reg)
33507 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
33510 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
33512 void
33513 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
33515 rtx call[2];
33516 rtx insn;
33518 gcc_assert (INTVAL (cookie) == 0);
33520 /* Create the call. */
33521 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
33522 if (value != NULL_RTX)
33523 call[0] = gen_rtx_SET (value, call[0]);
33525 call[1] = simple_return_rtx;
33527 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
33528 insn = emit_call_insn (insn);
33530 /* Note use of the TOC register. */
33531 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
33532 /* We need to also mark a use of the link register since the function we
33533 sibling-call to will use it to return to our caller. */
33534 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
33537 /* Return whether we need to always update the saved TOC pointer when we update
33538 the stack pointer. */
33540 static bool
33541 rs6000_save_toc_in_prologue_p (void)
33543 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
33546 #ifdef HAVE_GAS_HIDDEN
33547 # define USE_HIDDEN_LINKONCE 1
33548 #else
33549 # define USE_HIDDEN_LINKONCE 0
33550 #endif
33552 /* Fills in the label name that should be used for a 476 link stack thunk. */
33554 void
33555 get_ppc476_thunk_name (char name[32])
33557 gcc_assert (TARGET_LINK_STACK);
33559 if (USE_HIDDEN_LINKONCE)
33560 sprintf (name, "__ppc476.get_thunk");
33561 else
33562 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
33565 /* This function emits the simple thunk routine that is used to preserve
33566 the link stack on the 476 cpu. */
33568 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
33569 static void
33570 rs6000_code_end (void)
33572 char name[32];
33573 tree decl;
33575 if (!TARGET_LINK_STACK)
33576 return;
33578 get_ppc476_thunk_name (name);
33580 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
33581 build_function_type_list (void_type_node, NULL_TREE));
33582 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
33583 NULL_TREE, void_type_node);
33584 TREE_PUBLIC (decl) = 1;
33585 TREE_STATIC (decl) = 1;
33587 #if RS6000_WEAK
33588 if (USE_HIDDEN_LINKONCE)
33590 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
33591 targetm.asm_out.unique_section (decl, 0);
33592 switch_to_section (get_named_section (decl, NULL, 0));
33593 DECL_WEAK (decl) = 1;
33594 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
33595 targetm.asm_out.globalize_label (asm_out_file, name);
33596 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
33597 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
33599 else
33600 #endif
33602 switch_to_section (text_section);
33603 ASM_OUTPUT_LABEL (asm_out_file, name);
33606 DECL_INITIAL (decl) = make_node (BLOCK);
33607 current_function_decl = decl;
33608 init_function_start (decl);
33609 first_function_block_is_cold = false;
33610 /* Make sure unwind info is emitted for the thunk if needed. */
33611 final_start_function (emit_barrier (), asm_out_file, 1);
33613 fputs ("\tblr\n", asm_out_file);
33615 final_end_function ();
33616 init_insn_lengths ();
33617 free_after_compilation (cfun);
33618 set_cfun (NULL);
33619 current_function_decl = NULL;
33622 /* Add r30 to hard reg set if the prologue sets it up and it is not
33623 pic_offset_table_rtx. */
33625 static void
33626 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
33628 if (!TARGET_SINGLE_PIC_BASE
33629 && TARGET_TOC
33630 && TARGET_MINIMAL_TOC
33631 && get_pool_size () != 0)
33632 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
33636 /* Helper function for rs6000_split_logical to emit a logical instruction after
33637 splitting the operation into single GPR registers.
33639 DEST is the destination register.
33640 OP1 and OP2 are the input source registers.
33641 CODE is the base operation (AND, IOR, XOR, NOT).
33642 MODE is the machine mode.
33643 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33644 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33645 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33647 static void
33648 rs6000_split_logical_inner (rtx dest,
33649 rtx op1,
33650 rtx op2,
33651 enum rtx_code code,
33652 machine_mode mode,
33653 bool complement_final_p,
33654 bool complement_op1_p,
33655 bool complement_op2_p)
33657 rtx bool_rtx;
33659 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
33660 if (op2 && GET_CODE (op2) == CONST_INT
33661 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
33662 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33664 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
33665 HOST_WIDE_INT value = INTVAL (op2) & mask;
33667 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
33668 if (code == AND)
33670 if (value == 0)
33672 emit_insn (gen_rtx_SET (dest, const0_rtx));
33673 return;
33676 else if (value == mask)
33678 if (!rtx_equal_p (dest, op1))
33679 emit_insn (gen_rtx_SET (dest, op1));
33680 return;
33684 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
33685 into separate ORI/ORIS or XORI/XORIS instructions. */
33686 else if (code == IOR || code == XOR)
33688 if (value == 0)
33690 if (!rtx_equal_p (dest, op1))
33691 emit_insn (gen_rtx_SET (dest, op1));
33692 return;
33697 if (code == AND && mode == SImode
33698 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33700 emit_insn (gen_andsi3 (dest, op1, op2));
33701 return;
33704 if (complement_op1_p)
33705 op1 = gen_rtx_NOT (mode, op1);
33707 if (complement_op2_p)
33708 op2 = gen_rtx_NOT (mode, op2);
33710 /* For canonical RTL, if only one arm is inverted it is the first. */
33711 if (!complement_op1_p && complement_op2_p)
33712 std::swap (op1, op2);
33714 bool_rtx = ((code == NOT)
33715 ? gen_rtx_NOT (mode, op1)
33716 : gen_rtx_fmt_ee (code, mode, op1, op2));
33718 if (complement_final_p)
33719 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
33721 emit_insn (gen_rtx_SET (dest, bool_rtx));
33724 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
33725 operations are split immediately during RTL generation to allow for more
33726 optimizations of the AND/IOR/XOR.
33728 OPERANDS is an array containing the destination and two input operands.
33729 CODE is the base operation (AND, IOR, XOR, NOT).
33730 MODE is the machine mode.
33731 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33732 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33733 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
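/* For example, IOR of a 32-bit half with 0x12345678, which is not a
   valid single logical immediate, is handled below by an ORIS of
   0x1234 into a temporary followed by an ORI of 0x5678.  */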
33737 static void
33738 rs6000_split_logical_di (rtx operands[3],
33739 enum rtx_code code,
33740 bool complement_final_p,
33741 bool complement_op1_p,
33742 bool complement_op2_p)
33744 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
33745 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
33746 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
33747 enum hi_lo { hi = 0, lo = 1 };
33748 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
33749 size_t i;
33751 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
33752 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
33753 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
33754 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
33756 if (code == NOT)
33757 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
33758 else
33760 if (GET_CODE (operands[2]) != CONST_INT)
33762 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
33763 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
33765 else
33767 HOST_WIDE_INT value = INTVAL (operands[2]);
33768 HOST_WIDE_INT value_hi_lo[2];
33770 gcc_assert (!complement_final_p);
33771 gcc_assert (!complement_op1_p);
33772 gcc_assert (!complement_op2_p);
33774 value_hi_lo[hi] = value >> 32;
33775 value_hi_lo[lo] = value & lower_32bits;
33777 for (i = 0; i < 2; i++)
33779 HOST_WIDE_INT sub_value = value_hi_lo[i];
33781 if (sub_value & sign_bit)
33782 sub_value |= upper_32bits;
33784 op2_hi_lo[i] = GEN_INT (sub_value);
33786 /* If this is an AND instruction, check to see if we need to load
33787 the value in a register. */
33788 if (code == AND && sub_value != -1 && sub_value != 0
33789 && !and_operand (op2_hi_lo[i], SImode))
33790 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
33795 for (i = 0; i < 2; i++)
33797 /* Split large IOR/XOR operations. */
33798 if ((code == IOR || code == XOR)
33799 && GET_CODE (op2_hi_lo[i]) == CONST_INT
33800 && !complement_final_p
33801 && !complement_op1_p
33802 && !complement_op2_p
33803 && !logical_const_operand (op2_hi_lo[i], SImode))
33805 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
33806 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
33807 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
33808 rtx tmp = gen_reg_rtx (SImode);
33810 /* Make sure the constant is sign extended. */
33811 if ((hi_16bits & sign_bit) != 0)
33812 hi_16bits |= upper_32bits;
33814 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
33815 code, SImode, false, false, false);
33817 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
33818 code, SImode, false, false, false);
33820 else
33821 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
33822 code, SImode, complement_final_p,
33823 complement_op1_p, complement_op2_p);
33826 return;
33829 /* Split the insns that make up boolean operations operating on multiple GPR
33830 registers. The boolean MD patterns ensure that the inputs either are
33831 exactly the same as the output registers, or there is no overlap.
33833 OPERANDS is an array containing the destination and two input operands.
33834 CODE is the base operation (AND, IOR, XOR, NOT).
33835 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33836 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33837 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33839 void
33840 rs6000_split_logical (rtx operands[3],
33841 enum rtx_code code,
33842 bool complement_final_p,
33843 bool complement_op1_p,
33844 bool complement_op2_p)
33846 machine_mode mode = GET_MODE (operands[0]);
33847 machine_mode sub_mode;
33848 rtx op0, op1, op2;
33849 int sub_size, regno0, regno1, nregs, i;
33851 /* If this is DImode, use the specialized version that can run before
33852 register allocation. */
33853 if (mode == DImode && !TARGET_POWERPC64)
33855 rs6000_split_logical_di (operands, code, complement_final_p,
33856 complement_op1_p, complement_op2_p);
33857 return;
33860 op0 = operands[0];
33861 op1 = operands[1];
33862 op2 = (code == NOT) ? NULL_RTX : operands[2];
33863 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
33864 sub_size = GET_MODE_SIZE (sub_mode);
33865 regno0 = REGNO (op0);
33866 regno1 = REGNO (op1);
33868 gcc_assert (reload_completed);
33869 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33870 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33872 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
33873 gcc_assert (nregs > 1);
33875 if (op2 && REG_P (op2))
33876 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
33878 for (i = 0; i < nregs; i++)
33880 int offset = i * sub_size;
33881 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
33882 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
33883 rtx sub_op2 = ((code == NOT)
33884 ? NULL_RTX
33885 : simplify_subreg (sub_mode, op2, mode, offset));
33887 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
33888 complement_final_p, complement_op1_p,
33889 complement_op2_p);
33892 return;
33896 /* Return true if the peephole2 can combine a load involving a combination of
33897 an addis instruction and a load with an offset that can be fused together on
33898 a power8. */
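/* A sketch of the two-insn sequence being matched (register numbers
   illustrative):

     addis 9,2,sym@toc@ha	; sets ADDIS_REG from ADDIS_VALUE
     lwz 10,sym@toc@l(9)	; loads TARGET using ADDIS_REG as base

   When ADDIS_REG and TARGET differ, ADDIS_REG must be dead after the
   pair and must not appear in the memory address.  */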
33900 bool
33901 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
33902 rtx addis_value, /* addis value. */
33903 rtx target, /* target register that is loaded. */
33904 rtx mem) /* bottom part of the memory addr. */
33906 rtx addr;
33907 rtx base_reg;
33909 /* Validate arguments. */
33910 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
33911 return false;
33913 if (!base_reg_operand (target, GET_MODE (target)))
33914 return false;
33916 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33917 return false;
33919 /* Allow sign/zero extension. */
33920 if (GET_CODE (mem) == ZERO_EXTEND
33921 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33922 mem = XEXP (mem, 0);
33924 if (!MEM_P (mem))
33925 return false;
33927 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33928 return false;
33930 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33931 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33932 return false;
33934 /* Validate that the register used to load the high value is either the
33935 register being loaded, or we can safely replace its use.
33937 This function is only called from the peephole2 pass and we assume that
33938 there are 2 instructions in the peephole (addis and load), so we want to
33939 check if the target register was not used in the memory address and the
33940 register to hold the addis result is dead after the peephole. */
33941 if (REGNO (addis_reg) != REGNO (target))
33943 if (reg_mentioned_p (target, mem))
33944 return false;
33946 if (!peep2_reg_dead_p (2, addis_reg))
33947 return false;
33949 /* If the target register being loaded is the stack pointer, we must
33950 avoid loading any other value into it, even temporarily. */
33951 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33952 return false;
33955 base_reg = XEXP (addr, 0);
33956 return REGNO (addis_reg) == REGNO (base_reg);
33959 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33960 sequence. We adjust the addis register to use the target register. If the
33961 load sign extends, we adjust the code to do the zero extending load, and an
33962 explicit sign extension later since the fusion only covers zero extending
33963 loads.
33965 The operands are:
33966 operands[0] register set with addis (to be replaced with target)
33967 operands[1] value set via addis
33968 operands[2] target register being loaded
33969 operands[3] D-form memory reference using operands[0]. */
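/* A sketch of the emitted insn (modes elided; the memory may first be
   wrapped in a zero_extend):

     (set (reg target)
          (unspec [(mem (plus addis_value offset))] UNSPEC_FUSION_GPR))

   The UNSPEC_FUSION_GPR wrapper marks the load as belonging to a fused
   addis/load sequence.  */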
33971 void
33972 expand_fusion_gpr_load (rtx *operands)
33974 rtx addis_value = operands[1];
33975 rtx target = operands[2];
33976 rtx orig_mem = operands[3];
33977 rtx new_addr, new_mem, orig_addr, offset;
33978 enum rtx_code plus_or_lo_sum;
33979 machine_mode target_mode = GET_MODE (target);
33980 machine_mode extend_mode = target_mode;
33981 machine_mode ptr_mode = Pmode;
33982 enum rtx_code extend = UNKNOWN;
33984 if (GET_CODE (orig_mem) == ZERO_EXTEND
33985 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
33987 extend = GET_CODE (orig_mem);
33988 orig_mem = XEXP (orig_mem, 0);
33989 target_mode = GET_MODE (orig_mem);
33992 gcc_assert (MEM_P (orig_mem));
33994 orig_addr = XEXP (orig_mem, 0);
33995 plus_or_lo_sum = GET_CODE (orig_addr);
33996 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
33998 offset = XEXP (orig_addr, 1);
33999 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
34000 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
34002 if (extend != UNKNOWN)
34003 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
34005 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
34006 UNSPEC_FUSION_GPR);
34007 emit_insn (gen_rtx_SET (target, new_mem));
34009 if (extend == SIGN_EXTEND)
34011 int sub_off = ((BYTES_BIG_ENDIAN)
34012 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
34013 : 0);
34014 rtx sign_reg
34015 = simplify_subreg (target_mode, target, extend_mode, sub_off);
34017 emit_insn (gen_rtx_SET (target,
34018 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
34021 return;
34024 /* Return a string to fuse an addis instruction with a gpr load to the same
34025 register that the addis instruction set up. The address that is used
34026 is the logical address that was formed during peephole2:
34027 (lo_sum (high) (low-part))
34029 The code is complicated, so we call output_asm_insn directly, and just
34030 return "". */
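/* For example, for a TOC-relative SImode load on ELF targets, the
   templates below produce roughly:

     addis 9,2,sym@toc@ha		# gpr load fusion, type int
     lwz 9,sym@toc@l(9)

   (register numbers illustrative).  */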
34032 const char *
34033 emit_fusion_gpr_load (rtx target, rtx mem)
34035 rtx addis_value;
34036 rtx fuse_ops[10];
34037 rtx addr;
34038 rtx load_offset;
34039 const char *addis_str = NULL;
34040 const char *load_str = NULL;
34041 const char *mode_name = NULL;
34042 char insn_template[80];
34043 machine_mode mode;
34044 const char *comment_str = ASM_COMMENT_START;
34046 if (GET_CODE (mem) == ZERO_EXTEND)
34047 mem = XEXP (mem, 0);
34049 gcc_assert (REG_P (target) && MEM_P (mem));
34051 if (*comment_str == ' ')
34052 comment_str++;
34054 addr = XEXP (mem, 0);
34055 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
34056 gcc_unreachable ();
34058 addis_value = XEXP (addr, 0);
34059 load_offset = XEXP (addr, 1);
34061 /* Now emit the load instruction to the same register. */
34062 mode = GET_MODE (mem);
34063 switch (mode)
34065 case QImode:
34066 mode_name = "char";
34067 load_str = "lbz";
34068 break;
34070 case HImode:
34071 mode_name = "short";
34072 load_str = "lhz";
34073 break;
34075 case SImode:
34076 mode_name = "int";
34077 load_str = "lwz";
34078 break;
34080 case DImode:
34081 gcc_assert (TARGET_POWERPC64);
34082 mode_name = "long";
34083 load_str = "ld";
34084 break;
34086 default:
34087 gcc_unreachable ();
34090 /* Emit the addis instruction. */
34091 fuse_ops[0] = target;
34092 if (satisfies_constraint_L (addis_value))
34094 fuse_ops[1] = addis_value;
34095 addis_str = "lis %0,%v1";
34098 else if (GET_CODE (addis_value) == PLUS)
34100 rtx op0 = XEXP (addis_value, 0);
34101 rtx op1 = XEXP (addis_value, 1);
34103 if (REG_P (op0) && CONST_INT_P (op1)
34104 && satisfies_constraint_L (op1))
34106 fuse_ops[1] = op0;
34107 fuse_ops[2] = op1;
34108 addis_str = "addis %0,%1,%v2";
34112 else if (GET_CODE (addis_value) == HIGH)
34114 rtx value = XEXP (addis_value, 0);
34115 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
34117 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
34118 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
34119 if (TARGET_ELF)
34120 addis_str = "addis %0,%2,%1@toc@ha";
34122 else if (TARGET_XCOFF)
34123 addis_str = "addis %0,%1@u(%2)";
34125 else
34126 gcc_unreachable ();
34129 else if (GET_CODE (value) == PLUS)
34131 rtx op0 = XEXP (value, 0);
34132 rtx op1 = XEXP (value, 1);
34134 if (GET_CODE (op0) == UNSPEC
34135 && XINT (op0, 1) == UNSPEC_TOCREL
34136 && CONST_INT_P (op1))
34138 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
34139 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
34140 fuse_ops[3] = op1;
34141 if (TARGET_ELF)
34142 addis_str = "addis %0,%2,%1+%3@toc@ha";
34144 else if (TARGET_XCOFF)
34145 addis_str = "addis %0,%1+%3@u(%2)";
34147 else
34148 gcc_unreachable ();
34152 else if (satisfies_constraint_L (value))
34154 fuse_ops[1] = value;
34155 addis_str = "lis %0,%v1";
34158 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
34160 fuse_ops[1] = value;
34161 addis_str = "lis %0,%1@ha";
34165 if (!addis_str)
34166 fatal_insn ("Could not generate addis value for fusion", addis_value);
34168 sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
34169 comment_str, mode_name);
34170 output_asm_insn (insn_template, fuse_ops);
34172 /* Emit the D-form load instruction. */
34173 if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
34175 sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
34176 fuse_ops[1] = load_offset;
34177 output_asm_insn (insn_template, fuse_ops);
34180 else if (GET_CODE (load_offset) == UNSPEC
34181 && XINT (load_offset, 1) == UNSPEC_TOCREL)
34183 if (TARGET_ELF)
34184 sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
34186 else if (TARGET_XCOFF)
34187 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
34189 else
34190 gcc_unreachable ();
34192 fuse_ops[1] = XVECEXP (load_offset, 0, 0);
34193 output_asm_insn (insn_template, fuse_ops);
34196 else if (GET_CODE (load_offset) == PLUS
34197 && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
34198 && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
34199 && CONST_INT_P (XEXP (load_offset, 1)))
34201 rtx tocrel_unspec = XEXP (load_offset, 0);
34202 if (TARGET_ELF)
34203 sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
34205 else if (TARGET_XCOFF)
34206 sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
34208 else
34209 gcc_unreachable ();
34211 fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
34212 fuse_ops[2] = XEXP (load_offset, 1);
34213 output_asm_insn (insn_template, fuse_ops);
34216 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
34218 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
34220 fuse_ops[1] = load_offset;
34221 output_asm_insn (insn_template, fuse_ops);
34224 else
34225 fatal_insn ("Unable to generate load offset for fusion", load_offset);
34227 return "";
34230 /* Analyze vector computations and remove unnecessary doubleword
34231 swaps (xxswapdi instructions). This pass is performed only
34232 for little-endian VSX code generation.
34234 For this specific case, loads and stores of 4x32 and 2x64 vectors
34235 are inefficient. These are implemented using the lvxd2x and
34236 stvxd2x instructions, which invert the order of doublewords in
34237 a vector register. Thus the code generation inserts an xxswapdi
34238 after each such load, and prior to each such store. (For spill
34239 code after register assignment, an additional xxswapdi is inserted
34240 following each store in order to return a hard register to its
34241 unpermuted value.)
34243 The extra xxswapdi instructions reduce performance. This can be
34244 particularly bad for vectorized code. The purpose of this pass
34245 is to reduce the number of xxswapdi instructions required for
34246 correctness.
34248 The primary insight is that much code that operates on vectors
34249 does not care about the relative order of elements in a register,
34250 so long as the correct memory order is preserved. If we have
34251 a computation where all input values are provided by lvxd2x/xxswapdi
34252 sequences, all outputs are stored using xxswapdi/stvxd2x sequences,
34253 and all intermediate computations are pure SIMD (independent of
34254 element order), then all the xxswapdi's associated with the loads
34255 and stores may be removed.
34257 This pass uses some of the infrastructure and logical ideas from
34258 the "web" pass in web.c. We create maximal webs of computations
34259 fitting the description above using union-find. Each such web is
34260 then optimized by removing its unnecessary xxswapdi instructions.
34262 The pass is placed prior to global optimization so that we can
34263 perform the optimization in the safest and simplest way possible;
34264 that is, by replacing each xxswapdi insn with a register copy insn.
34265 Subsequent forward propagation will remove copies where possible.
34267 There are some operations sensitive to element order for which we
34268 can still allow the operation, provided we modify those operations.
34269 These include CONST_VECTORs, for which we must swap the first and
34270 second halves of the constant vector; and SUBREGs, for which we
34271 must adjust the byte offset to account for the swapped doublewords.
34272 A remaining opportunity would be non-immediate-form splats, for
34273 which we should adjust the selected lane of the input. We should
34274 also make code generation adjustments for sum-across operations,
34275 since this is a common vectorizer reduction.
34277 Because we run prior to the first split, we can see loads and stores
34278 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
34279 vector loads and stores that have not yet been split into a permuting
34280 load/store and a swap. (One way this can happen is with a builtin
34281 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
34282 than deleting a swap, we convert the load/store into a permuting
34283 load/store (which effectively removes the swap). */
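/* For example (a sketch), a vectorized copy of the form

     for (i = 0; i < n; i++) y[i] = x[i];

   generates an lvxd2x/xxswapdi pair for each load and an
   xxswapdi/stvxd2x pair for each store on little-endian VSX; since a
   copy is insensitive to element order, this pass can delete both
   xxswapdi's in each pair.  */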
34285 /* Notes on Permutes
34287 We do not currently handle computations that contain permutes. There
34288 is a general transformation that can be performed correctly, but it
34289 may introduce more expensive code than it replaces. To handle these
34290 would require a cost model to determine when to perform the optimization.
34291 This commentary records how this could be done if desired.
34293 The most general permute is something like this (example for V16QI):
34295 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
34296 (parallel [(const_int a0) (const_int a1)
34297 ...
34298 (const_int a14) (const_int a15)]))
34300 where a0,...,a15 are in [0,31] and select elements from op1 and op2
34301 to produce in the result.
34303 Regardless of mode, we can convert the PARALLEL to a mask of 16
34304 byte-element selectors. Let's call this M, with M[i] representing
34305 the ith byte-element selector value. Then if we swap doublewords
34306 throughout the computation, we can get correct behavior by replacing
34307 M with M' as follows:
34309 { M[i+8]+8 : i < 8, M[i+8] in [0,7] U [16,23]
34310 M'[i] = { M[i+8]-8 : i < 8, M[i+8] in [8,15] U [24,31]
34311 { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23]
34312 { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
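As a worked example, suppose M[11] = 3, i.e. byte 3 of op1 lands in
result byte 11. After swapping doublewords everywhere, that source
byte sits at position 11 of swapped op1, and its value must now be
produced at result position 3; the first rule gives
M'[3] = M[3+8]+8 = 11, as required.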
34314 This seems promising at first, since we are just replacing one mask
34315 with another. But certain masks are preferable to others. If M
34316 is a mask that matches a vmrghh pattern, for example, M' certainly
34317 will not. Instead of a single vmrghh, we would generate a load of
34318 M' and a vperm. So we would need to know how many xxswapd's we can
34319 remove as a result of this transformation to determine if it's
34320 profitable; and preferably the logic would need to be aware of all
34321 the special preferable masks.
34323 Another form of permute is an UNSPEC_VPERM, in which the mask is
34324 already in a register. In some cases, this mask may be a constant
34325 that we can discover with ud-chains, in which case the above
34326 transformation is ok. However, the common usage here is for the
34327 mask to be produced by an UNSPEC_LVSL, in which case the mask
34328 cannot be known at compile time. In such a case we would have to
34329 generate several instructions to compute M' as above at run time,
34330 and a cost model is needed again. */
34332 /* This is based on the union-find logic in web.c. web_entry_base is
34333 defined in df.h. */
34334 class swap_web_entry : public web_entry_base
34336 public:
34337 /* Pointer to the insn. */
34338 rtx_insn *insn;
34339 /* Set if insn contains a mention of a vector register. All other
34340 fields are undefined if this field is unset. */
34341 unsigned int is_relevant : 1;
34342 /* Set if insn is a load. */
34343 unsigned int is_load : 1;
34344 /* Set if insn is a store. */
34345 unsigned int is_store : 1;
34346 /* Set if insn is a doubleword swap. This can either be a register swap
34347 or a permuting load or store (test is_load and is_store for this). */
34348 unsigned int is_swap : 1;
34349 /* Set if the insn has a live-in use of a parameter register. */
34350 unsigned int is_live_in : 1;
34351 /* Set if the insn has a live-out def of a return register. */
34352 unsigned int is_live_out : 1;
34353 /* Set if the insn contains a subreg reference of a vector register. */
34354 unsigned int contains_subreg : 1;
34355 /* Set if the insn contains a 128-bit integer operand. */
34356 unsigned int is_128_int : 1;
34357 /* Set if this is a call-insn. */
34358 unsigned int is_call : 1;
34359 /* Set if this insn does not perform a vector operation for which
34360 element order matters, or if we know how to fix it up if it does.
34361 Undefined if is_swap is set. */
34362 unsigned int is_swappable : 1;
34363 /* A nonzero value indicates what kind of special handling for this
34364 insn is required if doublewords are swapped. Undefined if
34365 is_swappable is not set. */
34366 unsigned int special_handling : 3;
34367 /* Set if the web represented by this entry cannot be optimized. */
34368 unsigned int web_not_optimizable : 1;
34369 /* Set if this insn should be deleted. */
34370 unsigned int will_delete : 1;
34373 enum special_handling_values {
34374 SH_NONE = 0,
34375 SH_CONST_VECTOR,
34376 SH_SUBREG,
34377 SH_NOSWAP_LD,
34378 SH_NOSWAP_ST,
34379 SH_EXTRACT,
34380 SH_SPLAT
34383 /* Union INSN with all insns containing definitions that reach USE.
34384 Detect whether USE is live-in to the current function. */
34385 static void
34386 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
34388 struct df_link *link = DF_REF_CHAIN (use);
34390 if (!link)
34391 insn_entry[INSN_UID (insn)].is_live_in = 1;
34393 while (link)
34395 if (DF_REF_IS_ARTIFICIAL (link->ref))
34396 insn_entry[INSN_UID (insn)].is_live_in = 1;
34398 if (DF_REF_INSN_INFO (link->ref))
34400 rtx def_insn = DF_REF_INSN (link->ref);
34401 (void)unionfind_union (insn_entry + INSN_UID (insn),
34402 insn_entry + INSN_UID (def_insn));
34405 link = link->next;
34409 /* Union INSN with all insns containing uses reached from DEF.
34410 Detect whether DEF is live-out from the current function. */
34411 static void
34412 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
34414 struct df_link *link = DF_REF_CHAIN (def);
34416 if (!link)
34417 insn_entry[INSN_UID (insn)].is_live_out = 1;
34419 while (link)
34421 /* This could be an eh use or some other artificial use;
34422 we treat these all the same (killing the optimization). */
34423 if (DF_REF_IS_ARTIFICIAL (link->ref))
34424 insn_entry[INSN_UID (insn)].is_live_out = 1;
34426 if (DF_REF_INSN_INFO (link->ref))
34428 rtx use_insn = DF_REF_INSN (link->ref);
34429 (void)unionfind_union (insn_entry + INSN_UID (insn),
34430 insn_entry + INSN_UID (use_insn));
34433 link = link->next;
34437 /* Return 1 iff INSN is a load insn, including permuting loads that
34438 represent an lvxd2x instruction; else return 0. */
34439 static unsigned int
34440 insn_is_load_p (rtx insn)
34442 rtx body = PATTERN (insn);
34444 if (GET_CODE (body) == SET)
34446 if (GET_CODE (SET_SRC (body)) == MEM)
34447 return 1;
34449 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
34450 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
34451 return 1;
34453 return 0;
34456 if (GET_CODE (body) != PARALLEL)
34457 return 0;
34459 rtx set = XVECEXP (body, 0, 0);
34461 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
34462 return 1;
34464 return 0;
34467 /* Return 1 iff INSN is a store insn, including permuting stores that
34468 represent an stvxd2x instruction; else return 0. */
34469 static unsigned int
34470 insn_is_store_p (rtx insn)
34472 rtx body = PATTERN (insn);
34473 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
34474 return 1;
34475 if (GET_CODE (body) != PARALLEL)
34476 return 0;
34477 rtx set = XVECEXP (body, 0, 0);
34478 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
34479 return 1;
34480 return 0;
34483 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
34484 a permuting load, or a permuting store. */
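/* E.g. for V4SI the selector that constitutes a doubleword swap is

     (vec_select:V4SI (reg:V4SI x) (parallel [2 3 0 1]))

   i.e. the second half of the elements followed by the first half.  */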
34485 static unsigned int
34486 insn_is_swap_p (rtx insn)
34488 rtx body = PATTERN (insn);
34489 if (GET_CODE (body) != SET)
34490 return 0;
34491 rtx rhs = SET_SRC (body);
34492 if (GET_CODE (rhs) != VEC_SELECT)
34493 return 0;
34494 rtx parallel = XEXP (rhs, 1);
34495 if (GET_CODE (parallel) != PARALLEL)
34496 return 0;
34497 unsigned int len = XVECLEN (parallel, 0);
34498 if (len != 2 && len != 4 && len != 8 && len != 16)
34499 return 0;
34500 for (unsigned int i = 0; i < len / 2; ++i)
34502 rtx op = XVECEXP (parallel, 0, i);
34503 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
34504 return 0;
34506 for (unsigned int i = len / 2; i < len; ++i)
34508 rtx op = XVECEXP (parallel, 0, i);
34509 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
34510 return 0;
34512 return 1;
34515 /* Return 1 iff OP is an operand that will not be affected by having
34516 vector doublewords swapped in memory. */
34517 static unsigned int
34518 rtx_is_swappable_p (rtx op, unsigned int *special)
34520 enum rtx_code code = GET_CODE (op);
34521 int i, j;
34522 rtx parallel;
34524 switch (code)
34526 case LABEL_REF:
34527 case SYMBOL_REF:
34528 case CLOBBER:
34529 case REG:
34530 return 1;
34532 case VEC_CONCAT:
34533 case ASM_INPUT:
34534 case ASM_OPERANDS:
34535 return 0;
34537 case CONST_VECTOR:
34539 *special = SH_CONST_VECTOR;
34540 return 1;
34543 case VEC_DUPLICATE:
34544 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
34545 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
34546 it represents a vector splat for which we can do special
34547 handling. */
34548 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
34549 return 1;
34550 else if (GET_CODE (XEXP (op, 0)) == REG
34551 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
34552 /* This catches V2DF and V2DI splat, at a minimum. */
34553 return 1;
34554 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
34555 /* If the duplicated item is from a select, defer to the select
34556 processing to see if we can change the lane for the splat. */
34557 return rtx_is_swappable_p (XEXP (op, 0), special);
34558 else
34559 return 0;
34561 case VEC_SELECT:
34562 /* A vec_extract operation is ok if we change the lane. */
34563 if (GET_CODE (XEXP (op, 0)) == REG
34564 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
34565 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
34566 && XVECLEN (parallel, 0) == 1
34567 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
34569 *special = SH_EXTRACT;
34570 return 1;
34572 else
34573 return 0;
34575 case UNSPEC:
34577 /* Various operations are unsafe for this optimization, at least
34578 without significant additional work. Permutes are obviously
34579 problematic, as both the permute control vector and the ordering
34580 of the target values are invalidated by doubleword swapping.
34581 Vector pack and unpack modify the number of vector lanes.
34582 Merge-high/low will not operate correctly on swapped operands.
34583 Vector shifts across element boundaries are clearly uncool,
34584 as are vector select and concatenate operations. Vector
34585 sum-across instructions define one operand with a specific
34586 order-dependent element, so additional fixup code would be
34587 needed to make those work. Vector set and non-immediate-form
34588 vector splat are element-order sensitive. A few of these
34589 cases might be workable with special handling if required.
34590 Adding cost modeling would be appropriate in some cases. */
34591 int val = XINT (op, 1);
34592 switch (val)
34594 default:
34595 break;
34596 case UNSPEC_VMRGH_DIRECT:
34597 case UNSPEC_VMRGL_DIRECT:
34598 case UNSPEC_VPACK_SIGN_SIGN_SAT:
34599 case UNSPEC_VPACK_SIGN_UNS_SAT:
34600 case UNSPEC_VPACK_UNS_UNS_MOD:
34601 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
34602 case UNSPEC_VPACK_UNS_UNS_SAT:
34603 case UNSPEC_VPERM:
34604 case UNSPEC_VPERM_UNS:
34605 case UNSPEC_VPERMHI:
34606 case UNSPEC_VPERMSI:
34607 case UNSPEC_VPKPX:
34608 case UNSPEC_VSLDOI:
34609 case UNSPEC_VSLO:
34610 case UNSPEC_VSRO:
34611 case UNSPEC_VSUM2SWS:
34612 case UNSPEC_VSUM4S:
34613 case UNSPEC_VSUM4UBS:
34614 case UNSPEC_VSUMSWS:
34615 case UNSPEC_VSUMSWS_DIRECT:
34616 case UNSPEC_VSX_CONCAT:
34617 case UNSPEC_VSX_SET:
34618 case UNSPEC_VSX_SLDWI:
34619 case UNSPEC_VUNPACK_HI_SIGN:
34620 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
34621 case UNSPEC_VUNPACK_LO_SIGN:
34622 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
34623 case UNSPEC_VUPKHPX:
34624 case UNSPEC_VUPKHS_V4SF:
34625 case UNSPEC_VUPKHU_V4SF:
34626 case UNSPEC_VUPKLPX:
34627 case UNSPEC_VUPKLS_V4SF:
34628 case UNSPEC_VUPKLU_V4SF:
34629 case UNSPEC_VSX_CVDPSPN:
34630 case UNSPEC_VSX_CVSPDP:
34631 case UNSPEC_VSX_CVSPDPN:
34632 return 0;
34633 case UNSPEC_VSPLT_DIRECT:
34634 *special = SH_SPLAT;
34635 return 1;
34639 default:
34640 break;
34643 const char *fmt = GET_RTX_FORMAT (code);
34644 int ok = 1;
34646 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34647 if (fmt[i] == 'e' || fmt[i] == 'u')
34649 unsigned int special_op = SH_NONE;
34650 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
34651 if (special_op == SH_NONE)
34652 continue;
34653 /* Ensure we never have two kinds of special handling
34654 for the same insn. */
34655 if (*special != SH_NONE && *special != special_op)
34656 return 0;
34657 *special = special_op;
34659 else if (fmt[i] == 'E')
34660 for (j = 0; j < XVECLEN (op, i); ++j)
34662 unsigned int special_op = SH_NONE;
34663 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
34664 if (special_op == SH_NONE)
34665 continue;
34666 /* Ensure we never have two kinds of special handling
34667 for the same insn. */
34668 if (*special != SH_NONE && *special != special_op)
34669 return 0;
34670 *special = special_op;
34673 return ok;
34676 /* Return 1 iff INSN is an insn that will not be affected by
34677 having vector doublewords swapped in memory (in which case
34678 *SPECIAL is unchanged), or that can be modified to be correct
34679 if vector doublewords are swapped in memory (in which case
34680 *SPECIAL is changed to a value indicating how). */
34681 static unsigned int
34682 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
34683 unsigned int *special)
34685 /* Calls are always bad. */
34686 if (GET_CODE (insn) == CALL_INSN)
34687 return 0;
34689 /* Loads and stores seen here are not permuting, but we can still
34690 fix them up by converting them to permuting ones. Exceptions:
34691 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
34692 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
34693 for the SET source. */
34694 rtx body = PATTERN (insn);
34695 int i = INSN_UID (insn);
34697 if (insn_entry[i].is_load)
34699 if (GET_CODE (body) == SET)
34701 *special = SH_NOSWAP_LD;
34702 return 1;
34704 else
34705 return 0;
34708 if (insn_entry[i].is_store)
34710 if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
34712 *special = SH_NOSWAP_ST;
34713 return 1;
34715 else
34716 return 0;
34719 /* A convert to single precision can be left as is provided that
34720 all of its uses are in xxspltw instructions that splat BE element
34721 zero. */
34722 if (GET_CODE (body) == SET
34723 && GET_CODE (SET_SRC (body)) == UNSPEC
34724 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
34726 df_ref def;
34727 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34729 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34731 struct df_link *link = DF_REF_CHAIN (def);
34732 if (!link)
34733 return 0;
34735 for (; link; link = link->next) {
34736 rtx use_insn = DF_REF_INSN (link->ref);
34737 rtx use_body = PATTERN (use_insn);
34738 if (GET_CODE (use_body) != SET
34739 || GET_CODE (SET_SRC (use_body)) != UNSPEC
34740 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
34741 || XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx)
34742 return 0;
34746 return 1;
34749 /* Otherwise check the operands for vector lane violations. */
34750 return rtx_is_swappable_p (body, special);
34753 enum chain_purpose { FOR_LOADS, FOR_STORES };
34755 /* Return true if the UD or DU chain headed by LINK is non-empty,
34756 and every entry on the chain references an insn that is a
34757 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
34758 register swap must have only permuting loads as reaching defs.
34759 If PURPOSE is FOR_STORES, each such register swap must have only
34760 register swaps or permuting stores as reached uses. */
34761 static bool
34762 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
34763 enum chain_purpose purpose)
34765 if (!link)
34766 return false;
34768 for (; link; link = link->next)
34770 if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
34771 continue;
34773 if (DF_REF_IS_ARTIFICIAL (link->ref))
34774 return false;
34776 rtx reached_insn = DF_REF_INSN (link->ref);
34777 unsigned uid = INSN_UID (reached_insn);
34778 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
34780 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
34781 || insn_entry[uid].is_store)
34782 return false;
34784 if (purpose == FOR_LOADS)
34786 df_ref use;
34787 FOR_EACH_INSN_INFO_USE (use, insn_info)
34789 struct df_link *swap_link = DF_REF_CHAIN (use);
34791 while (swap_link)
34793 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34794 return false;
34796 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
34797 unsigned uid2 = INSN_UID (swap_def_insn);
34799 /* Only permuting loads are allowed. */
34800 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
34801 return false;
34803 swap_link = swap_link->next;
34807 else if (purpose == FOR_STORES)
34809 df_ref def;
34810 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34812 struct df_link *swap_link = DF_REF_CHAIN (def);
34814 while (swap_link)
34816 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34817 return false;
34819 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
34820 unsigned uid2 = INSN_UID (swap_use_insn);
34822 /* Permuting stores or register swaps are allowed. */
34823 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
34824 return false;
34826 swap_link = swap_link->next;
34832 return true;
34835 /* Mark the xxswapdi instructions associated with permuting loads and
34836 stores for removal. Note that we only flag them for deletion here,
34837 as there is a possibility of a swap being reached from multiple
34838 loads, etc. */
34839 static void
34840 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
34842 rtx insn = insn_entry[i].insn;
34843 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34845 if (insn_entry[i].is_load)
34847 df_ref def;
34848 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34850 struct df_link *link = DF_REF_CHAIN (def);
34852 /* We know by now that these are swaps, so we can delete
34853 them confidently. */
34854 while (link)
34856 rtx use_insn = DF_REF_INSN (link->ref);
34857 insn_entry[INSN_UID (use_insn)].will_delete = 1;
34858 link = link->next;
34862 else if (insn_entry[i].is_store)
34864 df_ref use;
34865 FOR_EACH_INSN_INFO_USE (use, insn_info)
34867 /* Ignore uses for addressability. */
34868 machine_mode mode = GET_MODE (DF_REF_REG (use));
34869 if (!VECTOR_MODE_P (mode))
34870 continue;
34872 struct df_link *link = DF_REF_CHAIN (use);
34874 /* We know by now that these are swaps, so we can delete
34875 them confidently. */
34876 while (link)
34878 rtx def_insn = DF_REF_INSN (link->ref);
34879 insn_entry[INSN_UID (def_insn)].will_delete = 1;
34880 link = link->next;
34886 /* OP is either a CONST_VECTOR or an expression containing one.
34887 Swap the first half of the vector with the second in the first
34888 case. Recurse to find it in the second. */
34889 static void
34890 swap_const_vector_halves (rtx op)
34892 int i;
34893 enum rtx_code code = GET_CODE (op);
34894 if (GET_CODE (op) == CONST_VECTOR)
34896 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
34897 for (i = 0; i < half_units; ++i)
34899 rtx temp = CONST_VECTOR_ELT (op, i);
34900 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
34901 CONST_VECTOR_ELT (op, i + half_units) = temp;
34904 else
34906 int j;
34907 const char *fmt = GET_RTX_FORMAT (code);
34908 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34909 if (fmt[i] == 'e' || fmt[i] == 'u')
34910 swap_const_vector_halves (XEXP (op, i));
34911 else if (fmt[i] == 'E')
34912 for (j = 0; j < XVECLEN (op, i); ++j)
34913 swap_const_vector_halves (XVECEXP (op, i, j));
34917 /* Find all subregs of a vector expression that perform a narrowing,
34918 and adjust the subreg index to account for doubleword swapping. */
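/* E.g. (subreg:DF (reg:V2DF x) 8), naming the second doubleword,
   becomes (subreg:DF (reg:V2DF x) 0) once doublewords are swapped,
   and vice versa.  */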
34919 static void
34920 adjust_subreg_index (rtx op)
34922 enum rtx_code code = GET_CODE (op);
34923 if (code == SUBREG
34924 && (GET_MODE_SIZE (GET_MODE (op))
34925 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
34927 unsigned int index = SUBREG_BYTE (op);
34928 if (index < 8)
34929 index += 8;
34930 else
34931 index -= 8;
34932 SUBREG_BYTE (op) = index;
34935 const char *fmt = GET_RTX_FORMAT (code);
34936 int i,j;
34937 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34938 if (fmt[i] == 'e' || fmt[i] == 'u')
34939 adjust_subreg_index (XEXP (op, i));
34940 else if (fmt[i] == 'E')
34941 for (j = 0; j < XVECLEN (op, i); ++j)
34942 adjust_subreg_index (XVECEXP (op, i, j));
34945 /* Convert the non-permuting load INSN to a permuting one. */
34946 static void
34947 permute_load (rtx_insn *insn)
34949 rtx body = PATTERN (insn);
34950 rtx mem_op = SET_SRC (body);
34951 rtx tgt_reg = SET_DEST (body);
34952 machine_mode mode = GET_MODE (tgt_reg);
34953 int n_elts = GET_MODE_NUNITS (mode);
34954 int half_elts = n_elts / 2;
34955 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34956 int i, j;
34957 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34958 XVECEXP (par, 0, i) = GEN_INT (j);
34959 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34960 XVECEXP (par, 0, i) = GEN_INT (j);
34961 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
34962 SET_SRC (body) = sel;
34963 INSN_CODE (insn) = -1; /* Force re-recognition. */
34964 df_insn_rescan (insn);
34966 if (dump_file)
34967 fprintf (dump_file, "Replacing load %d with permuted load\n",
34968 INSN_UID (insn));
/* Convert the non-permuting store INSN to a permuting one.  */
static void
permute_store (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx src_reg = SET_SRC (body);
  machine_mode mode = GET_MODE (src_reg);
  int n_elts = GET_MODE_NUNITS (mode);
  int half_elts = n_elts / 2;
  rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
  int i, j;
  for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
    XVECEXP (par, 0, i) = GEN_INT (j);
  rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
  SET_SRC (body) = sel;
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Replacing store %d with permuted store\n",
	     INSN_UID (insn));
}
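/* This is the mirror image of permute_load: the source register is
   wrapped in the same half-swapping vec_select, so the pattern now
   describes what stxvd2x actually writes to memory on little-endian
   VSX.  */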
/* Given INSN that contains a vector extract operation, adjust the index
   of the extracted lane to account for the doubleword swap.  */
static void
adjust_extract (rtx_insn *insn)
{
  rtx pattern = PATTERN (insn);
  if (GET_CODE (pattern) == PARALLEL)
    pattern = XVECEXP (pattern, 0, 0);
  rtx src = SET_SRC (pattern);
  /* The vec_select may be wrapped in a vec_duplicate for a splat, so
     account for that.  */
  rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
  rtx par = XEXP (sel, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
  int lane = INTVAL (XVECEXP (par, 0, 0));
  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
  XVECEXP (par, 0, 0) = GEN_INT (lane);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
}
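/* For example, extracting lane 1 of a V4SI value (half_elts == 2)
   becomes an extract of lane 3 once the doublewords are swapped:
   lanes 0 and 1 trade places with lanes 2 and 3, so the adjustment
   is always lane plus or minus half_elts.  */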
/* Given INSN that contains a vector direct-splat operation, adjust the
   index of the source lane to account for the doubleword swap.  */
static void
adjust_splat (rtx_insn *insn)
{
  rtx body = PATTERN (insn);
  rtx unspec = XEXP (body, 1);
  int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
  int lane = INTVAL (XVECEXP (unspec, 0, 1));
  lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
  XVECEXP (unspec, 0, 1) = GEN_INT (lane);
  INSN_CODE (insn) = -1; /* Force re-recognition.  */
  df_insn_rescan (insn);

  if (dump_file)
    fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
}
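/* The lane arithmetic is the same as in adjust_extract above; only the
   location of the lane number differs, since a direct splat carries it
   as the second element of its UNSPEC vector.  */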
/* The insn described by INSN_ENTRY[I] can be swapped, but only
   with special handling.  Take care of that here.  */
static void
handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);

  switch (insn_entry[i].special_handling)
    {
    default:
      gcc_unreachable ();
    case SH_CONST_VECTOR:
      {
	/* A CONST_VECTOR will only show up somewhere in the RHS of a SET.  */
	gcc_assert (GET_CODE (body) == SET);
	rtx rhs = SET_SRC (body);
	swap_const_vector_halves (rhs);
	if (dump_file)
	  fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
	break;
      }
    case SH_SUBREG:
      /* A subreg of the same size is already safe.  For subregs that
	 select a smaller portion of a reg, adjust the index for
	 swapped doublewords.  */
      adjust_subreg_index (body);
      if (dump_file)
	fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
      break;
    case SH_NOSWAP_LD:
      /* Convert a non-permuting load to a permuting one.  */
      permute_load (insn);
      break;
    case SH_NOSWAP_ST:
      /* Convert a non-permuting store to a permuting one.  */
      permute_store (insn);
      break;
    case SH_EXTRACT:
      /* Change the lane on an extract operation.  */
      adjust_extract (insn);
      break;
    case SH_SPLAT:
      /* Change the lane on a direct-splat operation.  */
      adjust_splat (insn);
      break;
    }
}
/* Find the insn from the Ith table entry, which is known to be a
   register swap Y = SWAP(X).  Replace it with a copy Y = X.  */
static void
replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
{
  rtx_insn *insn = insn_entry[i].insn;
  rtx body = PATTERN (insn);
  rtx src_reg = XEXP (SET_SRC (body), 0);
  rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
  rtx_insn *new_insn = emit_insn_before (copy, insn);
  set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
  df_insn_rescan (new_insn);

  if (dump_file)
    {
      unsigned int new_uid = INSN_UID (new_insn);
      fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
    }

  df_insn_delete (insn);
  remove_insn (insn);
  insn->set_deleted ();
}
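/* For example, a register swap such as

     (set (reg:V4SI y)
	  (vec_select:V4SI (reg:V4SI x) (parallel [2 3 0 1])))

   in an optimizable web is replaced by the plain copy
   (set (reg:V4SI y) (reg:V4SI x)), which later passes can typically
   eliminate entirely.  */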
/* Dump the swap table to DUMP_FILE.  */
static void
dump_swap_insn_table (swap_web_entry *insn_entry)
{
  int e = get_max_uid ();
  fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");

  for (int i = 0; i < e; ++i)
    if (insn_entry[i].is_relevant)
      {
	swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
	fprintf (dump_file, "%6d %6d ", i,
		 pred_entry && pred_entry->insn
		 ? INSN_UID (pred_entry->insn) : 0);
	if (insn_entry[i].is_load)
	  fputs ("load ", dump_file);
	if (insn_entry[i].is_store)
	  fputs ("store ", dump_file);
	if (insn_entry[i].is_swap)
	  fputs ("swap ", dump_file);
	if (insn_entry[i].is_live_in)
	  fputs ("live-in ", dump_file);
	if (insn_entry[i].is_live_out)
	  fputs ("live-out ", dump_file);
	if (insn_entry[i].contains_subreg)
	  fputs ("subreg ", dump_file);
	if (insn_entry[i].is_128_int)
	  fputs ("int128 ", dump_file);
	if (insn_entry[i].is_call)
	  fputs ("call ", dump_file);
	if (insn_entry[i].is_swappable)
	  {
	    fputs ("swappable ", dump_file);
	    if (insn_entry[i].special_handling == SH_CONST_VECTOR)
	      fputs ("special:constvec ", dump_file);
	    else if (insn_entry[i].special_handling == SH_SUBREG)
	      fputs ("special:subreg ", dump_file);
	    else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
	      fputs ("special:load ", dump_file);
	    else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
	      fputs ("special:store ", dump_file);
	    else if (insn_entry[i].special_handling == SH_EXTRACT)
	      fputs ("special:extract ", dump_file);
	    else if (insn_entry[i].special_handling == SH_SPLAT)
	      fputs ("special:splat ", dump_file);
	  }
	if (insn_entry[i].web_not_optimizable)
	  fputs ("unoptimizable ", dump_file);
	if (insn_entry[i].will_delete)
	  fputs ("delete ", dump_file);
	fputs ("\n", dump_file);
      }
  fputs ("\n", dump_file);
}
/* Main entry point for this pass.  */
unsigned int
rs6000_analyze_swaps (function *fun)
{
  swap_web_entry *insn_entry;
  basic_block bb;
  rtx_insn *insn;

  /* Dataflow analysis for use-def chains.  */
  df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
  df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
  df_analyze ();
  df_set_flags (DF_DEFER_INSN_RESCAN);

  /* Allocate structure to represent webs of insns.  */
  insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());

  /* Walk the insns to gather basic data.  */
  FOR_ALL_BB_FN (bb, fun)
    FOR_BB_INSNS (bb, insn)
      {
	unsigned int uid = INSN_UID (insn);
	if (NONDEBUG_INSN_P (insn))
	  {
	    insn_entry[uid].insn = insn;

	    if (GET_CODE (insn) == CALL_INSN)
	      insn_entry[uid].is_call = 1;

	    /* Walk the uses and defs to see if we mention vector regs.
	       Record any constraints on optimization of such mentions.  */
	    struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	    df_ref mention;
	    FOR_EACH_INSN_INFO_USE (mention, insn_info)
	      {
		/* We use DF_REF_REAL_REG here to get inside any subregs.  */
		machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

		/* If a use gets its value from a call insn, it will be
		   a hard register and will look like (reg:V4SI 3 3).
		   The df analysis creates two mentions for GPR3 and GPR4,
		   both DImode.  We must recognize this and treat it as a
		   vector mention to ensure the call is unioned with this
		   use.  */
		if (mode == DImode && DF_REF_INSN_INFO (mention))
		  {
		    rtx feeder = DF_REF_INSN (mention);
		    /* FIXME: It is pretty hard to get from the df mention
		       to the mode of the use in the insn.  We arbitrarily
		       pick a vector mode here, even though the use might
		       be a real DImode.  We can be too conservative
		       (create a web larger than necessary) because of
		       this, so consider eventually fixing this.  */
		    if (GET_CODE (feeder) == CALL_INSN)
		      mode = V4SImode;
		  }

		if (VECTOR_MODE_P (mode) || mode == TImode)
		  {
		    insn_entry[uid].is_relevant = 1;
		    if (mode == TImode || mode == V1TImode)
		      insn_entry[uid].is_128_int = 1;
		    if (DF_REF_INSN_INFO (mention))
		      insn_entry[uid].contains_subreg
			= !rtx_equal_p (DF_REF_REG (mention),
					DF_REF_REAL_REG (mention));
		    union_defs (insn_entry, insn, mention);
		  }
	      }
	    FOR_EACH_INSN_INFO_DEF (mention, insn_info)
	      {
		/* We use DF_REF_REAL_REG here to get inside any subregs.  */
		machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));

		/* If we're loading up a hard vector register for a call,
		   it looks like (set (reg:V4SI 9 9) (...)).  The df
		   analysis creates two mentions for GPR9 and GPR10, both
		   DImode.  So relying on the mode from the mentions
		   isn't sufficient to ensure we union the call into the
		   web with the parameter setup code.  */
		rtx body = PATTERN (insn);
		if (mode == DImode && GET_CODE (body) == SET
		    && VECTOR_MODE_P (GET_MODE (SET_DEST (body))))
		  mode = GET_MODE (SET_DEST (body));

		if (VECTOR_MODE_P (mode) || mode == TImode)
		  {
		    insn_entry[uid].is_relevant = 1;
		    if (mode == TImode || mode == V1TImode)
		      insn_entry[uid].is_128_int = 1;
		    if (DF_REF_INSN_INFO (mention))
		      insn_entry[uid].contains_subreg
			= !rtx_equal_p (DF_REF_REG (mention),
					DF_REF_REAL_REG (mention));
		    /* REG_FUNCTION_VALUE_P is not valid for subregs.  */
		    else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
		      insn_entry[uid].is_live_out = 1;
		    union_uses (insn_entry, insn, mention);
		  }
	      }

	    if (insn_entry[uid].is_relevant)
	      {
		/* Determine if this is a load or store.  */
		insn_entry[uid].is_load = insn_is_load_p (insn);
		insn_entry[uid].is_store = insn_is_store_p (insn);

		/* Determine if this is a doubleword swap.  If not,
		   determine whether it can legally be swapped.  */
		if (insn_is_swap_p (insn))
		  insn_entry[uid].is_swap = 1;
		else
		  {
		    unsigned int special = SH_NONE;
		    insn_entry[uid].is_swappable
		      = insn_is_swappable_p (insn_entry, insn, &special);
		    if (special != SH_NONE && insn_entry[uid].contains_subreg)
		      insn_entry[uid].is_swappable = 0;
		    else if (special != SH_NONE)
		      insn_entry[uid].special_handling = special;
		    else if (insn_entry[uid].contains_subreg)
		      insn_entry[uid].special_handling = SH_SUBREG;
		  }
	      }
	  }
      }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table when first built\n");
      dump_swap_insn_table (insn_entry);
    }

  /* Record unoptimizable webs.  */
  unsigned e = get_max_uid (), i;
  for (i = 0; i < e; ++i)
    {
      if (!insn_entry[i].is_relevant)
	continue;

      swap_web_entry *root
	= (swap_web_entry*)(&insn_entry[i])->unionfind_root ();

      if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
	  || (insn_entry[i].contains_subreg
	      && insn_entry[i].special_handling != SH_SUBREG)
	  || insn_entry[i].is_128_int || insn_entry[i].is_call
	  || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
	root->web_not_optimizable = 1;

      /* If we have loads or stores that aren't permuting then the
	 optimization isn't appropriate.  */
      else if ((insn_entry[i].is_load || insn_entry[i].is_store)
	       && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
	root->web_not_optimizable = 1;

      /* If we have permuting loads or stores that are not accompanied
	 by a register swap, the optimization isn't appropriate.  */
      else if (insn_entry[i].is_load && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref def;

	  FOR_EACH_INSN_INFO_DEF (def, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (def);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
      else if (insn_entry[i].is_store && insn_entry[i].is_swap)
	{
	  rtx insn = insn_entry[i].insn;
	  struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
	  df_ref use;

	  FOR_EACH_INSN_INFO_USE (use, insn_info)
	    {
	      struct df_link *link = DF_REF_CHAIN (use);

	      if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
		{
		  root->web_not_optimizable = 1;
		  break;
		}
	    }
	}
    }

  if (dump_file)
    {
      fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
      dump_swap_insn_table (insn_entry);
    }

  /* For each load and store in an optimizable web (which implies
     the loads and stores are permuting), find the associated
     register swaps and mark them for removal.  Due to various
     optimizations we may mark the same swap more than once.  Also
     perform special handling for swappable insns that require it.  */
  for (i = 0; i < e; ++i)
    if ((insn_entry[i].is_load || insn_entry[i].is_store)
	&& insn_entry[i].is_swap)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  mark_swaps_for_removal (insn_entry, i);
      }
    else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
      {
	swap_web_entry* root_entry
	  = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
	if (!root_entry->web_not_optimizable)
	  handle_special_swappables (insn_entry, i);
      }

  /* Now delete the swaps marked for removal.  */
  for (i = 0; i < e; ++i)
    if (insn_entry[i].will_delete)
      replace_swap_with_copy (insn_entry, i);

  /* Clean up.  */
  free (insn_entry);
  return 0;
}
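/* A sketch of the end-to-end effect (the exact insns depend on register
   allocation and surrounding code): for

     void f (__vector int *dst, __vector int *src) { *dst = *src; }

   a little-endian -mcpu=power8 compilation without this pass emits
   roughly

     lxvd2x 0,0,4
     xxswapd 0,0
     xxswapd 0,0
     stxvd2x 0,0,3

   Here the web containing the load, the store, and both swaps is
   optimizable, so the two xxswapd insns are replaced with copies and
   deleted, leaving the permuting load feeding the permuting store
   directly.  */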
const pass_data pass_data_analyze_swaps =
{
  RTL_PASS, /* type */
  "swaps", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_NONE, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  TODO_df_finish, /* todo_flags_finish */
};

class pass_analyze_swaps : public rtl_opt_pass
{
public:
  pass_analyze_swaps(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
  {
    return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
	    && rs6000_optimize_swaps);
  }

  virtual unsigned int execute (function *fun)
  {
    return rs6000_analyze_swaps (fun);
  }

}; // class pass_analyze_swaps

rtl_opt_pass *
make_pass_analyze_swaps (gcc::context *ctxt)
{
  return new pass_analyze_swaps (ctxt);
}
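/* Illustrative note: this factory is the pass's only external entry
   point.  The backend creates the pass object once at start-up,
   roughly

     opt_pass *p = make_pass_analyze_swaps (g);

   where g is the global gcc::context, and hands it to the pass manager
   via register_pass; the insertion point in the pipeline is chosen by
   the registration code during option override.  */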
#ifdef RS6000_GLIBC_ATOMIC_FENV
/* Function declarations for rs6000_atomic_assign_expand_fenv.  */
static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
#endif

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
    {
#ifdef RS6000_GLIBC_ATOMIC_FENV
      if (atomic_hold_decl == NULL_TREE)
	{
	  atomic_hold_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feholdexcept"),
			  build_function_type_list (void_type_node,
						    double_ptr_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_hold_decl) = 1;
	  DECL_EXTERNAL (atomic_hold_decl) = 1;
	}

      if (atomic_clear_decl == NULL_TREE)
	{
	  atomic_clear_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feclearexcept"),
			  build_function_type_list (void_type_node,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_clear_decl) = 1;
	  DECL_EXTERNAL (atomic_clear_decl) = 1;
	}

      tree const_double = build_qualified_type (double_type_node,
						TYPE_QUAL_CONST);
      tree const_double_ptr = build_pointer_type (const_double);
      if (atomic_update_decl == NULL_TREE)
	{
	  atomic_update_decl
	    = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
			  get_identifier ("__atomic_feupdateenv"),
			  build_function_type_list (void_type_node,
						    const_double_ptr,
						    NULL_TREE));
	  TREE_PUBLIC (atomic_update_decl) = 1;
	  DECL_EXTERNAL (atomic_update_decl) = 1;
	}

      tree fenv_var = create_tmp_var (double_type_node);
      mark_addressable (fenv_var);
      tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);

      *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
      *clear = build_call_expr (atomic_clear_decl, 0);
      *update = build_call_expr (atomic_update_decl, 1,
				 fold_convert (const_double_ptr, fenv_addr));
#endif
      return;
    }
  tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
  tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
  tree call_mffs = build_call_expr (mffs, 0);

  /* Generates the equivalent of feholdexcept (&fenv_var)

       fenv_var = __builtin_mffs ();
       double fenv_hold;
       *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
       __builtin_mtfsf (0xff, fenv_hold);  */

  /* Mask to clear everything except for the rounding modes and non-IEEE
     arithmetic flag.  */
  const unsigned HOST_WIDE_INT hold_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000007);

  tree fenv_var = create_tmp_var (double_type_node);

  tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);

  tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
  tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			      build_int_cst (uint64_type_node,
					     hold_exception_mask));

  tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				 fenv_llu_and);

  tree hold_mtfsf = build_call_expr (mtfsf, 2,
				     build_int_cst (unsigned_type_node, 0xff),
				     fenv_hold_mtfsf);

  *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);

  /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):

       double fenv_clear = __builtin_mffs ();
       *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
       __builtin_mtfsf (0xff, fenv_clear);  */

  /* Mask to clear the entire lower word of the FPSCR image, i.e. all
     exception flags and enables.  */
  const unsigned HOST_WIDE_INT clear_exception_mask =
    HOST_WIDE_INT_C (0xffffffff00000000);

  tree fenv_clear = create_tmp_var (double_type_node);

  tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);

  tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
  tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
				    fenv_clean_llu,
				    build_int_cst (uint64_type_node,
						   clear_exception_mask));

  tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				  fenv_clear_llu_and);

  tree clear_mtfsf = build_call_expr (mtfsf, 2,
				      build_int_cst (unsigned_type_node, 0xff),
				      fenv_clear_mtfsf);

  *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);

  /* Generates the equivalent of feupdateenv (&fenv_var)

       double old_fenv = __builtin_mffs ();
       double fenv_update;
       *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL)
				  | (*(uint64_t*)&fenv_var & 0x1ff80fff);
       __builtin_mtfsf (0xff, fenv_update);  */

  const unsigned HOST_WIDE_INT update_exception_mask =
    HOST_WIDE_INT_C (0xffffffff1fffff00);
  const unsigned HOST_WIDE_INT new_exception_mask =
    HOST_WIDE_INT_C (0x1ff80fff);

  tree old_fenv = create_tmp_var (double_type_node);
  tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);

  tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
  tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
			     build_int_cst (uint64_type_node,
					    update_exception_mask));

  tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
			     build_int_cst (uint64_type_node,
					    new_exception_mask));

  tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
			      old_llu_and, new_llu_and);

  tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
				   new_llu_mask);

  tree update_mtfsf = build_call_expr (mtfsf, 2,
				       build_int_cst (unsigned_type_node, 0xff),
				       fenv_update_mtfsf);

  *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
}
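/* Usage note: the middle end consumes the HOLD, CLEAR, and UPDATE trees
   built above when expanding an atomic compound assignment on a
   floating-point type, per the TARGET_ATOMIC_ASSIGN_EXPAND_FENV
   contract: HOLD runs before the operation, CLEAR after a failed
   compare-and-swap before retrying, and UPDATE after a successful
   store, so the observable exception state behaves as if the
   assignment executed exactly once.  */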
struct gcc_target targetm = TARGET_INITIALIZER;

#include "gt-rs6000.h"