/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2015 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "cfghooks.h"
#include "tree.h"
#include "gimple.h"
#include "rtl.h"
#include "df.h"
#include "regs.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "alias.h"
#include "fold-const.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "emit-rtl.h"
#include "stmt.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "except.h"
#include "output.h"
#include "dbxout.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "tm_p.h"
#include "target.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cfgloop.h"
#include "sched-int.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "ira.h"
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "cgraph.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif

/* This file should be included last.  */
#include "target-def.h"

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B) ((A) < (B) ? (A) : (B))
#define max(A,B) ((A) > (B) ? (A) : (B))
/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in fixed area */
  int vrsave_size;		/* size to hold VRSAVE */
  int altivec_padding_size;	/* size of altivec alignment padding */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Alternative internal arg pointer for -fsplit-stack.  */
  rtx split_stack_arg_pointer;
  bool split_stack_argp_used;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, which we call so we
   can get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built-in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in the
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;	/* option name */
  unsigned int mask;	/* mask bits to set */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
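/* A string such as -mrecip=divf,rsqrtd is matched against this table one
   comma-separated entry at a time, OR'ing the corresponding masks together;
   in that example the result would be (RECIP_SF_DIV | RECIP_V4SF_DIV
   | RECIP_DF_RSQRT | RECIP_V2DF_RSQRT).  (Descriptive note: the parsing
   itself is done later in this file, in rs6000_option_override_internal.)  */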
/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};
/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,	/* General purpose registers.  */
  RELOAD_REG_FPR,	/* Traditional floating point regs.  */
  RELOAD_REG_VMX,	/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,	/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};
/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;	/* Register class name.  */
  int reg;		/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16	0x40	/* AND -16 addressing.  */
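/* As an illustrative (not table-derived) example: a scalar mode that is
   valid in GPRs with reg+reg, reg+offset, and pre-increment addressing
   would carry the mask (RELOAD_REG_VALID | RELOAD_REG_INDEXED
   | RELOAD_REG_OFFSET | RELOAD_REG_PRE_INCDEC).  */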
/* Per-mode reload information: reload insns, plus masks of the valid
   addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;	/* INSN to reload for loading.  */
  enum insn_code reload_store;	/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;		/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}
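/* These predicates gate auto-increment address generation: a PRE_INC or
   PRE_DEC address is only accepted as legitimate when
   mode_supports_pre_incdec_p (mode) holds for the accessed mode, and
   likewise PRE_MODIFY with mode_supports_pre_modify_p.  (Descriptive note:
   the callers appear later in this file.)  */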
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
  const int sfdf_convert;	/* cost of SF->DF conversion.  */
};

const struct processor_costs *rs6000_cost;
/* Processor costs (relative to an add) */

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,	/* mulsi */
  COSTS_N_INSNS (6/2),		/* mulsi_const */
  COSTS_N_INSNS (6/2),		/* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,	/* muldi */
  COSTS_N_INSNS (38/2),		/* divsi */
  COSTS_N_INSNS (70/2),		/* divdi */
  COSTS_N_INSNS (10/2),		/* fp */
  COSTS_N_INSNS (10/2),		/* dmul */
  COSTS_N_INSNS (74/2),		/* sdiv */
  COSTS_N_INSNS (74/2),		/* ddiv */
  128,				/* cache line size */
  32,				/* l1 cache */
  512,				/* l2 cache */
  6,				/* streams */
  0,				/* SF->DF convert */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
  0,			/* SF->DF convert */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
  COSTS_N_INSNS (3),	/* SF->DF convert */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
  0,			/* SF->DF convert */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },
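/* Each of the defines above turns one RS6000_BUILTIN_* entry from
   rs6000-builtin.def into an initializer for the info table below,
   reordering the fields as { NAME, ICODE, MASK, ATTR }.  For instance, a
   hypothetical entry

     RS6000_BUILTIN_2 (MY_BUILTIN, "__builtin_my_op", MY_MASK, MY_ATTR,
		       CODE_FOR_my_op)

   would expand to

     { "__builtin_my_op", CODE_FOR_my_op, MY_MASK, MY_ATTR },

   so the table is built entirely by including rs6000-builtin.def.  */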
struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (tree, tree, tree);
static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t, bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
				   machine_mode, machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
					     machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
						   machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
static rtx rs6000_internal_arg_pointer (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
					     machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_ptr_hash<toc_hash_struct>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];	/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_ptr_hash<builtin_hash_struct>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "8",  "9",  "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};
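/* Both register name tables are indexed by hard register number, in the
   order laid out above: 32 GPRs, 32 FPRs, mq/lr/ctr/ap, the 8 CR fields,
   ca, the 32 AltiVec registers, vrsave and vscr, the SPE special
   registers, the soft frame pointer, the HTM SPRs, and the 32 SPE high
   halves.  */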
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
  "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
  "%r8",  "%r9",  "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
  "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
  "%f8",  "%f9",  "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
  "mq", "lr", "ctr", "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
  "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
  "%v8",  "%v9",  "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9",  "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif
/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
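/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (%v0)
   and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001 (%v31),
   matching the layout of the VRSAVE mask.  */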
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_EXTRA_LIVE_ON_ENTRY
#define TARGET_EXTRA_LIVE_ON_ENTRY rs6000_live_on_entry

#undef TARGET_INTERNAL_ARG_POINTER
#define TARGET_INTERNAL_ARG_POINTER rs6000_internal_arg_pointer

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE rs6000_promote_function_mode

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip

#undef TARGET_MD_ASM_ADJUST
#define TARGET_MD_ASM_ADJUST rs6000_md_asm_adjust

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif
1606 /* Use a 32-bit anchor range. This leads to sequences like:
1608 addis tmp,anchor,high
1609 add dest,tmp,low
1611 where tmp itself acts as an anchor, and can be shared between
1612 accesses to the same 64k page. */
1613 #undef TARGET_MIN_ANCHOR_OFFSET
1614 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1615 #undef TARGET_MAX_ANCHOR_OFFSET
1616 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
1617 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1618 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1619 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1620 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1622 #undef TARGET_BUILTIN_RECIPROCAL
1623 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1625 #undef TARGET_EXPAND_TO_RTL_HOOK
1626 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1628 #undef TARGET_INSTANTIATE_DECLS
1629 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1631 #undef TARGET_SECONDARY_RELOAD
1632 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1634 #undef TARGET_LEGITIMATE_ADDRESS_P
1635 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1637 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1638 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1640 #undef TARGET_LRA_P
1641 #define TARGET_LRA_P rs6000_lra_p
1643 #undef TARGET_CAN_ELIMINATE
1644 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1646 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1647 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1649 #undef TARGET_TRAMPOLINE_INIT
1650 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1652 #undef TARGET_FUNCTION_VALUE
1653 #define TARGET_FUNCTION_VALUE rs6000_function_value
1655 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1656 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1658 #undef TARGET_OPTION_SAVE
1659 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1661 #undef TARGET_OPTION_RESTORE
1662 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1664 #undef TARGET_OPTION_PRINT
1665 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1667 #undef TARGET_CAN_INLINE_P
1668 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1670 #undef TARGET_SET_CURRENT_FUNCTION
1671 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1673 #undef TARGET_LEGITIMATE_CONSTANT_P
1674 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1676 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1677 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1679 #undef TARGET_CAN_USE_DOLOOP_P
1680 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1682 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1683 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1685 #undef TARGET_LIBGCC_CMP_RETURN_MODE
1686 #define TARGET_LIBGCC_CMP_RETURN_MODE rs6000_abi_word_mode
1687 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
1688 #define TARGET_LIBGCC_SHIFT_COUNT_MODE rs6000_abi_word_mode
1689 #undef TARGET_UNWIND_WORD_MODE
1690 #define TARGET_UNWIND_WORD_MODE rs6000_abi_word_mode
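/* A hook that is not #undef/#define'd above keeps the default supplied by
   target-def.h; each pair swaps in the rs6000 implementation before the
   hook vector is instantiated.  A minimal self-contained sketch of the same
   default-then-override technique (the hook name and functions below are
   hypothetical, not GCC's):  */
#if 0
static void default_hook (void) {}
static void rs6000_hook (void) {}

#define TARGETM_HOOK default_hook	/* Default, as a generic header would set.  */
#undef TARGETM_HOOK			/* Port override, as in the pairs above.  */
#define TARGETM_HOOK rs6000_hook

struct targetm_sketch { void (*hook) (void); };
static const struct targetm_sketch targetm_sketch = { TARGETM_HOOK };
#endif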
1693 /* Processor table. */
1694 struct rs6000_ptt
1696 const char *const name; /* Canonical processor name. */
1697 const enum processor_type processor; /* Processor type enum value. */
1698 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1701 static struct rs6000_ptt const processor_target_table[] =
1703 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1704 #include "rs6000-cpus.def"
1705 #undef RS6000_CPU
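/* processor_target_table is built with the classic X-macro pattern:
   rs6000-cpus.def holds one RS6000_CPU (NAME, CPU, FLAGS) line per
   processor, and each includer defines RS6000_CPU to extract the columns it
   needs.  A self-contained sketch of the technique with a hypothetical list
   (not real GCC data):  */
#if 0
#define COLOR_LIST \
  COLOR ("red", 0xff0000) \
  COLOR ("blue", 0x0000ff)

struct color { const char *const name; const int rgb; };

static const struct color color_table[] = {
#define COLOR(NAME, RGB) { NAME, RGB },
  COLOR_LIST
#undef COLOR
};
#endif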
1708 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1709 name is invalid. */
1711 static int
1712 rs6000_cpu_name_lookup (const char *name)
1714 size_t i;
1716 if (name != NULL)
1718 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1719 if (! strcmp (name, processor_target_table[i].name))
1720 return (int)i;
1723 return -1;
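/* Typical use (sketch; "power8" is only an example argument): map a -mcpu=
   string to a table index, then read the canonical name and enable flags
   back out of processor_target_table.  */
#if 0
int idx = rs6000_cpu_name_lookup ("power8");
if (idx >= 0)
  fprintf (stderr, "%s enables " HOST_WIDE_INT_PRINT_HEX "\n",
	   processor_target_table[idx].name,
	   processor_target_table[idx].target_enable);
#endif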
1727 /* Return number of consecutive hard regs needed starting at reg REGNO
1728 to hold something of mode MODE.
1729 This is ordinarily the length in words of a value of mode MODE
1730 but can be less for certain modes in special long registers.
1732 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1733 scalar instructions. The upper 32 bits are only available to the
1734 SIMD instructions.
1736 POWER and PowerPC GPRs hold 32 bits worth;
1737 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1739 static int
1740 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1742 unsigned HOST_WIDE_INT reg_size;
1744 /* 128-bit floating point usually takes 2 registers, unless it is IEEE
1745 128-bit floating point that can go in vector registers, which has VSX
1746 memory addressing. */
1747 if (FP_REGNO_P (regno))
1748 reg_size = (VECTOR_MEM_VSX_P (mode)
1749 ? UNITS_PER_VSX_WORD
1750 : UNITS_PER_FP_WORD);
1752 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1753 reg_size = UNITS_PER_SPE_WORD;
1755 else if (ALTIVEC_REGNO_P (regno))
1756 reg_size = UNITS_PER_ALTIVEC_WORD;
1758 /* The value returned for SCmode in the E500 double case is 2 for
1759 ABI compatibility; storing an SCmode value in a single register
1760 would require function_arg and rs6000_spe_function_arg to handle
1761 SCmode so as to pass the value correctly in a pair of
1762 registers. */
1763 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1764 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1765 reg_size = UNITS_PER_FP_WORD;
1767 else
1768 reg_size = UNITS_PER_WORD;
1770 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
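/* Worked example of the rounding division above: a 16-byte vector value in
   32-bit GPRs (reg_size 4) needs (16 + 4 - 1) / 4 = 4 registers, while the
   same value in a VSX register (reg_size 16) needs (16 + 16 - 1) / 16 = 1.  */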
1773 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1774 MODE. */
1775 static int
1776 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1778 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1780 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1781 register combinations, and we use PTImode where we need to deal with such
1782 quad word operations. Don't allow quad words in the argument or frame
1783 pointer registers, just registers 0..31. */
1784 if (mode == PTImode)
1785 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1786 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1787 && ((regno & 1) == 0));
1789 /* If we don't allow 128-bit binary floating point, disallow the 128-bit
1790 types from going in any registers. Similarly if __float128 is not
1791 supported, don't allow __float128/__ibm128 types. */
1792 if (!TARGET_LONG_DOUBLE_128
1793 && (mode == TFmode || mode == KFmode || mode == IFmode))
1794 return false;
1796 if (!TARGET_FLOAT128 && (mode == KFmode || mode == IFmode))
1797 return false;
1799 /* VSX registers that overlap the FPR registers are larger than the FPRs on
1800 non-VSX implementations. Don't allow an item to be split between an FP
1801 register and an Altivec register. Allow TImode in all VSX registers if the
1802 user asked for it. */
1803 if (TARGET_VSX && VSX_REGNO_P (regno)
1804 && (VECTOR_MEM_VSX_P (mode)
1805 || FLOAT128_VECTOR_P (mode)
1806 || reg_addr[mode].scalar_in_vmx_p
1807 || (TARGET_VSX_TIMODE && mode == TImode)
1808 || (TARGET_VADDUQM && mode == V1TImode)))
1810 if (FP_REGNO_P (regno))
1811 return FP_REGNO_P (last_regno);
1813 if (ALTIVEC_REGNO_P (regno))
1815 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1816 return 0;
1818 return ALTIVEC_REGNO_P (last_regno);
1822 /* The GPRs can hold any mode, but values bigger than one register
1823 cannot go past R31. */
1824 if (INT_REGNO_P (regno))
1825 return INT_REGNO_P (last_regno);
1827 /* The float registers (except for VSX vector modes) can only hold floating
1828 modes and DImode. */
1829 if (FP_REGNO_P (regno))
1831 if (FLOAT128_VECTOR_P (mode))
1832 return false;
1834 if (SCALAR_FLOAT_MODE_P (mode)
1835 && (mode != TDmode || (regno % 2) == 0)
1836 && FP_REGNO_P (last_regno))
1837 return 1;
1839 if (GET_MODE_CLASS (mode) == MODE_INT
1840 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1841 return 1;
1843 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1844 && PAIRED_VECTOR_MODE (mode))
1845 return 1;
1847 return 0;
1850 /* The CR register can only hold CC modes. */
1851 if (CR_REGNO_P (regno))
1852 return GET_MODE_CLASS (mode) == MODE_CC;
1854 if (CA_REGNO_P (regno))
1855 return mode == Pmode || mode == SImode;
1857 /* AltiVec modes go only in AltiVec registers. */
1858 if (ALTIVEC_REGNO_P (regno))
1859 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1860 || mode == V1TImode);
1862 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1863 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1864 return 1;
1866 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1867 registers, and the value must fit within the register set. */
1869 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
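/* Illustration of the PTImode pairing rule enforced above: a quad word
   starting at GPR 4 occupies the even/odd pair r4/r5 and is accepted,
   while one starting at GPR 5 is rejected because (5 & 1) != 0.  */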
1872 /* Print interesting facts about registers. */
1873 static void
1874 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1876 int r, m;
1878 for (r = first_regno; r <= last_regno; ++r)
1880 const char *comma = "";
1881 int len;
1883 if (first_regno == last_regno)
1884 fprintf (stderr, "%s:\t", reg_name);
1885 else
1886 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1888 len = 8;
1889 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1890 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1892 if (len > 70)
1894 fprintf (stderr, ",\n\t");
1895 len = 8;
1896 comma = "";
1899 if (rs6000_hard_regno_nregs[m][r] > 1)
1900 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1901 rs6000_hard_regno_nregs[m][r]);
1902 else
1903 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1905 comma = ", ";
1908 if (call_used_regs[r])
1910 if (len > 70)
1912 fprintf (stderr, ",\n\t");
1913 len = 8;
1914 comma = "";
1917 len += fprintf (stderr, "%s%s", comma, "call-used");
1918 comma = ", ";
1921 if (fixed_regs[r])
1923 if (len > 70)
1925 fprintf (stderr, ",\n\t");
1926 len = 8;
1927 comma = "";
1930 len += fprintf (stderr, "%s%s", comma, "fixed");
1931 comma = ", ";
1934 if (len > 70)
1936 fprintf (stderr, ",\n\t");
1937 comma = "";
1940 len += fprintf (stderr, "%sreg-class = %s", comma,
1941 reg_class_names[(int)rs6000_regno_regclass[r]]);
1942 comma = ", ";
1944 if (len > 70)
1946 fprintf (stderr, ",\n\t");
1947 comma = "";
1950 fprintf (stderr, "%sregno = %d\n", comma, r);
1954 static const char *
1955 rs6000_debug_vector_unit (enum rs6000_vector v)
1957 const char *ret;
1959 switch (v)
1961 case VECTOR_NONE: ret = "none"; break;
1962 case VECTOR_ALTIVEC: ret = "altivec"; break;
1963 case VECTOR_VSX: ret = "vsx"; break;
1964 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1965 case VECTOR_PAIRED: ret = "paired"; break;
1966 case VECTOR_SPE: ret = "spe"; break;
1967 case VECTOR_OTHER: ret = "other"; break;
1968 default: ret = "unknown"; break;
1971 return ret;
1974 /* Inner function printing just the address mask for a particular reload
1975 register class. */
1976 DEBUG_FUNCTION char *
1977 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
1979 static char ret[8];
1980 char *p = ret;
1982 if ((mask & RELOAD_REG_VALID) != 0)
1983 *p++ = 'v';
1984 else if (keep_spaces)
1985 *p++ = ' ';
1987 if ((mask & RELOAD_REG_MULTIPLE) != 0)
1988 *p++ = 'm';
1989 else if (keep_spaces)
1990 *p++ = ' ';
1992 if ((mask & RELOAD_REG_INDEXED) != 0)
1993 *p++ = 'i';
1994 else if (keep_spaces)
1995 *p++ = ' ';
1997 if ((mask & RELOAD_REG_OFFSET) != 0)
1998 *p++ = 'o';
1999 else if (keep_spaces)
2000 *p++ = ' ';
2002 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
2003 *p++ = '+';
2004 else if (keep_spaces)
2005 *p++ = ' ';
2007 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
2008 *p++ = '+';
2009 else if (keep_spaces)
2010 *p++ = ' ';
2012 if ((mask & RELOAD_REG_AND_M16) != 0)
2013 *p++ = '&';
2014 else if (keep_spaces)
2015 *p++ = ' ';
2017 *p = '\0';
2019 return ret;
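/* Reading the result: with keep_spaces the seven columns are always
   v m i o + + &, so a mask of VALID | INDEXED | OFFSET | PRE_INCDEC prints
   as "v io+  " -- valid, indexed and offset addressing, PRE_INC/PRE_DEC,
   but no PRE_MODIFY and no Altivec AND -16 form.  */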
2022 /* Print the address masks in a human readable fashion. */
2023 DEBUG_FUNCTION void
2024 rs6000_debug_print_mode (ssize_t m)
2026 ssize_t rc;
2028 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
2029 for (rc = 0; rc < N_RELOAD_REG; rc++)
2030 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
2031 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
2033 if (rs6000_vector_unit[m] != VECTOR_NONE
2034 || rs6000_vector_mem[m] != VECTOR_NONE
2035 || (reg_addr[m].reload_store != CODE_FOR_nothing)
2036 || (reg_addr[m].reload_load != CODE_FOR_nothing)
2037 || reg_addr[m].scalar_in_vmx_p)
2039 fprintf (stderr,
2040 " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c Upper=%c",
2041 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2042 rs6000_debug_vector_unit (rs6000_vector_mem[m]),
2043 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2044 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*',
2045 (reg_addr[m].scalar_in_vmx_p) ? 'y' : 'n');
2048 fputs ("\n", stderr);
2051 #define DEBUG_FMT_ID "%-32s= "
2052 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2053 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2054 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
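/* These rely on C string-literal concatenation: DEBUG_FMT_D expands to
   "%-32s= " "%d\n", i.e. the single format "%-32s= %d\n", so a call like
   fprintf (stderr, DEBUG_FMT_D, "tls_size", 13) prints the key
   left-justified in a 32-column field followed by "= 13".  */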
2056 /* Print various interesting information with -mdebug=reg. */
2057 static void
2058 rs6000_debug_reg_global (void)
2060 static const char *const tf[2] = { "false", "true" };
2061 const char *nl = (const char *)0;
2062 int m;
2063 size_t m1, m2, v;
2064 char costly_num[20];
2065 char nop_num[20];
2066 char flags_buffer[40];
2067 const char *costly_str;
2068 const char *nop_str;
2069 const char *trace_str;
2070 const char *abi_str;
2071 const char *cmodel_str;
2072 const char *float128_str;
2073 struct cl_target_option cl_opts;
2075 /* Modes we want tieable information on. */
2076 static const machine_mode print_tieable_modes[] = {
2077 QImode,
2078 HImode,
2079 SImode,
2080 DImode,
2081 TImode,
2082 PTImode,
2083 SFmode,
2084 DFmode,
2085 TFmode,
2086 IFmode,
2087 KFmode,
2088 SDmode,
2089 DDmode,
2090 TDmode,
2091 V8QImode,
2092 V4HImode,
2093 V2SImode,
2094 V16QImode,
2095 V8HImode,
2096 V4SImode,
2097 V2DImode,
2098 V1TImode,
2099 V32QImode,
2100 V16HImode,
2101 V8SImode,
2102 V4DImode,
2103 V2TImode,
2104 V2SFmode,
2105 V4SFmode,
2106 V2DFmode,
2107 V8SFmode,
2108 V4DFmode,
2109 CCmode,
2110 CCUNSmode,
2111 CCEQmode,
2114 /* Virtual regs we are interested in. */
2115 const static struct {
2116 int regno; /* register number. */
2117 const char *name; /* register name. */
2118 } virtual_regs[] = {
2119 { STACK_POINTER_REGNUM, "stack pointer:" },
2120 { TOC_REGNUM, "toc: " },
2121 { STATIC_CHAIN_REGNUM, "static chain: " },
2122 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2123 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2124 { ARG_POINTER_REGNUM, "arg pointer: " },
2125 { FRAME_POINTER_REGNUM, "frame pointer:" },
2126 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2127 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2128 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2129 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2130 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2131 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2132 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2133 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2134 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2137 fputs ("\nHard register information:\n", stderr);
2138 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2139 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2140 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2141 LAST_ALTIVEC_REGNO,
2142 "vs");
2143 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2144 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2145 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2146 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2147 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2148 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2149 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2150 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2152 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2153 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2154 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2156 fprintf (stderr,
2157 "\n"
2158 "d reg_class = %s\n"
2159 "f reg_class = %s\n"
2160 "v reg_class = %s\n"
2161 "wa reg_class = %s\n"
2162 "wd reg_class = %s\n"
2163 "wf reg_class = %s\n"
2164 "wg reg_class = %s\n"
2165 "wh reg_class = %s\n"
2166 "wi reg_class = %s\n"
2167 "wj reg_class = %s\n"
2168 "wk reg_class = %s\n"
2169 "wl reg_class = %s\n"
2170 "wm reg_class = %s\n"
2171 "wp reg_class = %s\n"
2172 "wq reg_class = %s\n"
2173 "wr reg_class = %s\n"
2174 "ws reg_class = %s\n"
2175 "wt reg_class = %s\n"
2176 "wu reg_class = %s\n"
2177 "wv reg_class = %s\n"
2178 "ww reg_class = %s\n"
2179 "wx reg_class = %s\n"
2180 "wy reg_class = %s\n"
2181 "wz reg_class = %s\n"
2182 "\n",
2183 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2184 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2185 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2186 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2187 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2188 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2189 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2190 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2191 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2192 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2193 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2194 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2195 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2196 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wp]],
2197 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wq]],
2198 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2199 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2200 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2201 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2202 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2203 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2204 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2205 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2206 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2208 nl = "\n";
2209 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2210 rs6000_debug_print_mode (m);
2212 fputs ("\n", stderr);
2214 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2216 machine_mode mode1 = print_tieable_modes[m1];
2217 bool first_time = true;
2219 nl = (const char *)0;
2220 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2222 machine_mode mode2 = print_tieable_modes[m2];
2223 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2225 if (first_time)
2227 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2228 nl = "\n";
2229 first_time = false;
2232 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2236 if (!first_time)
2237 fputs ("\n", stderr);
2240 if (nl)
2241 fputs (nl, stderr);
2243 if (rs6000_recip_control)
2245 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2247 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2248 if (rs6000_recip_bits[m])
2250 fprintf (stderr,
2251 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2252 GET_MODE_NAME (m),
2253 (RS6000_RECIP_AUTO_RE_P (m)
2254 ? "auto"
2255 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2256 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2257 ? "auto"
2258 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2261 fputs ("\n", stderr);
2264 if (rs6000_cpu_index >= 0)
2266 const char *name = processor_target_table[rs6000_cpu_index].name;
2267 HOST_WIDE_INT flags
2268 = processor_target_table[rs6000_cpu_index].target_enable;
2270 sprintf (flags_buffer, "-mcpu=%s flags", name);
2271 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2273 else
2274 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2276 if (rs6000_tune_index >= 0)
2278 const char *name = processor_target_table[rs6000_tune_index].name;
2279 HOST_WIDE_INT flags
2280 = processor_target_table[rs6000_tune_index].target_enable;
2282 sprintf (flags_buffer, "-mtune=%s flags", name);
2283 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2285 else
2286 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2288 cl_target_option_save (&cl_opts, &global_options);
2289 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2290 rs6000_isa_flags);
2292 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2293 rs6000_isa_flags_explicit);
2295 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2296 rs6000_builtin_mask);
2298 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2300 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2301 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2303 switch (rs6000_sched_costly_dep)
2305 case max_dep_latency:
2306 costly_str = "max_dep_latency";
2307 break;
2309 case no_dep_costly:
2310 costly_str = "no_dep_costly";
2311 break;
2313 case all_deps_costly:
2314 costly_str = "all_deps_costly";
2315 break;
2317 case true_store_to_load_dep_costly:
2318 costly_str = "true_store_to_load_dep_costly";
2319 break;
2321 case store_to_load_dep_costly:
2322 costly_str = "store_to_load_dep_costly";
2323 break;
2325 default:
2326 costly_str = costly_num;
2327 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2328 break;
2331 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2333 switch (rs6000_sched_insert_nops)
2335 case sched_finish_regroup_exact:
2336 nop_str = "sched_finish_regroup_exact";
2337 break;
2339 case sched_finish_pad_groups:
2340 nop_str = "sched_finish_pad_groups";
2341 break;
2343 case sched_finish_none:
2344 nop_str = "sched_finish_none";
2345 break;
2347 default:
2348 nop_str = nop_num;
2349 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2350 break;
2353 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2355 switch (rs6000_sdata)
2357 default:
2358 case SDATA_NONE:
2359 break;
2361 case SDATA_DATA:
2362 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2363 break;
2365 case SDATA_SYSV:
2366 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2367 break;
2369 case SDATA_EABI:
2370 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2371 break;
2375 switch (rs6000_traceback)
2377 case traceback_default: trace_str = "default"; break;
2378 case traceback_none: trace_str = "none"; break;
2379 case traceback_part: trace_str = "part"; break;
2380 case traceback_full: trace_str = "full"; break;
2381 default: trace_str = "unknown"; break;
2384 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2386 switch (rs6000_current_cmodel)
2388 case CMODEL_SMALL: cmodel_str = "small"; break;
2389 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2390 case CMODEL_LARGE: cmodel_str = "large"; break;
2391 default: cmodel_str = "unknown"; break;
2394 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2396 switch (rs6000_current_abi)
2398 case ABI_NONE: abi_str = "none"; break;
2399 case ABI_AIX: abi_str = "aix"; break;
2400 case ABI_ELFv2: abi_str = "ELFv2"; break;
2401 case ABI_V4: abi_str = "V4"; break;
2402 case ABI_DARWIN: abi_str = "darwin"; break;
2403 default: abi_str = "unknown"; break;
2406 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2408 if (rs6000_altivec_abi)
2409 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2411 if (rs6000_spe_abi)
2412 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2414 if (rs6000_darwin64_abi)
2415 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2417 if (rs6000_float_gprs)
2418 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2420 fprintf (stderr, DEBUG_FMT_S, "fprs",
2421 (TARGET_FPRS ? "true" : "false"));
2423 fprintf (stderr, DEBUG_FMT_S, "single_float",
2424 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2426 fprintf (stderr, DEBUG_FMT_S, "double_float",
2427 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2429 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2430 (TARGET_SOFT_FLOAT ? "true" : "false"));
2432 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2433 (TARGET_E500_SINGLE ? "true" : "false"));
2435 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2436 (TARGET_E500_DOUBLE ? "true" : "false"));
2438 switch (TARGET_FLOAT128)
2440 case FLOAT128_NONE: float128_str = "none"; break;
2441 case FLOAT128_SW: float128_str = "software"; break;
2442 default: float128_str = "unknown"; break;
2445 fprintf (stderr, DEBUG_FMT_S, "float128", float128_str);
2447 if (TARGET_LINK_STACK)
2448 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2450 if (targetm.lra_p ())
2451 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2453 if (TARGET_P8_FUSION)
2454 fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
2455 (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
2457 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2458 TARGET_SECURE_PLT ? "secure" : "bss");
2459 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2460 aix_struct_return ? "aix" : "sysv");
2461 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2462 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2463 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2464 tf[!!rs6000_align_branch_targets]);
2465 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2466 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2467 rs6000_long_double_type_size);
2468 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2469 (int)rs6000_sched_restricted_insns_priority);
2470 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2471 (int)END_BUILTINS);
2472 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2473 (int)RS6000_BUILTIN_COUNT);
2475 if (TARGET_VSX)
2476 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2477 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2481 /* Update the addr mask bits in reg_addr to help secondary reload and the
2482 legitimate address support figure out the appropriate addressing to
2483 use. */
2485 static void
2486 rs6000_setup_reg_addr_masks (void)
2488 ssize_t rc, reg, m, nregs;
2489 addr_mask_type any_addr_mask, addr_mask;
2491 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2493 machine_mode m2 = (machine_mode)m;
2495 /* SDmode is special in that we want to access it only via REG+REG
2496 addressing on power7 and above, since we want to use the LFIWZX and
2497 STFIWZX instructions to load it. */
2498 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2500 any_addr_mask = 0;
2501 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2503 addr_mask = 0;
2504 reg = reload_reg_map[rc].reg;
2506 /* Can mode values go in the GPR/FPR/Altivec registers? */
2507 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2509 nregs = rs6000_hard_regno_nregs[m][reg];
2510 addr_mask |= RELOAD_REG_VALID;
2512 /* Indicate if the mode takes more than 1 physical register. If
2513 it takes a single register, indicate it can do REG+REG
2514 addressing. */
2515 if (nregs > 1 || m == BLKmode)
2516 addr_mask |= RELOAD_REG_MULTIPLE;
2517 else
2518 addr_mask |= RELOAD_REG_INDEXED;
2520 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2521 addressing. Restrict addressing on SPE for 64-bit types
2522 because of the SUBREG hackery used to address 64-bit floats in
2523 '32-bit' GPRs. */
2525 if (TARGET_UPDATE
2526 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2527 && GET_MODE_SIZE (m2) <= 8
2528 && !VECTOR_MODE_P (m2)
2529 && !COMPLEX_MODE_P (m2)
2530 && !indexed_only_p
2531 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8))
2533 addr_mask |= RELOAD_REG_PRE_INCDEC;
2535 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2536 we don't allow PRE_MODIFY for some multi-register
2537 operations. */
2538 switch (m)
2540 default:
2541 addr_mask |= RELOAD_REG_PRE_MODIFY;
2542 break;
2544 case DImode:
2545 if (TARGET_POWERPC64)
2546 addr_mask |= RELOAD_REG_PRE_MODIFY;
2547 break;
2549 case DFmode:
2550 case DDmode:
2551 if (TARGET_DF_INSN)
2552 addr_mask |= RELOAD_REG_PRE_MODIFY;
2553 break;
2558 /* GPR and FPR registers can do REG+OFFSET addressing, except
2559 possibly for SDmode. */
2560 if ((addr_mask != 0) && !indexed_only_p
2561 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
2562 addr_mask |= RELOAD_REG_OFFSET;
2564 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2565 addressing on 128-bit types. */
2566 if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16
2567 && (addr_mask & RELOAD_REG_VALID) != 0)
2568 addr_mask |= RELOAD_REG_AND_M16;
2570 reg_addr[m].addr_mask[rc] = addr_mask;
2571 any_addr_mask |= addr_mask;
2574 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
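/* Sketch: querying the table just built -- e.g. "can DFmode use REG+OFFSET
   (d-form) addressing when reloaded into an FPR?"  */
#if 0
bool dform_ok
  = (reg_addr[DFmode].addr_mask[RELOAD_REG_FPR] & RELOAD_REG_OFFSET) != 0;
#endif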
2579 /* Initialize the various global tables that are based on register size. */
2580 static void
2581 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2583 ssize_t r, m, c;
2584 int align64;
2585 int align32;
2587 /* Precalculate REGNO_REG_CLASS. */
2588 rs6000_regno_regclass[0] = GENERAL_REGS;
2589 for (r = 1; r < 32; ++r)
2590 rs6000_regno_regclass[r] = BASE_REGS;
2592 for (r = 32; r < 64; ++r)
2593 rs6000_regno_regclass[r] = FLOAT_REGS;
2595 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2596 rs6000_regno_regclass[r] = NO_REGS;
2598 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2599 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2601 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2602 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2603 rs6000_regno_regclass[r] = CR_REGS;
2605 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2606 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2607 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2608 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2609 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2610 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2611 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2612 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2613 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2614 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2615 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2616 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2618 /* Precalculate the mapping from register class to the simpler reload
2619 register class. We don't need all of the register classes that are
2620 combinations of different classes, just the simple ones that have constraint letters. */
2621 for (c = 0; c < N_REG_CLASSES; c++)
2622 reg_class_to_reg_type[c] = NO_REG_TYPE;
2624 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2625 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2626 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2627 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2628 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2629 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2630 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2631 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2632 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2633 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2634 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2635 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2637 if (TARGET_VSX)
2639 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2640 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2642 else
2644 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2645 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2648 /* Precalculate the valid memory formats as well as the vector information;
2649 this must be set up before the rs6000_hard_regno_nregs_internal calls
2650 below. */
2651 gcc_assert ((int)VECTOR_NONE == 0);
2652 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2653 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2655 gcc_assert ((int)CODE_FOR_nothing == 0);
2656 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2658 gcc_assert ((int)NO_REGS == 0);
2659 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2661 /* The VSX hardware allows native alignment for vectors, but we control whether
2662 the compiler believes it can use native alignment or must still use 128-bit alignment. */
2663 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2665 align64 = 64;
2666 align32 = 32;
2668 else
2670 align64 = 128;
2671 align32 = 128;
2674 /* V2DF mode, VSX only. */
2675 if (TARGET_VSX)
2677 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2678 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2679 rs6000_vector_align[V2DFmode] = align64;
2682 /* V4SF mode, either VSX or Altivec. */
2683 if (TARGET_VSX)
2685 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2686 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2687 rs6000_vector_align[V4SFmode] = align32;
2689 else if (TARGET_ALTIVEC)
2691 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2692 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2693 rs6000_vector_align[V4SFmode] = align32;
2696 /* V16QImode, V8HImode, V4SImode are Altivec only, but may use VSX loads
2697 and stores. */
2698 if (TARGET_ALTIVEC)
2700 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2701 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2702 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2703 rs6000_vector_align[V4SImode] = align32;
2704 rs6000_vector_align[V8HImode] = align32;
2705 rs6000_vector_align[V16QImode] = align32;
2707 if (TARGET_VSX)
2709 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2710 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2711 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2713 else
2715 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2716 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2717 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2721 /* V2DImode: full support depends on the ISA 2.07 vector mode; allow it under
2722 VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2723 if (TARGET_VSX)
2725 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2726 rs6000_vector_unit[V2DImode]
2727 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2728 rs6000_vector_align[V2DImode] = align64;
2730 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2731 rs6000_vector_unit[V1TImode]
2732 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2733 rs6000_vector_align[V1TImode] = 128;
2736 /* DFmode, see if we want to use the VSX unit. Memory is handled
2737 differently, so don't set rs6000_vector_mem. */
2738 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2740 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2741 rs6000_vector_align[DFmode] = 64;
2744 /* SFmode, see if we want to use the VSX unit. */
2745 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
2747 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2748 rs6000_vector_align[SFmode] = 32;
2751 /* Allow TImode in VSX register and set the VSX memory macros. */
2752 if (TARGET_VSX && TARGET_VSX_TIMODE)
2754 rs6000_vector_mem[TImode] = VECTOR_VSX;
2755 rs6000_vector_align[TImode] = align64;
2758 /* TODO add SPE and paired floating point vector support. */
2760 /* Register class constraints for the constraints that depend on compile
2761 switches. When the VSX code was added, different constraints were added
2762 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2763 of the VSX registers are used. The register classes for scalar floating
2764 point types are set based on whether we allow that type into the upper
2765 (Altivec) registers. GCC has register classes to target the Altivec
2766 registers for load/store operations, to select using a VSX memory
2767 operation instead of the traditional floating point operation. The
2768 constraints are:
2770 d - Register class to use with traditional DFmode instructions.
2771 f - Register class to use with traditional SFmode instructions.
2772 v - Altivec register.
2773 wa - Any VSX register.
2774 wc - Reserved to represent individual CR bits (used in LLVM).
2775 wd - Preferred register class for V2DFmode.
2776 wf - Preferred register class for V4SFmode.
2777 wg - Float register for power6x move insns.
2778 wh - FP register for direct move instructions.
2779 wi - FP or VSX register to hold 64-bit integers for VSX insns.
2780 wj - FP or VSX register to hold 64-bit integers for direct moves.
2781 wk - FP or VSX register to hold 64-bit doubles for direct moves.
2782 wl - Float register if we can do 32-bit signed int loads.
2783 wm - VSX register for ISA 2.07 direct move operations.
2784 wn - always NO_REGS.
2785 wr - GPR if 64-bit mode is permitted.
2786 ws - Register class to do ISA 2.06 DF operations.
2787 wt - VSX register for TImode in VSX registers.
2788 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2789 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2790 ww - Register class to do SF conversions in with VSX operations.
2791 wx - Float register if we can do 32-bit int stores.
2792 wy - Register class to do ISA 2.07 SF operations.
2793 wz - Float register if we can do 32-bit unsigned int loads. */
2795 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2796 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2798 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2799 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2801 if (TARGET_VSX)
2803 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2804 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
2805 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
2806 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
2808 if (TARGET_VSX_TIMODE)
2809 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
2811 if (TARGET_UPPER_REGS_DF) /* DFmode */
2813 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2814 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2816 else
2817 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2820 /* Add conditional constraints based on various options, to allow us to
2821 collapse multiple insn patterns. */
2822 if (TARGET_ALTIVEC)
2823 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2825 if (TARGET_MFPGPR) /* DFmode */
2826 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2828 if (TARGET_LFIWAX)
2829 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
2831 if (TARGET_DIRECT_MOVE)
2833 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
2834 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
2835 = rs6000_constraints[RS6000_CONSTRAINT_wi];
2836 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
2837 = rs6000_constraints[RS6000_CONSTRAINT_ws];
2838 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2841 if (TARGET_POWERPC64)
2842 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2844 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
2846 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2847 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2848 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2850 else if (TARGET_P8_VECTOR)
2852 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
2853 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2855 else if (TARGET_VSX)
2856 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2858 if (TARGET_STFIWX)
2859 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2861 if (TARGET_LFIWZX)
2862 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
2864 if (TARGET_FLOAT128)
2866 rs6000_constraints[RS6000_CONSTRAINT_wq] = VSX_REGS; /* KFmode */
2867 if (rs6000_ieeequad)
2868 rs6000_constraints[RS6000_CONSTRAINT_wp] = VSX_REGS; /* TFmode */
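/* Sketch: these classes are what the machine-description constraint letters
   resolve to.  An insn operand written with, say, "ws" matches VSX_REGS or
   FLOAT_REGS here depending on -mupper-regs-df, which is how one pattern
   can serve both register files.  */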
2871 /* Set up the reload helper and direct move functions. */
2872 if (TARGET_VSX || TARGET_ALTIVEC)
2874 if (TARGET_64BIT)
2876 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2877 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2878 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2879 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2880 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2881 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2882 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2883 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2884 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2885 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2886 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2887 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2888 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2889 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2890 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2891 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2892 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2893 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2894 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2895 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2897 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2898 available. */
2899 if (TARGET_NO_SDMODE_STACK)
2901 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2902 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2905 if (TARGET_VSX_TIMODE)
2907 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2908 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2911 if (TARGET_DIRECT_MOVE)
2913 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2914 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2915 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2916 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2917 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2918 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2919 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2920 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2921 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2923 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2924 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2925 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2926 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2927 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2928 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2929 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2930 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2931 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2934 else
2936 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
2937 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
2938 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
2939 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
2940 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
2941 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
2942 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
2943 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
2944 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
2945 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
2946 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
2947 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
2948 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
2949 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
2950 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
2951 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
2952 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
2953 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
2954 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
2955 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
2957 /* Only provide a reload handler for SDmode if lfiwzx/stfiwx are
2958 available. */
2959 if (TARGET_NO_SDMODE_STACK)
2961 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
2962 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
2965 if (TARGET_VSX_TIMODE)
2967 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
2968 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
2971 if (TARGET_DIRECT_MOVE)
2973 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
2974 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
2975 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
2979 if (TARGET_UPPER_REGS_DF)
2980 reg_addr[DFmode].scalar_in_vmx_p = true;
2982 if (TARGET_UPPER_REGS_SF)
2983 reg_addr[SFmode].scalar_in_vmx_p = true;
2986 /* Precalculate HARD_REGNO_NREGS. */
2987 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2988 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2989 rs6000_hard_regno_nregs[m][r]
2990 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
2992 /* Precalculate HARD_REGNO_MODE_OK. */
2993 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2994 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2995 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
2996 rs6000_hard_regno_mode_ok_p[m][r] = true;
2998 /* Precalculate CLASS_MAX_NREGS sizes. */
2999 for (c = 0; c < LIM_REG_CLASSES; ++c)
3001 int reg_size;
3003 if (TARGET_VSX && VSX_REG_CLASS_P (c))
3004 reg_size = UNITS_PER_VSX_WORD;
3006 else if (c == ALTIVEC_REGS)
3007 reg_size = UNITS_PER_ALTIVEC_WORD;
3009 else if (c == FLOAT_REGS)
3010 reg_size = UNITS_PER_FP_WORD;
3012 else
3013 reg_size = UNITS_PER_WORD;
3015 for (m = 0; m < NUM_MACHINE_MODES; ++m)
3017 machine_mode m2 = (machine_mode)m;
3018 int reg_size2 = reg_size;
3020 /* TDmode & IBM 128-bit floating point always take 2 registers, even
3021 in VSX. */
3022 if (TARGET_VSX && VSX_REG_CLASS_P (c) && FLOAT128_2REG_P (m))
3023 reg_size2 = UNITS_PER_FP_WORD;
3025 rs6000_class_max_nregs[m][c]
3026 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
3030 if (TARGET_E500_DOUBLE)
3031 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
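/* Worked example of the CLASS_MAX_NREGS computation above: IBM long double
   (16 bytes, FLOAT128_2REG_P) in FLOAT_REGS with reg_size 8 gives
   (16 + 8 - 1) / 8 = 2 registers, and under VSX the FLOAT128_2REG_P check
   forces the same answer rather than one 16-byte VSX register.  */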
3033 /* Calculate for which modes to automatically generate code that uses the
3034 reciprocal divide and square root instructions. In the future, possibly
3035 generate the instructions automatically even if the user did not specify
3036 -mrecip. The older machines' double-precision reciprocal sqrt estimate is
3037 not accurate enough. */
3038 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
3039 if (TARGET_FRES)
3040 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3041 if (TARGET_FRE)
3042 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3043 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3044 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
3045 if (VECTOR_UNIT_VSX_P (V2DFmode))
3046 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
3048 if (TARGET_FRSQRTES)
3049 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3050 if (TARGET_FRSQRTE)
3051 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3052 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
3053 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3054 if (VECTOR_UNIT_VSX_P (V2DFmode))
3055 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
3057 if (rs6000_recip_control)
3059 if (!flag_finite_math_only)
3060 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
3061 if (flag_trapping_math)
3062 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
3063 if (!flag_reciprocal_math)
3064 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
3065 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
3067 if (RS6000_RECIP_HAVE_RE_P (SFmode)
3068 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
3069 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3071 if (RS6000_RECIP_HAVE_RE_P (DFmode)
3072 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
3073 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3075 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
3076 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
3077 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3079 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3080 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3081 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3083 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3084 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3085 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3087 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3088 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3089 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3091 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3092 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3093 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3095 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3096 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3097 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3101 /* Update the addr mask bits in reg_addr to help secondary reload and the
3102 legitimate address support figure out the appropriate addressing to
3103 use. */
3104 rs6000_setup_reg_addr_masks ();
3106 if (global_init_p || TARGET_DEBUG_TARGET)
3108 if (TARGET_DEBUG_REG)
3109 rs6000_debug_reg_global ();
3111 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3112 fprintf (stderr,
3113 "SImode variable mult cost = %d\n"
3114 "SImode constant mult cost = %d\n"
3115 "SImode short constant mult cost = %d\n"
3116 "DImode multipliciation cost = %d\n"
3117 "SImode division cost = %d\n"
3118 "DImode division cost = %d\n"
3119 "Simple fp operation cost = %d\n"
3120 "DFmode multiplication cost = %d\n"
3121 "SFmode division cost = %d\n"
3122 "DFmode division cost = %d\n"
3123 "cache line size = %d\n"
3124 "l1 cache size = %d\n"
3125 "l2 cache size = %d\n"
3126 "simultaneous prefetches = %d\n"
3127 "\n",
3128 rs6000_cost->mulsi,
3129 rs6000_cost->mulsi_const,
3130 rs6000_cost->mulsi_const9,
3131 rs6000_cost->muldi,
3132 rs6000_cost->divsi,
3133 rs6000_cost->divdi,
3134 rs6000_cost->fp,
3135 rs6000_cost->dmul,
3136 rs6000_cost->sdiv,
3137 rs6000_cost->ddiv,
3138 rs6000_cost->cache_line_size,
3139 rs6000_cost->l1_cache_size,
3140 rs6000_cost->l2_cache_size,
3141 rs6000_cost->simultaneous_prefetches);
3145 #if TARGET_MACHO
3146 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3148 static void
3149 darwin_rs6000_override_options (void)
3151 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3152 off. */
3153 rs6000_altivec_abi = 1;
3154 TARGET_ALTIVEC_VRSAVE = 1;
3155 rs6000_current_abi = ABI_DARWIN;
3157 if (DEFAULT_ABI == ABI_DARWIN
3158 && TARGET_64BIT)
3159 darwin_one_byte_bool = 1;
3161 if (TARGET_64BIT && ! TARGET_POWERPC64)
3163 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3164 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3166 if (flag_mkernel)
3168 rs6000_default_long_calls = 1;
3169 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3172 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3173 Altivec. */
3174 if (!flag_mkernel && !flag_apple_kext
3175 && TARGET_64BIT
3176 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3177 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3179 /* Unless the user (not the configurer) has explicitly overridden
3180 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3181 G4 unless targeting the kernel. */
3182 if (!flag_mkernel
3183 && !flag_apple_kext
3184 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3185 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3186 && ! global_options_set.x_rs6000_cpu_index)
3188 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3191 #endif
3193 /* If not otherwise specified by a target, make 'long double' equivalent to
3194 'double'. */
3196 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3197 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3198 #endif
3200 /* Return the builtin mask of the various options that affect which
3201 builtins are enabled. In the past we used target_flags, but we've run out of
3202 bits, and some options like SPE and PAIRED are no longer in
3203 target_flags. */
3205 HOST_WIDE_INT
3206 rs6000_builtin_mask_calculate (void)
3208 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3209 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3210 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3211 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3212 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3213 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3214 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3215 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3216 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3217 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3218 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3219 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3220 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3221 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3222 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3223 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
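/* Sketch: each RS6000_BTM_* value is a single bit, so a consumer tests the
   combined mask with one AND:  */
#if 0
HOST_WIDE_INT mask = rs6000_builtin_mask_calculate ();
bool have_vsx_builtins = (mask & RS6000_BTM_VSX) != 0;
#endif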
3226 /* Implement TARGET_MD_ASM_ADJUST. All asm statements are considered
3227 to clobber the XER[CA] bit because clobbering that bit without telling
3228 the compiler worked just fine with versions of GCC before GCC 5, and
3229 breaking a lot of older code in ways that are hard to track down is
3230 not such a great idea. */
3232 static rtx_insn *
3233 rs6000_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
3234 vec<const char *> &/*constraints*/,
3235 vec<rtx> &clobbers, HARD_REG_SET &clobbered_regs)
3237 clobbers.safe_push (gen_rtx_REG (SImode, CA_REGNO));
3238 SET_HARD_REG_BIT (clobbered_regs, CA_REGNO);
3239 return NULL;
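/* In effect, every inline asm behaves as if XER[CA] were on its clobber
   list, so pre-GCC 5 code that used carry-setting sequences such as
   addic/addze inside an asm keeps working without source changes.  */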
3242 /* Override command line options. Mostly we process the processor type and
3243 sometimes adjust other TARGET_ options. */
3245 static bool
3246 rs6000_option_override_internal (bool global_init_p)
3248 bool ret = true;
3249 bool have_cpu = false;
3251 /* The default cpu requested at configure time, if any. */
3252 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3254 HOST_WIDE_INT set_masks;
3255 int cpu_index;
3256 int tune_index;
3257 struct cl_target_option *main_target_opt
3258 = ((global_init_p || target_option_default_node == NULL)
3259 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3261 /* Print defaults. */
3262 if ((TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) && global_init_p)
3263 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
3265 /* Remember the explicit arguments. */
3266 if (global_init_p)
3267 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3269 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3270 library functions, so warn about it. The flag may be useful for
3271 performance studies from time to time though, so don't disable it
3272 entirely. */
3273 if (global_options_set.x_rs6000_alignment_flags
3274 && rs6000_alignment_flags == MASK_ALIGN_POWER
3275 && DEFAULT_ABI == ABI_DARWIN
3276 && TARGET_64BIT)
3277 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3278 " it is incompatible with the installed C and C++ libraries");
3280 /* Numerous experiments show that IRA-based loop pressure
3281 calculation works better for RTL loop invariant motion on targets
3282 with enough (>= 32) registers. It is an expensive optimization,
3283 so it is enabled only when optimizing for peak performance. */
3284 if (optimize >= 3 && global_init_p
3285 && !global_options_set.x_flag_ira_loop_pressure)
3286 flag_ira_loop_pressure = 1;
3288 /* Set the pointer size. */
3289 if (TARGET_64BIT)
3291 rs6000_pmode = (int)DImode;
3292 rs6000_pointer_size = 64;
3294 else
3296 rs6000_pmode = (int)SImode;
3297 rs6000_pointer_size = 32;
3300 /* Some OSs don't support saving the high part of 64-bit registers on context
3301 switch. Other OSs don't support saving Altivec registers. On those OSs,
3302 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3303 if the user wants either, the user must explicitly specify them and we
3304 won't interfere with the user's specification. */
3306 set_masks = POWERPC_MASKS;
3307 #ifdef OS_MISSING_POWERPC64
3308 if (OS_MISSING_POWERPC64)
3309 set_masks &= ~OPTION_MASK_POWERPC64;
3310 #endif
3311 #ifdef OS_MISSING_ALTIVEC
3312 if (OS_MISSING_ALTIVEC)
3313 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3314 #endif
3316 /* Don't let the processor default override options that were given explicitly. */
3317 set_masks &= ~rs6000_isa_flags_explicit;
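/* Worked example of the masking above (illustrative): if the user passed
   -mno-altivec, OPTION_MASK_ALTIVEC is recorded in rs6000_isa_flags_explicit
   and removed from set_masks here, so a later -mcpu=power7 (whose table
   entry enables AltiVec) cannot turn it back on.  */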
3319 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3320 the cpu in a target attribute or pragma, but did not specify a tuning
3321 option, use the cpu for the tuning option rather than the option specified
3322 with -mtune on the command line. Process a '--with-cpu' configuration
3323 request as an implicit --cpu. */
3324 if (rs6000_cpu_index >= 0)
3326 cpu_index = rs6000_cpu_index;
3327 have_cpu = true;
3329 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3331 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3332 have_cpu = true;
3334 else if (implicit_cpu)
3336 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3337 have_cpu = true;
3339 else
3341 /* PowerPC 64-bit LE requires at least ISA 2.07. */
3342 const char *default_cpu = ((!TARGET_POWERPC64)
3343 ? "powerpc"
3344 : ((BYTES_BIG_ENDIAN)
3345 ? "powerpc64"
3346 : "powerpc64le"));
3348 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3349 have_cpu = false;
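/* For illustration, the fallback default_cpu above resolves as:
     no 64-bit support        -> "powerpc"
     64-bit, big endian       -> "powerpc64"
     64-bit, little endian    -> "powerpc64le" (requires ISA 2.07)  */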
3352 gcc_assert (cpu_index >= 0);
3354 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3355 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3356 with those from the cpu, except for options that were explicitly set. If
3357 we don't have a cpu, do not override the target bits set in
3358 TARGET_DEFAULT. */
3359 if (have_cpu)
3361 rs6000_isa_flags &= ~set_masks;
3362 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3363 & set_masks);
3365 else
3367 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3368 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3369 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. Since we switched
3370 to using rs6000_isa_flags, we need to do the initialization here.
3372 If there is a TARGET_DEFAULT, use that. Otherwise fall back to using
3373 -mcpu=powerpc, -mcpu=powerpc64, or -mcpu=powerpc64le defaults. */
3374 HOST_WIDE_INT flags = ((TARGET_DEFAULT) ? TARGET_DEFAULT
3375 : processor_target_table[cpu_index].target_enable);
3376 rs6000_isa_flags |= (flags & ~rs6000_isa_flags_explicit);
3379 if (rs6000_tune_index >= 0)
3380 tune_index = rs6000_tune_index;
3381 else if (have_cpu)
3382 rs6000_tune_index = tune_index = cpu_index;
3383 else
3385 size_t i;
3386 enum processor_type tune_proc
3387 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3389 tune_index = -1;
3390 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3391 if (processor_target_table[i].processor == tune_proc)
3393 rs6000_tune_index = tune_index = i;
3394 break;
3398 gcc_assert (tune_index >= 0);
3399 rs6000_cpu = processor_target_table[tune_index].processor;
3401 /* Pick defaults for SPE-related control flags. Do this early to make sure
3402 that the TARGET_ macros are representative ASAP. */
3404 int spe_capable_cpu =
3405 (rs6000_cpu == PROCESSOR_PPC8540
3406 || rs6000_cpu == PROCESSOR_PPC8548);
3408 if (!global_options_set.x_rs6000_spe_abi)
3409 rs6000_spe_abi = spe_capable_cpu;
3411 if (!global_options_set.x_rs6000_spe)
3412 rs6000_spe = spe_capable_cpu;
3414 if (!global_options_set.x_rs6000_float_gprs)
3415 rs6000_float_gprs =
3416 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3417 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3418 : 0);
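/* Assumed meaning of the rs6000_float_gprs values chosen above: 1 selects
   single-precision floats in GPRs (e500v1/PPC8540), 2 selects single and
   double precision in GPRs (e500v2/PPC8548), and 0 disables the feature.  */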
3421 if (global_options_set.x_rs6000_spe_abi
3422 && rs6000_spe_abi
3423 && !TARGET_SPE_ABI)
3424 error ("not configured for SPE ABI");
3426 if (global_options_set.x_rs6000_spe
3427 && rs6000_spe
3428 && !TARGET_SPE)
3429 error ("not configured for SPE instruction set");
3431 if (main_target_opt != NULL
3432 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3433 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3434 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3435 error ("target attribute or pragma changes SPE ABI");
3437 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3438 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3439 || rs6000_cpu == PROCESSOR_PPCE5500)
3441 if (TARGET_ALTIVEC)
3442 error ("AltiVec not supported in this target");
3443 if (TARGET_SPE)
3444 error ("SPE not supported in this target");
3446 if (rs6000_cpu == PROCESSOR_PPCE6500)
3448 if (TARGET_SPE)
3449 error ("SPE not supported in this target");
3452 /* Disable Cell microcode if we are optimizing for the Cell
3453 and not optimizing for size. */
3454 if (rs6000_gen_cell_microcode == -1)
3455 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3456 && !optimize_size);
3458 /* If we are optimizing big endian systems for space and it's OK to
3459 use instructions that would be microcoded on the Cell, use the
3460 load/store multiple and string instructions. */
3461 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3462 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3463 | OPTION_MASK_STRING);
3465 /* Don't allow -mmultiple or -mstring on little endian systems
3466 unless the cpu is a 750, because the hardware doesn't support the
3467 instructions used in little endian mode, and using them causes an
3468 alignment trap. The 750 does not cause an alignment trap (except
3469 when the target is unaligned). */
3471 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3473 if (TARGET_MULTIPLE)
3475 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3476 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3477 warning (0, "-mmultiple is not supported on little endian systems");
3480 if (TARGET_STRING)
3482 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3483 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3484 warning (0, "-mstring is not supported on little endian systems");
3488 /* If little-endian, default to -mstrict-align on older processors.
3489 Testing for htm matches power8 and later. */
3490 if (!BYTES_BIG_ENDIAN
3491 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3492 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3494 /* -maltivec={le,be} implies -maltivec. */
3495 if (rs6000_altivec_element_order != 0)
3496 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3498 /* Disallow -maltivec=le in big endian mode for now. This is not
3499 known to be useful for anyone. */
3500 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3502 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3503 rs6000_altivec_element_order = 0;
3506 /* Add some warnings for VSX. */
3507 if (TARGET_VSX)
3509 const char *msg = NULL;
3510 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3511 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3513 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3514 msg = N_("-mvsx requires hardware floating point");
3515 else
3517 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3518 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3521 else if (TARGET_PAIRED_FLOAT)
3522 msg = N_("-mvsx and -mpaired are incompatible");
3523 else if (TARGET_AVOID_XFORM > 0)
3524 msg = N_("-mvsx needs indexed addressing");
3525 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3526 & OPTION_MASK_ALTIVEC))
3528 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3529 msg = N_("-mvsx and -mno-altivec are incompatible");
3530 else
3531 msg = N_("-mno-altivec disables vsx");
3534 if (msg)
3536 warning (0, msg);
3537 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3538 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3542 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3543 the -mcpu setting to enable options that conflict. */
3544 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3545 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3546 | OPTION_MASK_ALTIVEC
3547 | OPTION_MASK_VSX)) != 0)
3548 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3549 | OPTION_MASK_DIRECT_MOVE)
3550 & ~rs6000_isa_flags_explicit);
3552 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3553 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3555 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3556 unless the user explicitly used -mno-<option> to disable the code. */
3557 if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3558 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3559 else if (TARGET_VSX)
3560 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3561 else if (TARGET_POPCNTD)
3562 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3563 else if (TARGET_DFP)
3564 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3565 else if (TARGET_CMPB)
3566 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3567 else if (TARGET_FPRND)
3568 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3569 else if (TARGET_POPCNTB)
3570 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3571 else if (TARGET_ALTIVEC)
3572 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
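/* Worked example (illustrative): a bare -mvsx also turns on the ISA 2.06
   server defaults (e.g. -maltivec, -mpopcntd, -mcmpb, -mfprnd), but any
   bit the user set or cleared explicitly is excluded by
   ~rs6000_isa_flags_explicit and left exactly as the user requested.  */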
3574 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3576 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3577 error ("-mcrypto requires -maltivec");
3578 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3581 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3583 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3584 error ("-mdirect-move requires -mvsx");
3585 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3588 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3590 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3591 error ("-mpower8-vector requires -maltivec");
3592 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3595 if (TARGET_P8_VECTOR && !TARGET_VSX)
3597 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3598 error ("-mpower8-vector requires -mvsx");
3599 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3602 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3604 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3605 error ("-mvsx-timode requires -mvsx");
3606 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3609 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3611 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3612 error ("-mhard-dfp requires -mhard-float");
3613 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3616 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
3617 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
3618 the individual option. */
3619 if (TARGET_UPPER_REGS > 0)
3621 if (TARGET_VSX
3622 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3624 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
3625 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3627 if (TARGET_P8_VECTOR
3628 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3630 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
3631 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3634 else if (TARGET_UPPER_REGS == 0)
3636 if (TARGET_VSX
3637 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3639 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3640 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3642 if (TARGET_P8_VECTOR
3643 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3645 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3646 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3650 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
3652 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
3653 error ("-mupper-regs-df requires -mvsx");
3654 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3657 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
3659 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
3660 error ("-mupper-regs-sf requires -mpower8-vector");
3661 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3664 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3665 silently turn off quad memory mode. */
3666 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3668 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3669 warning (0, N_("-mquad-memory requires 64-bit mode"));
3671 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3672 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3674 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3675 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3678 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3679 the words are reversed, but atomic operations can still be done by
3680 swapping the words. */
3681 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3683 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3684 warning (0, N_("-mquad-memory is not available in little endian mode"));
3686 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3689 /* Assume that if the user asked for normal quad memory instructions, they
3690 want the atomic versions as well, unless they explicitly told us not to
3691 use quad-word atomic instructions. */
3692 if (TARGET_QUAD_MEMORY
3693 && !TARGET_QUAD_MEMORY_ATOMIC
3694 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3695 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3697 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3698 generating power8 instructions. */
3699 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3700 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3701 & OPTION_MASK_P8_FUSION);
3703 /* Power8 does not fuse sign-extended loads with the addis. If we are
3704 optimizing at high levels for speed, convert a sign-extended load into a
3705 zero-extending load followed by an explicit sign extension. */
3706 if (TARGET_P8_FUSION
3707 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3708 && optimize_function_for_speed_p (cfun)
3709 && optimize >= 3)
3710 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
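/* Sketch of the resulting transformation (assumed instruction choice):
   a fusible pair using a sign-extending load, such as

     addis rt,r2,hi    ; lha rx,lo(rt)

   is emitted instead as

     addis rt,r2,hi    ; lhz rx,lo(rt)    ; extsh rx,rx

   so the addis/load pair can still fuse on power8, with the sign
   extension done separately.  */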
3712 /* Set the appropriate IEEE 128-bit floating-point option. Do not enable float128
3713 support by default until the libgcc support is added. */
3714 if (TARGET_FLOAT128 == FLOAT128_UNSET)
3715 TARGET_FLOAT128 = FLOAT128_NONE;
3716 else if (TARGET_FLOAT128 == FLOAT128_SW && !TARGET_VSX)
3717 error ("-mfloat128-software requires VSX support");
3719 /* Set -mallow-movmisalign explicitly on if we have full ISA 2.07
3720 support. If we only have ISA 2.06 support and the user did not specify
3721 the switch, leave it set to -1 so the movmisalign patterns are enabled,
3722 but we don't enable the full vectorization support. */
3723 if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
3724 TARGET_ALLOW_MOVMISALIGN = 1;
3726 else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
3728 if (TARGET_ALLOW_MOVMISALIGN > 0)
3729 error ("-mallow-movmisalign requires -mvsx");
3731 TARGET_ALLOW_MOVMISALIGN = 0;
3734 /* Determine when unaligned vector accesses are permitted, and when
3735 they are preferred over masked Altivec loads. Note that if
3736 TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
3737 TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
3738 not true. */
3739 if (TARGET_EFFICIENT_UNALIGNED_VSX)
3741 if (!TARGET_VSX)
3743 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3744 error ("-mefficient-unaligned-vsx requires -mvsx");
3746 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3749 else if (!TARGET_ALLOW_MOVMISALIGN)
3751 if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
3752 error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
3754 rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
3758 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3759 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
3761 /* E500mc does "better" if we inline more aggressively. Respect the
3762 user's opinion, though. */
3763 if (rs6000_block_move_inline_limit == 0
3764 && (rs6000_cpu == PROCESSOR_PPCE500MC
3765 || rs6000_cpu == PROCESSOR_PPCE500MC64
3766 || rs6000_cpu == PROCESSOR_PPCE5500
3767 || rs6000_cpu == PROCESSOR_PPCE6500))
3768 rs6000_block_move_inline_limit = 128;
3770 /* store_one_arg depends on expand_block_move to handle at least the
3771 size of reg_parm_stack_space. */
3772 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
3773 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
3775 if (global_init_p)
3777 /* If the appropriate debug option is enabled, replace the target hooks
3778 with debug versions that call the real version and then print
3779 debugging information. */
3780 if (TARGET_DEBUG_COST)
3782 targetm.rtx_costs = rs6000_debug_rtx_costs;
3783 targetm.address_cost = rs6000_debug_address_cost;
3784 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
3787 if (TARGET_DEBUG_ADDR)
3789 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
3790 targetm.legitimize_address = rs6000_debug_legitimize_address;
3791 rs6000_secondary_reload_class_ptr
3792 = rs6000_debug_secondary_reload_class;
3793 rs6000_secondary_memory_needed_ptr
3794 = rs6000_debug_secondary_memory_needed;
3795 rs6000_cannot_change_mode_class_ptr
3796 = rs6000_debug_cannot_change_mode_class;
3797 rs6000_preferred_reload_class_ptr
3798 = rs6000_debug_preferred_reload_class;
3799 rs6000_legitimize_reload_address_ptr
3800 = rs6000_debug_legitimize_reload_address;
3801 rs6000_mode_dependent_address_ptr
3802 = rs6000_debug_mode_dependent_address;
3805 if (rs6000_veclibabi_name)
3807 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
3808 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
3809 else
3811 error ("unknown vectorization library ABI type (%s) for "
3812 "-mveclibabi= switch", rs6000_veclibabi_name);
3813 ret = false;
3818 if (!global_options_set.x_rs6000_long_double_type_size)
3820 if (main_target_opt != NULL
3821 && (main_target_opt->x_rs6000_long_double_type_size
3822 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
3823 error ("target attribute or pragma changes long double size");
3824 else
3825 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
3828 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
3829 if (!global_options_set.x_rs6000_ieeequad)
3830 rs6000_ieeequad = 1;
3831 #endif
3833 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3834 target attribute or pragma which automatically enables both options,
3835 unless the altivec ABI was set. This is set by default for 64-bit, but
3836 not for 32-bit. */
3837 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3838 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3839 & ~rs6000_isa_flags_explicit);
3841 /* Enable Altivec ABI for AIX -maltivec. */
3842 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
3844 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3845 error ("target attribute or pragma changes AltiVec ABI");
3846 else
3847 rs6000_altivec_abi = 1;
3850 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
3851 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
3852 be explicitly overridden in either case. */
3853 if (TARGET_ELF)
3855 if (!global_options_set.x_rs6000_altivec_abi
3856 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
3858 if (main_target_opt != NULL &&
3859 !main_target_opt->x_rs6000_altivec_abi)
3860 error ("target attribute or pragma changes AltiVec ABI");
3861 else
3862 rs6000_altivec_abi = 1;
3866 /* Set the Darwin64 ABI as default for 64-bit Darwin.
3867 So far, the only darwin64 targets are also MACH-O. */
3868 if (TARGET_MACHO
3869 && DEFAULT_ABI == ABI_DARWIN
3870 && TARGET_64BIT)
3872 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
3873 error ("target attribute or pragma changes darwin64 ABI");
3874 else
3876 rs6000_darwin64_abi = 1;
3877 /* Default to natural alignment, for better performance. */
3878 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3882 /* Place FP constants in the constant pool instead of TOC
3883 if section anchors are enabled. */
3884 if (flag_section_anchors
3885 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
3886 TARGET_NO_FP_IN_TOC = 1;
3888 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3889 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
3891 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3892 SUBTARGET_OVERRIDE_OPTIONS;
3893 #endif
3894 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3895 SUBSUBTARGET_OVERRIDE_OPTIONS;
3896 #endif
3897 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
3898 SUB3TARGET_OVERRIDE_OPTIONS;
3899 #endif
3901 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3902 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
3904 /* For the E500 family of cores, reset the single/double FP flags to let us
3905 check that they remain constant across attributes or pragmas. Also,
3906 clear a possible request for string instructions, which are not supported
3907 and which we might have silently enabled above for -Os.
3909 For other families, clear ISEL in case it was set implicitly. */
3912 switch (rs6000_cpu)
3914 case PROCESSOR_PPC8540:
3915 case PROCESSOR_PPC8548:
3916 case PROCESSOR_PPCE500MC:
3917 case PROCESSOR_PPCE500MC64:
3918 case PROCESSOR_PPCE5500:
3919 case PROCESSOR_PPCE6500:
3921 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
3922 rs6000_double_float = TARGET_E500_DOUBLE;
3924 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3926 break;
3928 default:
3930 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
3931 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
3933 break;
3936 if (main_target_opt)
3938 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
3939 error ("target attribute or pragma changes single precision floating "
3940 "point");
3941 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
3942 error ("target attribute or pragma changes double precision floating "
3943 "point");
3946 /* Detect invalid option combinations with E500. */
3947 CHECK_E500_OPTIONS;
3949 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
3950 && rs6000_cpu != PROCESSOR_POWER5
3951 && rs6000_cpu != PROCESSOR_POWER6
3952 && rs6000_cpu != PROCESSOR_POWER7
3953 && rs6000_cpu != PROCESSOR_POWER8
3954 && rs6000_cpu != PROCESSOR_PPCA2
3955 && rs6000_cpu != PROCESSOR_CELL
3956 && rs6000_cpu != PROCESSOR_PPC476);
3957 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
3958 || rs6000_cpu == PROCESSOR_POWER5
3959 || rs6000_cpu == PROCESSOR_POWER7
3960 || rs6000_cpu == PROCESSOR_POWER8);
3961 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
3962 || rs6000_cpu == PROCESSOR_POWER5
3963 || rs6000_cpu == PROCESSOR_POWER6
3964 || rs6000_cpu == PROCESSOR_POWER7
3965 || rs6000_cpu == PROCESSOR_POWER8
3966 || rs6000_cpu == PROCESSOR_PPCE500MC
3967 || rs6000_cpu == PROCESSOR_PPCE500MC64
3968 || rs6000_cpu == PROCESSOR_PPCE5500
3969 || rs6000_cpu == PROCESSOR_PPCE6500);
3971 /* Allow debug switches to override the above settings. These are set to -1
3972 in rs6000.opt to indicate the user hasn't directly set the switch. */
3973 if (TARGET_ALWAYS_HINT >= 0)
3974 rs6000_always_hint = TARGET_ALWAYS_HINT;
3976 if (TARGET_SCHED_GROUPS >= 0)
3977 rs6000_sched_groups = TARGET_SCHED_GROUPS;
3979 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
3980 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
3982 rs6000_sched_restricted_insns_priority
3983 = (rs6000_sched_groups ? 1 : 0);
3985 /* Handle -msched-costly-dep option. */
3986 rs6000_sched_costly_dep
3987 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
3989 if (rs6000_sched_costly_dep_str)
3991 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
3992 rs6000_sched_costly_dep = no_dep_costly;
3993 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
3994 rs6000_sched_costly_dep = all_deps_costly;
3995 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
3996 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
3997 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
3998 rs6000_sched_costly_dep = store_to_load_dep_costly;
3999 else
4000 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
4001 atoi (rs6000_sched_costly_dep_str));
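/* Usage note (illustrative): -msched-costly-dep accepts "no", "all",
   "true_store_to_load", "store_to_load", or a bare number N, in which
   case a dependence whose latency is at least N is treated as costly.  */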
4004 /* Handle -minsert-sched-nops option. */
4005 rs6000_sched_insert_nops
4006 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
4008 if (rs6000_sched_insert_nops_str)
4010 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
4011 rs6000_sched_insert_nops = sched_finish_none;
4012 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
4013 rs6000_sched_insert_nops = sched_finish_pad_groups;
4014 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
4015 rs6000_sched_insert_nops = sched_finish_regroup_exact;
4016 else
4017 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
4018 atoi (rs6000_sched_insert_nops_str));
4021 if (global_init_p)
4023 #ifdef TARGET_REGNAMES
4024 /* If the user desires alternate register names, copy in the
4025 alternate names now. */
4026 if (TARGET_REGNAMES)
4027 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
4028 #endif
4030 /* Set aix_struct_return last, after the ABI is determined.
4031 If -maix-struct-return or -msvr4-struct-return was explicitly
4032 used, don't override with the ABI default. */
4033 if (!global_options_set.x_aix_struct_return)
4034 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
4036 #if 0
4037 /* IBM XL compiler defaults to unsigned bitfields. */
4038 if (TARGET_XL_COMPAT)
4039 flag_signed_bitfields = 0;
4040 #endif
4042 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
4043 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
4045 if (TARGET_TOC)
4046 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
4048 /* We can only guarantee the availability of DI pseudo-ops when
4049 assembling for 64-bit targets. */
4050 if (!TARGET_64BIT)
4052 targetm.asm_out.aligned_op.di = NULL;
4053 targetm.asm_out.unaligned_op.di = NULL;
4057 /* Set branch target alignment, if not optimizing for size. */
4058 if (!optimize_size)
4060 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
4061 8-byte aligned to avoid misprediction by the branch predictor. */
4062 if (rs6000_cpu == PROCESSOR_TITAN
4063 || rs6000_cpu == PROCESSOR_CELL)
4065 if (align_functions <= 0)
4066 align_functions = 8;
4067 if (align_jumps <= 0)
4068 align_jumps = 8;
4069 if (align_loops <= 0)
4070 align_loops = 8;
4072 if (rs6000_align_branch_targets)
4074 if (align_functions <= 0)
4075 align_functions = 16;
4076 if (align_jumps <= 0)
4077 align_jumps = 16;
4078 if (align_loops <= 0)
4080 can_override_loop_align = 1;
4081 align_loops = 16;
4084 if (align_jumps_max_skip <= 0)
4085 align_jumps_max_skip = 15;
4086 if (align_loops_max_skip <= 0)
4087 align_loops_max_skip = 15;
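/* Resulting defaults, for illustration: with a power8 tuning (which sets
   rs6000_align_branch_targets above), functions, jumps and loops default
   to 16-byte alignment, padded with at most 15 bytes of nops.  */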
4090 /* Arrange to save and restore machine status around nested functions. */
4091 init_machine_status = rs6000_init_machine_status;
4093 /* We should always be splitting complex arguments, but we can't break
4094 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
4095 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
4096 targetm.calls.split_complex_arg = NULL;
4099 /* Initialize rs6000_cost with the appropriate target costs. */
4100 if (optimize_size)
4101 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
4102 else
4103 switch (rs6000_cpu)
4105 case PROCESSOR_RS64A:
4106 rs6000_cost = &rs64a_cost;
4107 break;
4109 case PROCESSOR_MPCCORE:
4110 rs6000_cost = &mpccore_cost;
4111 break;
4113 case PROCESSOR_PPC403:
4114 rs6000_cost = &ppc403_cost;
4115 break;
4117 case PROCESSOR_PPC405:
4118 rs6000_cost = &ppc405_cost;
4119 break;
4121 case PROCESSOR_PPC440:
4122 rs6000_cost = &ppc440_cost;
4123 break;
4125 case PROCESSOR_PPC476:
4126 rs6000_cost = &ppc476_cost;
4127 break;
4129 case PROCESSOR_PPC601:
4130 rs6000_cost = &ppc601_cost;
4131 break;
4133 case PROCESSOR_PPC603:
4134 rs6000_cost = &ppc603_cost;
4135 break;
4137 case PROCESSOR_PPC604:
4138 rs6000_cost = &ppc604_cost;
4139 break;
4141 case PROCESSOR_PPC604e:
4142 rs6000_cost = &ppc604e_cost;
4143 break;
4145 case PROCESSOR_PPC620:
4146 rs6000_cost = &ppc620_cost;
4147 break;
4149 case PROCESSOR_PPC630:
4150 rs6000_cost = &ppc630_cost;
4151 break;
4153 case PROCESSOR_CELL:
4154 rs6000_cost = &ppccell_cost;
4155 break;
4157 case PROCESSOR_PPC750:
4158 case PROCESSOR_PPC7400:
4159 rs6000_cost = &ppc750_cost;
4160 break;
4162 case PROCESSOR_PPC7450:
4163 rs6000_cost = &ppc7450_cost;
4164 break;
4166 case PROCESSOR_PPC8540:
4167 case PROCESSOR_PPC8548:
4168 rs6000_cost = &ppc8540_cost;
4169 break;
4171 case PROCESSOR_PPCE300C2:
4172 case PROCESSOR_PPCE300C3:
4173 rs6000_cost = &ppce300c2c3_cost;
4174 break;
4176 case PROCESSOR_PPCE500MC:
4177 rs6000_cost = &ppce500mc_cost;
4178 break;
4180 case PROCESSOR_PPCE500MC64:
4181 rs6000_cost = &ppce500mc64_cost;
4182 break;
4184 case PROCESSOR_PPCE5500:
4185 rs6000_cost = &ppce5500_cost;
4186 break;
4188 case PROCESSOR_PPCE6500:
4189 rs6000_cost = &ppce6500_cost;
4190 break;
4192 case PROCESSOR_TITAN:
4193 rs6000_cost = &titan_cost;
4194 break;
4196 case PROCESSOR_POWER4:
4197 case PROCESSOR_POWER5:
4198 rs6000_cost = &power4_cost;
4199 break;
4201 case PROCESSOR_POWER6:
4202 rs6000_cost = &power6_cost;
4203 break;
4205 case PROCESSOR_POWER7:
4206 rs6000_cost = &power7_cost;
4207 break;
4209 case PROCESSOR_POWER8:
4210 rs6000_cost = &power8_cost;
4211 break;
4213 case PROCESSOR_PPCA2:
4214 rs6000_cost = &ppca2_cost;
4215 break;
4217 default:
4218 gcc_unreachable ();
4221 if (global_init_p)
4223 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4224 rs6000_cost->simultaneous_prefetches,
4225 global_options.x_param_values,
4226 global_options_set.x_param_values);
4227 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4228 global_options.x_param_values,
4229 global_options_set.x_param_values);
4230 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4231 rs6000_cost->cache_line_size,
4232 global_options.x_param_values,
4233 global_options_set.x_param_values);
4234 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4235 global_options.x_param_values,
4236 global_options_set.x_param_values);
4238 /* Increase loop peeling limits based on performance analysis. */
4239 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4240 global_options.x_param_values,
4241 global_options_set.x_param_values);
4242 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4243 global_options.x_param_values,
4244 global_options_set.x_param_values);
4246 /* If using typedef char *va_list, signal that
4247 __builtin_va_start (&ap, 0) can be optimized to
4248 ap = __builtin_next_arg (0). */
4249 if (DEFAULT_ABI != ABI_V4)
4250 targetm.expand_builtin_va_start = NULL;
4253 /* Set up single/double float flags.
4254 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4255 then set both flags. */
4256 if (TARGET_HARD_FLOAT && TARGET_FPRS
4257 && rs6000_single_float == 0 && rs6000_double_float == 0)
4258 rs6000_single_float = rs6000_double_float = 1;
4260 /* If not explicitly specified via option, decide whether to generate indexed
4261 load/store instructions. */
4262 if (TARGET_AVOID_XFORM == -1)
4263 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4264 DERAT mispredict penalty. However, the LVE and STVE AltiVec instructions
4265 need indexed accesses, and the type used is the scalar type of the element
4266 being loaded or stored. */
4267 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4268 && !TARGET_ALTIVEC);
4270 /* Set the -mrecip options. */
4271 if (rs6000_recip_name)
4273 char *p = ASTRDUP (rs6000_recip_name);
4274 char *q;
4275 unsigned int mask, i;
4276 bool invert;
4278 while ((q = strtok (p, ",")) != NULL)
4280 p = NULL;
4281 if (*q == '!')
4283 invert = true;
4284 q++;
4286 else
4287 invert = false;
4289 if (!strcmp (q, "default"))
4290 mask = ((TARGET_RECIP_PRECISION)
4291 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4292 else
4294 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4295 if (!strcmp (q, recip_options[i].string))
4297 mask = recip_options[i].mask;
4298 break;
4301 if (i == ARRAY_SIZE (recip_options))
4303 error ("unknown option for -mrecip=%s", q);
4304 invert = false;
4305 mask = 0;
4306 ret = false;
4310 if (invert)
4311 rs6000_recip_control &= ~mask;
4312 else
4313 rs6000_recip_control |= mask;
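/* Usage sketch (illustrative): -mrecip=all,!rsqrtd enables every
   reciprocal estimate except double-precision rsqrt, while "default"
   expands to RECIP_HIGH_PRECISION or RECIP_LOW_PRECISION depending on
   whether -mrecip-precision is in effect.  */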
4317 /* Set the builtin mask from the various options that could affect which
4318 builtins are available. In the past we used target_flags, but we've run out
4319 of bits, and some options like SPE and PAIRED are no longer in
4320 target_flags. */
4321 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4322 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4323 rs6000_print_builtin_options (stderr, 0, "builtin mask",
4324 rs6000_builtin_mask);
4326 /* Initialize all of the registers. */
4327 rs6000_init_hard_regno_mode_ok (global_init_p);
4329 /* Save the initial options in case the user uses function-specific options. */
4330 if (global_init_p)
4331 target_option_default_node = target_option_current_node
4332 = build_target_option_node (&global_options);
4334 /* If not explicitly specified via option, decide whether to generate the
4335 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4336 if (TARGET_LINK_STACK == -1)
4337 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4339 return ret;
4342 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4343 define the target cpu type. */
4345 static void
4346 rs6000_option_override (void)
4348 (void) rs6000_option_override_internal (true);
4350 /* Register machine-specific passes. This needs to be done at start-up.
4351 It's convenient to do it here (like i386 does). */
4352 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4354 struct register_pass_info analyze_swaps_info
4355 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4357 register_pass (&analyze_swaps_info);
4361 /* Implement targetm.vectorize.builtin_mask_for_load. */
4362 static tree
4363 rs6000_builtin_mask_for_load (void)
4365 /* Don't use lvsl/vperm for P8 and similarly efficient machines. */
4366 if ((TARGET_ALTIVEC && !TARGET_VSX)
4367 || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX))
4368 return altivec_builtin_mask_for_load;
4369 else
4370 return 0;
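/* Illustration (assumed instruction sequence): the mask returned above
   drives the classic AltiVec realignment idiom, roughly

     lvx   v1,0,ptr      ; aligned block holding the first bytes
     lvx   v2,roff,ptr   ; next aligned block (roff = 16)
     lvsl  v3,0,ptr      ; permute control from the low address bits
     vperm v4,v1,v2,v3   ; splice out the unaligned 16 bytes

   which machines with efficient unaligned VSX loads no longer need.  */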
4373 /* Implement LOOP_ALIGN. */
4374 static int
4375 rs6000_loop_align (rtx label)
4377 basic_block bb;
4378 int ninsns;
4380 /* Don't override loop alignment if -falign-loops was specified. */
4381 if (!can_override_loop_align)
4382 return align_loops_log;
4384 bb = BLOCK_FOR_INSN (label);
4385 ninsns = num_loop_insns (bb->loop_father);
4387 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4388 if (ninsns > 4 && ninsns <= 8
4389 && (rs6000_cpu == PROCESSOR_POWER4
4390 || rs6000_cpu == PROCESSOR_POWER5
4391 || rs6000_cpu == PROCESSOR_POWER6
4392 || rs6000_cpu == PROCESSOR_POWER7
4393 || rs6000_cpu == PROCESSOR_POWER8))
4394 return 5;
4395 else
4396 return align_loops_log;
4399 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4400 static int
4401 rs6000_loop_align_max_skip (rtx_insn *label)
4403 return (1 << rs6000_loop_align (label)) - 1;
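/* Worked example: when rs6000_loop_align returns 5 (32-byte alignment),
   the maximum skip is (1 << 5) - 1 = 31 padding bytes; for a default
   align_loops_log of, say, 4 it is 15 bytes.  */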
4406 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4407 after applying N iterations. This routine does not determine
4408 how many iterations are required to reach the desired alignment. */
4410 static bool
4411 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4413 if (is_packed)
4414 return false;
4416 if (TARGET_32BIT)
4418 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4419 return true;
4421 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4422 return true;
4424 return false;
4426 else
4428 if (TARGET_MACHO)
4429 return false;
4431 /* Assume that all other types are naturally aligned. CHECKME! */
4432 return true;
4436 /* Return true if the vector misalignment factor is supported by the
4437 target. */
4438 static bool
4439 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4440 const_tree type,
4441 int misalignment,
4442 bool is_packed)
4444 if (TARGET_VSX)
4446 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4447 return true;
4449 /* Return false if the movmisalign pattern is not supported for this mode. */
4450 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4451 return false;
4453 if (misalignment == -1)
4455 /* Misalignment factor is unknown at compile time but we know
4456 it's word aligned. */
4457 if (rs6000_vector_alignment_reachable (type, is_packed))
4459 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4461 if (element_size == 64 || element_size == 32)
4462 return true;
4465 return false;
4468 /* VSX supports word-aligned vectors. */
4469 if (misalignment % 4 == 0)
4470 return true;
4472 return false;
4475 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4476 static int
4477 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4478 tree vectype, int misalign)
4480 unsigned elements;
4481 tree elem_type;
4483 switch (type_of_cost)
4485 case scalar_stmt:
4486 case scalar_load:
4487 case scalar_store:
4488 case vector_stmt:
4489 case vector_load:
4490 case vector_store:
4491 case vec_to_scalar:
4492 case scalar_to_vec:
4493 case cond_branch_not_taken:
4494 return 1;
4496 case vec_perm:
4497 if (TARGET_VSX)
4498 return 3;
4499 else
4500 return 1;
4502 case vec_promote_demote:
4503 if (TARGET_VSX)
4504 return 4;
4505 else
4506 return 1;
4508 case cond_branch_taken:
4509 return 3;
4511 case unaligned_load:
4512 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4513 return 1;
4515 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4517 elements = TYPE_VECTOR_SUBPARTS (vectype);
4518 if (elements == 2)
4519 /* Double word aligned. */
4520 return 2;
4522 if (elements == 4)
4524 switch (misalign)
4526 case 8:
4527 /* Double word aligned. */
4528 return 2;
4530 case -1:
4531 /* Unknown misalignment. */
4532 case 4:
4533 case 12:
4534 /* Word aligned. */
4535 return 22;
4537 default:
4538 gcc_unreachable ();
4543 if (TARGET_ALTIVEC)
4544 /* Misaligned loads are not supported. */
4545 gcc_unreachable ();
4547 return 2;
4549 case unaligned_store:
4550 if (TARGET_EFFICIENT_UNALIGNED_VSX)
4551 return 1;
4553 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4555 elements = TYPE_VECTOR_SUBPARTS (vectype);
4556 if (elements == 2)
4557 /* Double word aligned. */
4558 return 2;
4560 if (elements == 4)
4562 switch (misalign)
4564 case 8:
4565 /* Double word aligned. */
4566 return 2;
4568 case -1:
4569 /* Unknown misalignment. */
4570 case 4:
4571 case 12:
4572 /* Word aligned. */
4573 return 23;
4575 default:
4576 gcc_unreachable ();
4581 if (TARGET_ALTIVEC)
4582 /* Misaligned stores are not supported. */
4583 gcc_unreachable ();
4585 return 2;
4587 case vec_construct:
4588 elements = TYPE_VECTOR_SUBPARTS (vectype);
4589 elem_type = TREE_TYPE (vectype);
4590 /* 32-bit vectors loaded into registers are stored as double
4591 precision, so we need n/2 converts in addition to the usual
4592 n/2 merges to construct a vector of short floats from them. */
4593 if (SCALAR_FLOAT_TYPE_P (elem_type)
4594 && TYPE_PRECISION (elem_type) == 32)
4595 return elements + 1;
4596 else
4597 return elements / 2 + 1;
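/* Worked examples of the vec_construct cost above (illustrative):
   V4SF (four 32-bit floats) costs 4 + 1 = 5 because of the extra
   converts; V2DF costs 2/2 + 1 = 2; V4SI costs 4/2 + 1 = 3.  */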
4599 default:
4600 gcc_unreachable ();
4604 /* Implement targetm.vectorize.preferred_simd_mode. */
4606 static machine_mode
4607 rs6000_preferred_simd_mode (machine_mode mode)
4609 if (TARGET_VSX)
4610 switch (mode)
4612 case DFmode:
4613 return V2DFmode;
4614 default:;
4616 if (TARGET_ALTIVEC || TARGET_VSX)
4617 switch (mode)
4619 case SFmode:
4620 return V4SFmode;
4621 case TImode:
4622 return V1TImode;
4623 case DImode:
4624 return V2DImode;
4625 case SImode:
4626 return V4SImode;
4627 case HImode:
4628 return V8HImode;
4629 case QImode:
4630 return V16QImode;
4631 default:;
4633 if (TARGET_SPE)
4634 switch (mode)
4636 case SFmode:
4637 return V2SFmode;
4638 case SImode:
4639 return V2SImode;
4640 default:;
4642 if (TARGET_PAIRED_FLOAT
4643 && mode == SFmode)
4644 return V2SFmode;
4645 return word_mode;
4648 typedef struct _rs6000_cost_data
4650 struct loop *loop_info;
4651 unsigned cost[3];
4652 } rs6000_cost_data;
4654 /* Test for likely overcommitment of vector hardware resources. If a
4655 loop iteration is relatively large, and too large a percentage of
4656 instructions in the loop are vectorized, the cost model may not
4657 adequately reflect delays from unavailable vector resources.
4658 Penalize the loop body cost for this case. */
4660 static void
4661 rs6000_density_test (rs6000_cost_data *data)
4663 const int DENSITY_PCT_THRESHOLD = 85;
4664 const int DENSITY_SIZE_THRESHOLD = 70;
4665 const int DENSITY_PENALTY = 10;
4666 struct loop *loop = data->loop_info;
4667 basic_block *bbs = get_loop_body (loop);
4668 int nbbs = loop->num_nodes;
4669 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4670 int i, density_pct;
4672 for (i = 0; i < nbbs; i++)
4674 basic_block bb = bbs[i];
4675 gimple_stmt_iterator gsi;
4677 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4679 gimple stmt = gsi_stmt (gsi);
4680 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4682 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4683 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4684 not_vec_cost++;
4688 free (bbs);
4689 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4691 if (density_pct > DENSITY_PCT_THRESHOLD
4692 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4694 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4695 if (dump_enabled_p ())
4696 dump_printf_loc (MSG_NOTE, vect_location,
4697 "density %d%%, cost %d exceeds threshold, penalizing "
4698 "loop body cost by %d%%", density_pct,
4699 vec_cost + not_vec_cost, DENSITY_PENALTY);
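/* Worked example: with vec_cost = 90 and not_vec_cost = 10, density_pct
   is 90 and the loop size is 100, so both thresholds are exceeded and
   the body cost becomes 90 * 110 / 100 = 99.  */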
4703 /* Implement targetm.vectorize.init_cost. */
4705 static void *
4706 rs6000_init_cost (struct loop *loop_info)
4708 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4709 data->loop_info = loop_info;
4710 data->cost[vect_prologue] = 0;
4711 data->cost[vect_body] = 0;
4712 data->cost[vect_epilogue] = 0;
4713 return data;
4716 /* Implement targetm.vectorize.add_stmt_cost. */
4718 static unsigned
4719 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4720 struct _stmt_vec_info *stmt_info, int misalign,
4721 enum vect_cost_model_location where)
4723 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4724 unsigned retval = 0;
4726 if (flag_vect_cost_model)
4728 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4729 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
4730 misalign);
4731 /* Statements in an inner loop relative to the loop being
4732 vectorized are weighted more heavily. The value here is
4733 arbitrary and could potentially be improved with analysis. */
4734 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4735 count *= 50; /* FIXME. */
4737 retval = (unsigned) (count * stmt_cost);
4738 cost_data->cost[where] += retval;
4741 return retval;
4744 /* Implement targetm.vectorize.finish_cost. */
4746 static void
4747 rs6000_finish_cost (void *data, unsigned *prologue_cost,
4748 unsigned *body_cost, unsigned *epilogue_cost)
4750 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4752 if (cost_data->loop_info)
4753 rs6000_density_test (cost_data);
4755 *prologue_cost = cost_data->cost[vect_prologue];
4756 *body_cost = cost_data->cost[vect_body];
4757 *epilogue_cost = cost_data->cost[vect_epilogue];
4760 /* Implement targetm.vectorize.destroy_cost_data. */
4762 static void
4763 rs6000_destroy_cost_data (void *data)
4765 free (data);
4768 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
4769 library with vectorized intrinsics. */
4771 static tree
4772 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
4774 char name[32];
4775 const char *suffix = NULL;
4776 tree fntype, new_fndecl, bdecl = NULL_TREE;
4777 int n_args = 1;
4778 const char *bname;
4779 machine_mode el_mode, in_mode;
4780 int n, in_n;
4782 /* Libmass is suitable only for unsafe math, since it does not correctly
4783 support parts of IEEE (such as denormals) with the required precision.
4784 Only support it if we have VSX, so we can use the SIMD d2 or f4 functions.
4785 XXX: Add variable-length support. */
4786 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
4787 return NULL_TREE;
4789 el_mode = TYPE_MODE (TREE_TYPE (type_out));
4790 n = TYPE_VECTOR_SUBPARTS (type_out);
4791 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4792 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4793 if (el_mode != in_mode
4794 || n != in_n)
4795 return NULL_TREE;
4797 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4799 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4800 switch (fn)
4802 case BUILT_IN_ATAN2:
4803 case BUILT_IN_HYPOT:
4804 case BUILT_IN_POW:
4805 n_args = 2;
4806 /* fall through */
4808 case BUILT_IN_ACOS:
4809 case BUILT_IN_ACOSH:
4810 case BUILT_IN_ASIN:
4811 case BUILT_IN_ASINH:
4812 case BUILT_IN_ATAN:
4813 case BUILT_IN_ATANH:
4814 case BUILT_IN_CBRT:
4815 case BUILT_IN_COS:
4816 case BUILT_IN_COSH:
4817 case BUILT_IN_ERF:
4818 case BUILT_IN_ERFC:
4819 case BUILT_IN_EXP2:
4820 case BUILT_IN_EXP:
4821 case BUILT_IN_EXPM1:
4822 case BUILT_IN_LGAMMA:
4823 case BUILT_IN_LOG10:
4824 case BUILT_IN_LOG1P:
4825 case BUILT_IN_LOG2:
4826 case BUILT_IN_LOG:
4827 case BUILT_IN_SIN:
4828 case BUILT_IN_SINH:
4829 case BUILT_IN_SQRT:
4830 case BUILT_IN_TAN:
4831 case BUILT_IN_TANH:
4832 bdecl = builtin_decl_implicit (fn);
4833 suffix = "d2"; /* pow -> powd2 */
4834 if (el_mode != DFmode
4835 || n != 2
4836 || !bdecl)
4837 return NULL_TREE;
4838 break;
4840 case BUILT_IN_ATAN2F:
4841 case BUILT_IN_HYPOTF:
4842 case BUILT_IN_POWF:
4843 n_args = 2;
4844 /* fall through */
4846 case BUILT_IN_ACOSF:
4847 case BUILT_IN_ACOSHF:
4848 case BUILT_IN_ASINF:
4849 case BUILT_IN_ASINHF:
4850 case BUILT_IN_ATANF:
4851 case BUILT_IN_ATANHF:
4852 case BUILT_IN_CBRTF:
4853 case BUILT_IN_COSF:
4854 case BUILT_IN_COSHF:
4855 case BUILT_IN_ERFF:
4856 case BUILT_IN_ERFCF:
4857 case BUILT_IN_EXP2F:
4858 case BUILT_IN_EXPF:
4859 case BUILT_IN_EXPM1F:
4860 case BUILT_IN_LGAMMAF:
4861 case BUILT_IN_LOG10F:
4862 case BUILT_IN_LOG1PF:
4863 case BUILT_IN_LOG2F:
4864 case BUILT_IN_LOGF:
4865 case BUILT_IN_SINF:
4866 case BUILT_IN_SINHF:
4867 case BUILT_IN_SQRTF:
4868 case BUILT_IN_TANF:
4869 case BUILT_IN_TANHF:
4870 bdecl = builtin_decl_implicit (fn);
4871 suffix = "4"; /* powf -> powf4 */
4872 if (el_mode != SFmode
4873 || n != 4
4874 || !bdecl)
4875 return NULL_TREE;
4876 break;
4878 default:
4879 return NULL_TREE;
4882 else
4883 return NULL_TREE;
4885 gcc_assert (suffix != NULL);
4886 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
4887 if (!bname)
4888 return NULL_TREE;
4890 strcpy (name, bname + sizeof ("__builtin_") - 1);
4891 strcat (name, suffix);
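/* Example of the name construction above: for BUILT_IN_POW, bname is
   "__builtin_pow"; the copy skips the 10-character "__builtin_" prefix,
   leaving "pow", and appending the "d2" suffix yields the libmass entry
   point "powd2" (similarly powf becomes "powf4").  */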
4893 if (n_args == 1)
4894 fntype = build_function_type_list (type_out, type_in, NULL);
4895 else if (n_args == 2)
4896 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
4897 else
4898 gcc_unreachable ();
4900 /* Build a function declaration for the vectorized function. */
4901 new_fndecl = build_decl (BUILTINS_LOCATION,
4902 FUNCTION_DECL, get_identifier (name), fntype);
4903 TREE_PUBLIC (new_fndecl) = 1;
4904 DECL_EXTERNAL (new_fndecl) = 1;
4905 DECL_IS_NOVOPS (new_fndecl) = 1;
4906 TREE_READONLY (new_fndecl) = 1;
4908 return new_fndecl;
4911 /* Returns a function decl for a vectorized version of the builtin function
4912 with builtin function code FN and the result vector type TYPE, or NULL_TREE
4913 if it is not available. */
4915 static tree
4916 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
4917 tree type_in)
4919 machine_mode in_mode, out_mode;
4920 int in_n, out_n;
4922 if (TARGET_DEBUG_BUILTIN)
4923 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
4924 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
4925 GET_MODE_NAME (TYPE_MODE (type_out)),
4926 GET_MODE_NAME (TYPE_MODE (type_in)));
4928 if (TREE_CODE (type_out) != VECTOR_TYPE
4929 || TREE_CODE (type_in) != VECTOR_TYPE
4930 || !TARGET_VECTORIZE_BUILTINS)
4931 return NULL_TREE;
4933 out_mode = TYPE_MODE (TREE_TYPE (type_out));
4934 out_n = TYPE_VECTOR_SUBPARTS (type_out);
4935 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4936 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4938 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4940 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4941 switch (fn)
4943 case BUILT_IN_CLZIMAX:
4944 case BUILT_IN_CLZLL:
4945 case BUILT_IN_CLZL:
4946 case BUILT_IN_CLZ:
4947 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4949 if (out_mode == QImode && out_n == 16)
4950 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
4951 else if (out_mode == HImode && out_n == 8)
4952 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
4953 else if (out_mode == SImode && out_n == 4)
4954 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
4955 else if (out_mode == DImode && out_n == 2)
4956 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
4958 break;
4959 case BUILT_IN_COPYSIGN:
4960 if (VECTOR_UNIT_VSX_P (V2DFmode)
4961 && out_mode == DFmode && out_n == 2
4962 && in_mode == DFmode && in_n == 2)
4963 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
4964 break;
4965 case BUILT_IN_COPYSIGNF:
4966 if (out_mode != SFmode || out_n != 4
4967 || in_mode != SFmode || in_n != 4)
4968 break;
4969 if (VECTOR_UNIT_VSX_P (V4SFmode))
4970 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
4971 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4972 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
4973 break;
4974 case BUILT_IN_POPCOUNTIMAX:
4975 case BUILT_IN_POPCOUNTLL:
4976 case BUILT_IN_POPCOUNTL:
4977 case BUILT_IN_POPCOUNT:
4978 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4980 if (out_mode == QImode && out_n == 16)
4981 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
4982 else if (out_mode == HImode && out_n == 8)
4983 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
4984 else if (out_mode == SImode && out_n == 4)
4985 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
4986 else if (out_mode == DImode && out_n == 2)
4987 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
4989 break;
4990 case BUILT_IN_SQRT:
4991 if (VECTOR_UNIT_VSX_P (V2DFmode)
4992 && out_mode == DFmode && out_n == 2
4993 && in_mode == DFmode && in_n == 2)
4994 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
4995 break;
4996 case BUILT_IN_SQRTF:
4997 if (VECTOR_UNIT_VSX_P (V4SFmode)
4998 && out_mode == SFmode && out_n == 4
4999 && in_mode == SFmode && in_n == 4)
5000 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
5001 break;
5002 case BUILT_IN_CEIL:
5003 if (VECTOR_UNIT_VSX_P (V2DFmode)
5004 && out_mode == DFmode && out_n == 2
5005 && in_mode == DFmode && in_n == 2)
5006 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
5007 break;
5008 case BUILT_IN_CEILF:
5009 if (out_mode != SFmode || out_n != 4
5010 || in_mode != SFmode || in_n != 4)
5011 break;
5012 if (VECTOR_UNIT_VSX_P (V4SFmode))
5013 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
5014 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
5015 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
5016 break;
5017 case BUILT_IN_FLOOR:
5018 if (VECTOR_UNIT_VSX_P (V2DFmode)
5019 && out_mode == DFmode && out_n == 2
5020 && in_mode == DFmode && in_n == 2)
5021 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
5022 break;
5023 case BUILT_IN_FLOORF:
5024 if (out_mode != SFmode || out_n != 4
5025 || in_mode != SFmode || in_n != 4)
5026 break;
5027 if (VECTOR_UNIT_VSX_P (V4SFmode))
5028 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
5029 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
5030 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
5031 break;
5032 case BUILT_IN_FMA:
5033 if (VECTOR_UNIT_VSX_P (V2DFmode)
5034 && out_mode == DFmode && out_n == 2
5035 && in_mode == DFmode && in_n == 2)
5036 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
5037 break;
5038 case BUILT_IN_FMAF:
5039 if (VECTOR_UNIT_VSX_P (V4SFmode)
5040 && out_mode == SFmode && out_n == 4
5041 && in_mode == SFmode && in_n == 4)
5042 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
5043 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
5044 && out_mode == SFmode && out_n == 4
5045 && in_mode == SFmode && in_n == 4)
5046 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
5047 break;
5048 case BUILT_IN_TRUNC:
5049 if (VECTOR_UNIT_VSX_P (V2DFmode)
5050 && out_mode == DFmode && out_n == 2
5051 && in_mode == DFmode && in_n == 2)
5052 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
5053 break;
5054 case BUILT_IN_TRUNCF:
5055 if (out_mode != SFmode || out_n != 4
5056 || in_mode != SFmode || in_n != 4)
5057 break;
5058 if (VECTOR_UNIT_VSX_P (V4SFmode))
5059 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
5060 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
5061 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
5062 break;
5063 case BUILT_IN_NEARBYINT:
5064 if (VECTOR_UNIT_VSX_P (V2DFmode)
5065 && flag_unsafe_math_optimizations
5066 && out_mode == DFmode && out_n == 2
5067 && in_mode == DFmode && in_n == 2)
5068 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
5069 break;
5070 case BUILT_IN_NEARBYINTF:
5071 if (VECTOR_UNIT_VSX_P (V4SFmode)
5072 && flag_unsafe_math_optimizations
5073 && out_mode == SFmode && out_n == 4
5074 && in_mode == SFmode && in_n == 4)
5075 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
5076 break;
5077 case BUILT_IN_RINT:
5078 if (VECTOR_UNIT_VSX_P (V2DFmode)
5079 && !flag_trapping_math
5080 && out_mode == DFmode && out_n == 2
5081 && in_mode == DFmode && in_n == 2)
5082 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
5083 break;
5084 case BUILT_IN_RINTF:
5085 if (VECTOR_UNIT_VSX_P (V4SFmode)
5086 && !flag_trapping_math
5087 && out_mode == SFmode && out_n == 4
5088 && in_mode == SFmode && in_n == 4)
5089 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
5090 break;
5091 default:
5092 break;
5096 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
5098 enum rs6000_builtins fn
5099 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
5100 switch (fn)
5102 case RS6000_BUILTIN_RSQRTF:
5103 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5104 && out_mode == SFmode && out_n == 4
5105 && in_mode == SFmode && in_n == 4)
5106 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
5107 break;
5108 case RS6000_BUILTIN_RSQRT:
5109 if (VECTOR_UNIT_VSX_P (V2DFmode)
5110 && out_mode == DFmode && out_n == 2
5111 && in_mode == DFmode && in_n == 2)
5112 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
5113 break;
5114 case RS6000_BUILTIN_RECIPF:
5115 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
5116 && out_mode == SFmode && out_n == 4
5117 && in_mode == SFmode && in_n == 4)
5118 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
5119 break;
5120 case RS6000_BUILTIN_RECIP:
5121 if (VECTOR_UNIT_VSX_P (V2DFmode)
5122 && out_mode == DFmode && out_n == 2
5123 && in_mode == DFmode && in_n == 2)
5124 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
5125 break;
5126 default:
5127 break;
5131 /* Generate calls to libmass if appropriate. */
5132 if (rs6000_veclib_handler)
5133 return rs6000_veclib_handler (fndecl, type_out, type_in);
5135 return NULL_TREE;
5138 /* Default CPU string for rs6000*_file_start functions. */
5139 static const char *rs6000_default_cpu;
5141 /* Do anything needed at the start of the asm file. */
5143 static void
5144 rs6000_file_start (void)
5146 char buffer[80];
5147 const char *start = buffer;
5148 FILE *file = asm_out_file;
5150 rs6000_default_cpu = TARGET_CPU_DEFAULT;
5152 default_file_start ();
5154 if (flag_verbose_asm)
5156 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
5158 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5160 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5161 start = "";
5164 if (global_options_set.x_rs6000_cpu_index)
5166 fprintf (file, "%s -mcpu=%s", start,
5167 processor_target_table[rs6000_cpu_index].name);
5168 start = "";
5171 if (global_options_set.x_rs6000_tune_index)
5173 fprintf (file, "%s -mtune=%s", start,
5174 processor_target_table[rs6000_tune_index].name);
5175 start = "";
5178 if (PPC405_ERRATUM77)
5180 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5181 start = "";
5184 #ifdef USING_ELFOS_H
5185 switch (rs6000_sdata)
5187 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5188 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5189 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5190 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5193 if (rs6000_sdata && g_switch_value)
5195 fprintf (file, "%s -G %d", start,
5196 g_switch_value);
5197 start = "";
5199 #endif
5201 if (*start == '\0')
5202 putc ('\n', file);
5205 #ifdef USING_ELFOS_H
5206 if (rs6000_default_cpu == 0 || rs6000_default_cpu[0] == '\0'
5207 || !global_options_set.x_rs6000_cpu_index)
5209 fputs ("\t.machine ", asm_out_file);
5210 if ((rs6000_isa_flags & OPTION_MASK_DIRECT_MOVE) != 0)
5211 fputs ("power8\n", asm_out_file);
5212 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTD) != 0)
5213 fputs ("power7\n", asm_out_file);
5214 else if ((rs6000_isa_flags & OPTION_MASK_CMPB) != 0)
5215 fputs ("power6\n", asm_out_file);
5216 else if ((rs6000_isa_flags & OPTION_MASK_POPCNTB) != 0)
5217 fputs ("power5\n", asm_out_file);
5218 else if ((rs6000_isa_flags & OPTION_MASK_MFCRF) != 0)
5219 fputs ("power4\n", asm_out_file);
5220 else if ((rs6000_isa_flags & OPTION_MASK_POWERPC64) != 0)
5221 fputs ("ppc64\n", asm_out_file);
5222 else
5223 fputs ("ppc\n", asm_out_file);
5225 #endif
5227 if (DEFAULT_ABI == ABI_ELFv2)
5228 fprintf (file, "\t.abiversion 2\n");
5230 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
5231 || (TARGET_ELF && flag_pic == 2))
5233 switch_to_section (toc_section);
5234 switch_to_section (text_section);
5239 /* Return nonzero if this function is known to have a null epilogue. */
5241 int
5242 direct_return (void)
5244 if (reload_completed)
5246 rs6000_stack_t *info = rs6000_stack_info ();
5248 if (info->first_gp_reg_save == 32
5249 && info->first_fp_reg_save == 64
5250 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5251 && ! info->lr_save_p
5252 && ! info->cr_save_p
5253 && info->vrsave_size == 0
5254 && ! info->push_p)
5255 return 1;
5258 return 0;
5261 /* Return the number of instructions it takes to form a constant in an
5262 integer register. */
5264 static int
5265 num_insns_constant_wide (HOST_WIDE_INT value)
5267 /* signed constant loadable with addi */
5268 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5269 return 1;
5271 /* constant loadable with addis */
5272 else if ((value & 0xffff) == 0
5273 && (value >> 31 == -1 || value >> 31 == 0))
5274 return 1;
5276 else if (TARGET_POWERPC64)
5278 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5279 HOST_WIDE_INT high = value >> 31;
5281 if (high == 0 || high == -1)
5282 return 2;
5284 high >>= 1;
5286 if (low == 0)
5287 return num_insns_constant_wide (high) + 1;
5288 else if (high == 0)
5289 return num_insns_constant_wide (low) + 1;
5290 else
5291 return (num_insns_constant_wide (high)
5292 + num_insns_constant_wide (low) + 1);
5295 else
5296 return 2;
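/* Editorial sketch, not part of rs6000.c: the addi/addis tests above in
   isolation, assuming a 64-bit HOST_WIDE_INT (here plain long).  li/addi
   covers any sign-extended 16-bit value; addis covers any value with a
   zero low halfword whose upper 33 bits are all zero or all one.  */
#if 0
#include <stdio.h>

static int
insns_for (long v)
{
  if ((unsigned long) v + 0x8000 < 0x10000)     /* fits li/addi */
    return 1;
  if ((v & 0xffff) == 0
      && (v >> 31 == 0 || v >> 31 == -1))       /* fits lis/addis */
    return 1;
  return 2;                                     /* e.g. lis then ori */
}

int
main (void)
{
  /* Prints "1 1 2": li for 0x1234, lis for 0x12340000,
     lis+ori for 0x12345678.  */
  printf ("%d %d %d\n", insns_for (0x1234L), insns_for (0x12340000L),
          insns_for (0x12345678L));
  return 0;
}
#endif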
5299 int
5300 num_insns_constant (rtx op, machine_mode mode)
5302 HOST_WIDE_INT low, high;
5304 switch (GET_CODE (op))
5306 case CONST_INT:
5307 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5308 && rs6000_is_valid_and_mask (op, mode))
5309 return 2;
5310 else
5311 return num_insns_constant_wide (INTVAL (op));
5313 case CONST_WIDE_INT:
5315 int i;
5316 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5317 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5318 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5319 return ins;
5322 case CONST_DOUBLE:
5323 if (mode == SFmode || mode == SDmode)
5325 long l;
5326 REAL_VALUE_TYPE rv;
5328 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5329 if (DECIMAL_FLOAT_MODE_P (mode))
5330 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
5331 else
5332 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
5333 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5336 long l[2];
5337 REAL_VALUE_TYPE rv;
5339 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5340 if (DECIMAL_FLOAT_MODE_P (mode))
5341 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
5342 else
5343 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
5344 high = l[WORDS_BIG_ENDIAN == 0];
5345 low = l[WORDS_BIG_ENDIAN != 0];
5347 if (TARGET_32BIT)
5348 return (num_insns_constant_wide (low)
5349 + num_insns_constant_wide (high));
5350 else
5352 if ((high == 0 && low >= 0)
5353 || (high == -1 && low < 0))
5354 return num_insns_constant_wide (low);
5356 else if (rs6000_is_valid_and_mask (op, mode))
5357 return 2;
5359 else if (low == 0)
5360 return num_insns_constant_wide (high) + 1;
5362 else
5363 return (num_insns_constant_wide (high)
5364 + num_insns_constant_wide (low) + 1);
5367 default:
5368 gcc_unreachable ();
5372 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5373 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5374 corresponding element of the vector, but for V4SFmode and V2SFmode,
5375 the corresponding "float" is interpreted as an SImode integer. */
5377 HOST_WIDE_INT
5378 const_vector_elt_as_int (rtx op, unsigned int elt)
5380 rtx tmp;
5382 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5383 gcc_assert (GET_MODE (op) != V2DImode
5384 && GET_MODE (op) != V2DFmode);
5386 tmp = CONST_VECTOR_ELT (op, elt);
5387 if (GET_MODE (op) == V4SFmode
5388 || GET_MODE (op) == V2SFmode)
5389 tmp = gen_lowpart (SImode, tmp);
5390 return INTVAL (tmp);
5393 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5394 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5395 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5396 all items are set to the same value and contain COPIES replicas of the
5397 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5398 operand and the others are set to the value of the operand's msb. */
5400 static bool
5401 vspltis_constant (rtx op, unsigned step, unsigned copies)
5403 machine_mode mode = GET_MODE (op);
5404 machine_mode inner = GET_MODE_INNER (mode);
5406 unsigned i;
5407 unsigned nunits;
5408 unsigned bitsize;
5409 unsigned mask;
5411 HOST_WIDE_INT val;
5412 HOST_WIDE_INT splat_val;
5413 HOST_WIDE_INT msb_val;
5415 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5416 return false;
5418 nunits = GET_MODE_NUNITS (mode);
5419 bitsize = GET_MODE_BITSIZE (inner);
5420 mask = GET_MODE_MASK (inner);
5422 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5423 splat_val = val;
5424 msb_val = val >= 0 ? 0 : -1;
5426 /* Construct the value to be splatted, if possible. If not, return 0. */
5427 for (i = 2; i <= copies; i *= 2)
5429 HOST_WIDE_INT small_val;
5430 bitsize /= 2;
5431 small_val = splat_val >> bitsize;
5432 mask >>= bitsize;
5433 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5434 return false;
5435 splat_val = small_val;
5438 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5439 if (EASY_VECTOR_15 (splat_val))
5442 /* Also check if we can splat, and then add the result to itself. Do so if
5443 the value is positive, or if the splat instruction is using OP's mode;
5444 for splat_val < 0, the splat and the add should use the same mode. */
5445 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5446 && (splat_val >= 0 || (step == 1 && copies == 1)))
5449 /* Also check if we are loading up the most significant bit, which can be
5450 done by loading up -1 and shifting the value left by -1. */
5451 else if (EASY_VECTOR_MSB (splat_val, inner))
5454 else
5455 return false;
5457 /* Check if VAL is present in every STEP-th element, and the
5458 other elements are filled with its most significant bit. */
5459 for (i = 1; i < nunits; ++i)
5461 HOST_WIDE_INT desired_val;
5462 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5463 if ((i & (step - 1)) == 0)
5464 desired_val = val;
5465 else
5466 desired_val = msb_val;
5468 if (desired_val != const_vector_elt_as_int (op, elt))
5469 return false;
5472 return true;
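/* Editorial sketch, not part of rs6000.c: one iteration of the COPIES
   folding loop above, on an assumed V4SI element 0x00050005.  The value
   is two identical halfwords, so it folds to 5 and the vector can be
   built with vspltish 5 (copies == 2).  */
#if 0
#include <stdio.h>
#include <stdint.h>

int
main (void)
{
  int32_t splat_val = 0x00050005;
  unsigned bitsize = 32;
  uint32_t mask = 0xffffffff;

  bitsize /= 2;                         /* 16-bit halves */
  mask >>= bitsize;                     /* 0xffff */
  int32_t small_val = splat_val >> bitsize;
  if (splat_val == (int32_t) ((small_val << bitsize) | (small_val & mask)))
    printf ("foldable: vspltish %d\n", small_val);   /* prints 5 */
  return 0;
}
#endif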
5475 /* Like vspltis_constant, but allow the value to be shifted left with a VSLDOI
5476 instruction, filling in the bottom elements with 0 or -1.
5478 Return 0 if the constant cannot be generated with VSLDOI. Return positive
5479 for the number of zeroes to shift in, or negative for the number of 0xff
5480 bytes to shift in.
5482 OP is a CONST_VECTOR. */
5484 static int
5485 vspltis_shifted (rtx op)
5487 machine_mode mode = GET_MODE (op);
5488 machine_mode inner = GET_MODE_INNER (mode);
5490 unsigned i, j;
5491 unsigned nunits;
5492 unsigned mask;
5494 HOST_WIDE_INT val;
5496 if (mode != V16QImode && mode != V8HImode && mode != V4SImode)
5497 return 0;
5499 /* We need to create pseudo registers to do the shift, so don't recognize
5500 shift vector constants after reload. */
5501 if (!can_create_pseudo_p ())
5502 return 0;
5504 nunits = GET_MODE_NUNITS (mode);
5505 mask = GET_MODE_MASK (inner);
5507 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? 0 : nunits - 1);
5509 /* Check if the value can really be the operand of a vspltis[bhw]. */
5510 if (EASY_VECTOR_15 (val))
5513 /* Also check if we are loading up the most significant bit, which can be
5514 done by loading up -1 and shifting the value left by -1. */
5515 else if (EASY_VECTOR_MSB (val, inner))
5518 else
5519 return 0;
5521 /* Check if VAL is present in every STEP-th element until we find elements
5522 that are 0 or all 1 bits. */
5523 for (i = 1; i < nunits; ++i)
5525 unsigned elt = BYTES_BIG_ENDIAN ? i : nunits - 1 - i;
5526 HOST_WIDE_INT elt_val = const_vector_elt_as_int (op, elt);
5528 /* If the value isn't the splat value, check for the remaining elements
5529 being 0/-1. */
5530 if (val != elt_val)
5532 if (elt_val == 0)
5534 for (j = i+1; j < nunits; ++j)
5536 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5537 if (const_vector_elt_as_int (op, elt2) != 0)
5538 return 0;
5541 return (nunits - i) * GET_MODE_SIZE (inner);
5544 else if ((elt_val & mask) == mask)
5546 for (j = i+1; j < nunits; ++j)
5548 unsigned elt2 = BYTES_BIG_ENDIAN ? j : nunits - 1 - j;
5549 if ((const_vector_elt_as_int (op, elt2) & mask) != mask)
5550 return 0;
5553 return -((nunits - i) * GET_MODE_SIZE (inner));
5556 else
5557 return 0;
5561 /* If all elements are equal, we don't need to do VSLDOI. */
5562 return 0;
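/* Worked example (editorial; assumes big-endian V8HImode): the constant
   { 3, 0, 0, 0, 0, 0, 0, 0 } splats 3 and then needs zeros, so the loop
   above returns (8 - 1) * 2 == 14, i.e. vspltish 3 followed by a VSLDOI
   shifting in 14 zero bytes; { 3, -1, -1, -1, -1, -1, -1, -1 } would
   instead return -14.  */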
5566 /* Return true if OP is of the given MODE and can be synthesized
5567 with a vspltisb, vspltish or vspltisw. */
5569 bool
5570 easy_altivec_constant (rtx op, machine_mode mode)
5572 unsigned step, copies;
5574 if (mode == VOIDmode)
5575 mode = GET_MODE (op);
5576 else if (mode != GET_MODE (op))
5577 return false;
5579 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5580 constants. */
5581 if (mode == V2DFmode)
5582 return zero_constant (op, mode);
5584 else if (mode == V2DImode)
5586 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5587 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5588 return false;
5590 if (zero_constant (op, mode))
5591 return true;
5593 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5594 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5595 return true;
5597 return false;
5600 /* V1TImode is a special container for TImode. Ignore for now. */
5601 else if (mode == V1TImode)
5602 return false;
5604 /* Start with a vspltisw. */
5605 step = GET_MODE_NUNITS (mode) / 4;
5606 copies = 1;
5608 if (vspltis_constant (op, step, copies))
5609 return true;
5611 /* Then try with a vspltish. */
5612 if (step == 1)
5613 copies <<= 1;
5614 else
5615 step >>= 1;
5617 if (vspltis_constant (op, step, copies))
5618 return true;
5620 /* And finally a vspltisb. */
5621 if (step == 1)
5622 copies <<= 1;
5623 else
5624 step >>= 1;
5626 if (vspltis_constant (op, step, copies))
5627 return true;
5629 if (vspltis_shifted (op) != 0)
5630 return true;
5632 return false;
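/* Editorial note on the STEP/COPIES progression above: for V16QImode,
   nunits == 16, so the tries are step 4 (vspltisw), step 2 (vspltish)
   and step 1 (vspltisb), all with copies == 1; for V4SImode the walk
   is instead copies 1, 2 and 4 with step fixed at 1.  */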
5635 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5636 result is OP. Abort if it is not possible. */
5638 rtx
5639 gen_easy_altivec_constant (rtx op)
5641 machine_mode mode = GET_MODE (op);
5642 int nunits = GET_MODE_NUNITS (mode);
5643 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5644 unsigned step = nunits / 4;
5645 unsigned copies = 1;
5647 /* Start with a vspltisw. */
5648 if (vspltis_constant (op, step, copies))
5649 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5651 /* Then try with a vspltish. */
5652 if (step == 1)
5653 copies <<= 1;
5654 else
5655 step >>= 1;
5657 if (vspltis_constant (op, step, copies))
5658 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5660 /* And finally a vspltisb. */
5661 if (step == 1)
5662 copies <<= 1;
5663 else
5664 step >>= 1;
5666 if (vspltis_constant (op, step, copies))
5667 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
5669 gcc_unreachable ();
5672 const char *
5673 output_vec_const_move (rtx *operands)
5675 int cst, cst2, shift;
5676 machine_mode mode;
5677 rtx dest, vec;
5679 dest = operands[0];
5680 vec = operands[1];
5681 mode = GET_MODE (dest);
5683 if (TARGET_VSX)
5685 if (zero_constant (vec, mode))
5686 return "xxlxor %x0,%x0,%x0";
5688 if (TARGET_P8_VECTOR && vec == CONSTM1_RTX (mode))
5689 return "xxlorc %x0,%x0,%x0";
5691 if ((mode == V2DImode || mode == V1TImode)
5692 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
5693 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
5694 return (TARGET_P8_VECTOR) ? "xxlorc %x0,%x0,%x0" : "vspltisw %0,-1";
5697 if (TARGET_ALTIVEC)
5699 rtx splat_vec;
5700 if (zero_constant (vec, mode))
5701 return "vxor %0,%0,%0";
5703 /* Do we need to construct a value using VSLDOI? */
5704 shift = vspltis_shifted (vec);
5705 if (shift != 0)
5706 return "#";
5708 splat_vec = gen_easy_altivec_constant (vec);
5709 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
5710 operands[1] = XEXP (splat_vec, 0);
5711 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
5712 return "#";
5714 switch (GET_MODE (splat_vec))
5716 case V4SImode:
5717 return "vspltisw %0,%1";
5719 case V8HImode:
5720 return "vspltish %0,%1";
5722 case V16QImode:
5723 return "vspltisb %0,%1";
5725 default:
5726 gcc_unreachable ();
5730 gcc_assert (TARGET_SPE);
5732 /* Vector constant 0 is handled as a splitter of V2SI, and in the
5733 pattern of V1DI, V4HI, and V2SF.
5735 FIXME: We should probably return # and add post reload
5736 splitters for these, but this way is so easy ;-). */
5737 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
5738 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
5739 operands[1] = CONST_VECTOR_ELT (vec, 0);
5740 operands[2] = CONST_VECTOR_ELT (vec, 1);
5741 if (cst == cst2)
5742 return "li %0,%1\n\tevmergelo %0,%0,%0";
5743 else if (WORDS_BIG_ENDIAN)
5744 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
5745 else
5746 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
5749 /* Initialize TARGET of vector PAIRED to VALS. */
5751 void
5752 paired_expand_vector_init (rtx target, rtx vals)
5754 machine_mode mode = GET_MODE (target);
5755 int n_elts = GET_MODE_NUNITS (mode);
5756 int n_var = 0;
5757 rtx x, new_rtx, tmp, constant_op, op1, op2;
5758 int i;
5760 for (i = 0; i < n_elts; ++i)
5762 x = XVECEXP (vals, 0, i);
5763 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5764 ++n_var;
5766 if (n_var == 0)
5768 /* Load from constant pool. */
5769 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5770 return;
5773 if (n_var == 2)
5775 /* The vector is initialized only with non-constants. */
5776 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
5777 XVECEXP (vals, 0, 1));
5779 emit_move_insn (target, new_rtx);
5780 return;
5783 /* One field is non-constant and the other one is a constant. Load the
5784 constant from the constant pool and use ps_merge instruction to
5785 construct the whole vector. */
5786 op1 = XVECEXP (vals, 0, 0);
5787 op2 = XVECEXP (vals, 0, 1);
5789 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
5791 tmp = gen_reg_rtx (GET_MODE (constant_op));
5792 emit_move_insn (tmp, constant_op);
5794 if (CONSTANT_P (op1))
5795 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
5796 else
5797 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
5799 emit_move_insn (target, new_rtx);
5802 void
5803 paired_expand_vector_move (rtx operands[])
5805 rtx op0 = operands[0], op1 = operands[1];
5807 emit_move_insn (op0, op1);
5810 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
5811 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
5812 operands for the relation operation COND. This is a recursive
5813 function. */
5815 static void
5816 paired_emit_vector_compare (enum rtx_code rcode,
5817 rtx dest, rtx op0, rtx op1,
5818 rtx cc_op0, rtx cc_op1)
5820 rtx tmp = gen_reg_rtx (V2SFmode);
5821 rtx tmp1, max, min;
5823 gcc_assert (TARGET_PAIRED_FLOAT);
5824 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5826 switch (rcode)
5828 case LT:
5829 case LTU:
5830 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5831 return;
5832 case GE:
5833 case GEU:
5834 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5835 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
5836 return;
5837 case LE:
5838 case LEU:
5839 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
5840 return;
5841 case GT:
5842 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5843 return;
5844 case EQ:
5845 tmp1 = gen_reg_rtx (V2SFmode);
5846 max = gen_reg_rtx (V2SFmode);
5847 min = gen_reg_rtx (V2SFmode);
5850 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5851 emit_insn (gen_selv2sf4
5852 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5853 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
5854 emit_insn (gen_selv2sf4
5855 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5856 emit_insn (gen_subv2sf3 (tmp1, min, max));
5857 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
5858 return;
5859 case NE:
5860 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
5861 return;
5862 case UNLE:
5863 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5864 return;
5865 case UNLT:
5866 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
5867 return;
5868 case UNGE:
5869 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5870 return;
5871 case UNGT:
5872 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
5873 return;
5874 default:
5875 gcc_unreachable ();
5878 return;
5881 /* Emit vector conditional expression.
5882 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5883 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5885 int
5886 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5887 rtx cond, rtx cc_op0, rtx cc_op1)
5889 enum rtx_code rcode = GET_CODE (cond);
5891 if (!TARGET_PAIRED_FLOAT)
5892 return 0;
5894 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
5896 return 1;
5899 /* Initialize vector TARGET to VALS. */
5901 void
5902 rs6000_expand_vector_init (rtx target, rtx vals)
5904 machine_mode mode = GET_MODE (target);
5905 machine_mode inner_mode = GET_MODE_INNER (mode);
5906 int n_elts = GET_MODE_NUNITS (mode);
5907 int n_var = 0, one_var = -1;
5908 bool all_same = true, all_const_zero = true;
5909 rtx x, mem;
5910 int i;
5912 for (i = 0; i < n_elts; ++i)
5914 x = XVECEXP (vals, 0, i);
5915 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5916 ++n_var, one_var = i;
5917 else if (x != CONST0_RTX (inner_mode))
5918 all_const_zero = false;
5920 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
5921 all_same = false;
5924 if (n_var == 0)
5926 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
5927 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
5928 if ((int_vector_p || TARGET_VSX) && all_const_zero)
5930 /* Zero register. */
5931 emit_insn (gen_rtx_SET (target, gen_rtx_XOR (mode, target, target)));
5932 return;
5934 else if (int_vector_p && easy_vector_constant (const_vec, mode))
5936 /* Splat immediate. */
5937 emit_insn (gen_rtx_SET (target, const_vec));
5938 return;
5940 else
5942 /* Load from constant pool. */
5943 emit_move_insn (target, const_vec);
5944 return;
5948 /* Double word values on VSX can use xxpermdi or lxvdsx. */
5949 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5951 rtx op0 = XVECEXP (vals, 0, 0);
5952 rtx op1 = XVECEXP (vals, 0, 1);
5953 if (all_same)
5955 if (!MEM_P (op0) && !REG_P (op0))
5956 op0 = force_reg (inner_mode, op0);
5957 if (mode == V2DFmode)
5958 emit_insn (gen_vsx_splat_v2df (target, op0));
5959 else
5960 emit_insn (gen_vsx_splat_v2di (target, op0));
5962 else
5964 op0 = force_reg (inner_mode, op0);
5965 op1 = force_reg (inner_mode, op1);
5966 if (mode == V2DFmode)
5967 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
5968 else
5969 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
5971 return;
5974 /* With single precision floating point on VSX, we know that internally
5975 single precision is actually represented as a double. Either make two
5976 V2DF vectors and convert those vectors to single precision, or do one
5977 conversion and splat the result to the other elements. */
5978 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
5980 if (all_same)
5982 rtx freg = gen_reg_rtx (V4SFmode);
5983 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
5984 rtx cvt = ((TARGET_XSCVDPSPN)
5985 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
5986 : gen_vsx_xscvdpsp_scalar (freg, sreg));
5988 emit_insn (cvt);
5989 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
5991 else
5993 rtx dbl_even = gen_reg_rtx (V2DFmode);
5994 rtx dbl_odd = gen_reg_rtx (V2DFmode);
5995 rtx flt_even = gen_reg_rtx (V4SFmode);
5996 rtx flt_odd = gen_reg_rtx (V4SFmode);
5997 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
5998 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
5999 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
6000 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
6002 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
6003 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
6004 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
6005 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
6006 rs6000_expand_extract_even (target, flt_even, flt_odd);
6008 return;
6011 /* Store value to stack temp. Load vector element. Splat. However, splat
6012 of 64-bit items is not supported on Altivec. */
6013 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
6015 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6016 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
6017 XVECEXP (vals, 0, 0));
6018 x = gen_rtx_UNSPEC (VOIDmode,
6019 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6020 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6021 gen_rtvec (2,
6022 gen_rtx_SET (target, mem),
6023 x)));
6024 x = gen_rtx_VEC_SELECT (inner_mode, target,
6025 gen_rtx_PARALLEL (VOIDmode,
6026 gen_rtvec (1, const0_rtx)));
6027 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
6028 return;
6031 /* One field is non-constant. Load constant then overwrite
6032 varying field. */
6033 if (n_var == 1)
6035 rtx copy = copy_rtx (vals);
6037 /* Load constant part of vector, substitute neighboring value for
6038 varying element. */
6039 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
6040 rs6000_expand_vector_init (target, copy);
6042 /* Insert variable. */
6043 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
6044 return;
6047 /* Construct the vector in memory one field at a time
6048 and load the whole vector. */
6049 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6050 for (i = 0; i < n_elts; i++)
6051 emit_move_insn (adjust_address_nv (mem, inner_mode,
6052 i * GET_MODE_SIZE (inner_mode)),
6053 XVECEXP (vals, 0, i));
6054 emit_move_insn (target, mem);
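/* Editorial sketch, not part of rs6000.c: a hypothetical caller building
   { x, 0, 0, 0 } in V4SImode (the names `x' and `v' are assumed).  With
   one variable element this takes the n_var == 1 path above: load the
   constant part, then rs6000_expand_vector_set inserts `x'.  */
#if 0
rtx x = gen_reg_rtx (SImode);
rtx vals = gen_rtx_PARALLEL (V4SImode,
                             gen_rtvec (4, x, const0_rtx,
                                        const0_rtx, const0_rtx));
rtx v = gen_reg_rtx (V4SImode);
rs6000_expand_vector_init (v, vals);
#endif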
6057 /* Set field ELT of TARGET to VAL. */
6059 void
6060 rs6000_expand_vector_set (rtx target, rtx val, int elt)
6062 machine_mode mode = GET_MODE (target);
6063 machine_mode inner_mode = GET_MODE_INNER (mode);
6064 rtx reg = gen_reg_rtx (mode);
6065 rtx mask, mem, x;
6066 int width = GET_MODE_SIZE (inner_mode);
6067 int i;
6069 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
6071 rtx (*set_func) (rtx, rtx, rtx, rtx)
6072 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
6073 emit_insn (set_func (target, target, val, GEN_INT (elt)));
6074 return;
6077 /* Simplify setting single element vectors like V1TImode. */
6078 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
6080 emit_move_insn (target, gen_lowpart (mode, val));
6081 return;
6084 /* Load single variable value. */
6085 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
6086 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
6087 x = gen_rtx_UNSPEC (VOIDmode,
6088 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
6089 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6090 gen_rtvec (2,
6091 gen_rtx_SET (reg, mem),
6092 x)));
6094 /* Linear sequence. */
6095 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
6096 for (i = 0; i < 16; ++i)
6097 XVECEXP (mask, 0, i) = GEN_INT (i);
6099 /* Set permute mask to insert element into target. */
6100 for (i = 0; i < width; ++i)
6101 XVECEXP (mask, 0, elt*width + i)
6102 = GEN_INT (i + 0x10);
6103 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
6105 if (BYTES_BIG_ENDIAN)
6106 x = gen_rtx_UNSPEC (mode,
6107 gen_rtvec (3, target, reg,
6108 force_reg (V16QImode, x)),
6109 UNSPEC_VPERM);
6110 else
6112 /* Invert selector. We prefer to generate VNAND on P8 so
6113 that future fusion opportunities can kick in, but must
6114 generate VNOR elsewhere. */
6115 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
6116 rtx iorx = (TARGET_P8_VECTOR
6117 ? gen_rtx_IOR (V16QImode, notx, notx)
6118 : gen_rtx_AND (V16QImode, notx, notx));
6119 rtx tmp = gen_reg_rtx (V16QImode);
6120 emit_insn (gen_rtx_SET (tmp, iorx));
6122 /* Permute with operands reversed and adjusted selector. */
6123 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
6124 UNSPEC_VPERM);
6127 emit_insn (gen_rtx_SET (target, x));
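/* Worked example (editorial; big-endian V4SImode, elt == 2, width == 4):
   the mask above becomes { 0,1,2,3, 4,5,6,7, 16,17,18,19, 12,13,14,15 },
   so the vperm copies bytes 8-11 of the result from the second input,
   i.e. from the register holding the new element.  */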
6130 /* Extract field ELT from VEC into TARGET. */
6132 void
6133 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
6135 machine_mode mode = GET_MODE (vec);
6136 machine_mode inner_mode = GET_MODE_INNER (mode);
6137 rtx mem;
6139 if (VECTOR_MEM_VSX_P (mode))
6141 switch (mode)
6143 default:
6144 break;
6145 case V1TImode:
6146 gcc_assert (elt == 0 && inner_mode == TImode);
6147 emit_move_insn (target, gen_lowpart (TImode, vec));
6148 break;
6149 case V2DFmode:
6150 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
6151 return;
6152 case V2DImode:
6153 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
6154 return;
6155 case V4SFmode:
6156 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
6157 return;
6161 /* Allocate mode-sized buffer. */
6162 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
6164 emit_move_insn (mem, vec);
6166 /* Add offset to field within buffer matching vector element. */
6167 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
6169 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
6172 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
6174 bool
6175 invalid_e500_subreg (rtx op, machine_mode mode)
6177 if (TARGET_E500_DOUBLE)
6179 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
6180 subreg:TI and reg:TF. Decimal float modes are like integer
6181 modes (only low part of each register used) for this
6182 purpose. */
6183 if (GET_CODE (op) == SUBREG
6184 && (mode == SImode || mode == DImode || mode == TImode
6185 || mode == DDmode || mode == TDmode || mode == PTImode)
6186 && REG_P (SUBREG_REG (op))
6187 && (GET_MODE (SUBREG_REG (op)) == DFmode
6188 || GET_MODE (SUBREG_REG (op)) == TFmode
6189 || GET_MODE (SUBREG_REG (op)) == IFmode
6190 || GET_MODE (SUBREG_REG (op)) == KFmode))
6191 return true;
6193 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
6194 reg:TI. */
6195 if (GET_CODE (op) == SUBREG
6196 && (mode == DFmode || mode == TFmode || mode == IFmode
6197 || mode == KFmode)
6198 && REG_P (SUBREG_REG (op))
6199 && (GET_MODE (SUBREG_REG (op)) == DImode
6200 || GET_MODE (SUBREG_REG (op)) == TImode
6201 || GET_MODE (SUBREG_REG (op)) == PTImode
6202 || GET_MODE (SUBREG_REG (op)) == DDmode
6203 || GET_MODE (SUBREG_REG (op)) == TDmode))
6204 return true;
6207 if (TARGET_SPE
6208 && GET_CODE (op) == SUBREG
6209 && mode == SImode
6210 && REG_P (SUBREG_REG (op))
6211 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
6212 return true;
6214 return false;
6217 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
6218 selects whether the alignment is ABI-mandated, optional, or
6219 both ABI and optional alignment. */
6221 unsigned int
6222 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
6224 if (how != align_opt)
6226 if (TREE_CODE (type) == VECTOR_TYPE)
6228 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
6229 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
6231 if (align < 64)
6232 align = 64;
6234 else if (align < 128)
6235 align = 128;
6237 else if (TARGET_E500_DOUBLE
6238 && TREE_CODE (type) == REAL_TYPE
6239 && TYPE_MODE (type) == DFmode)
6241 if (align < 64)
6242 align = 64;
6246 if (how != align_abi)
6248 if (TREE_CODE (type) == ARRAY_TYPE
6249 && TYPE_MODE (TREE_TYPE (type)) == QImode)
6251 if (align < BITS_PER_WORD)
6252 align = BITS_PER_WORD;
6256 return align;
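/* Editorial example: with AltiVec enabled, a vector type computed at
   64-bit alignment is raised to 128 bits whenever ABI alignment is
   requested (how != align_opt); a QImode array such as "char buf[100]"
   is raised to BITS_PER_WORD, but only as optional alignment
   (how != align_abi).  */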
6259 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
6261 bool
6262 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
6264 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6266 if (computed != 128)
6268 static bool warned;
6269 if (!warned && warn_psabi)
6271 warned = true;
6272 inform (input_location,
6273 "the layout of aggregates containing vectors with"
6274 " %d-byte alignment has changed in GCC 5",
6275 computed / BITS_PER_UNIT);
6278 /* In current GCC there is no special case. */
6279 return false;
6282 return false;
6285 /* AIX increases natural record alignment to doubleword if the first
6286 field is an FP double while the FP fields remain word aligned. */
6288 unsigned int
6289 rs6000_special_round_type_align (tree type, unsigned int computed,
6290 unsigned int specified)
6292 unsigned int align = MAX (computed, specified);
6293 tree field = TYPE_FIELDS (type);
6295 /* Skip all non-field decls. */
6296 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6297 field = DECL_CHAIN (field);
6299 if (field != NULL && field != type)
6301 type = TREE_TYPE (field);
6302 while (TREE_CODE (type) == ARRAY_TYPE)
6303 type = TREE_TYPE (type);
6305 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
6306 align = MAX (align, 64);
6309 return align;
6312 /* Darwin increases record alignment to the natural alignment of
6313 the first field. */
6315 unsigned int
6316 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6317 unsigned int specified)
6319 unsigned int align = MAX (computed, specified);
6321 if (TYPE_PACKED (type))
6322 return align;
6324 /* Find the first field, looking down into aggregates. */
6325 do {
6326 tree field = TYPE_FIELDS (type);
6327 /* Skip all non-field decls. */
6328 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6329 field = DECL_CHAIN (field);
6330 if (! field)
6331 break;
6332 /* A packed field does not contribute any extra alignment. */
6333 if (DECL_PACKED (field))
6334 return align;
6335 type = TREE_TYPE (field);
6336 while (TREE_CODE (type) == ARRAY_TYPE)
6337 type = TREE_TYPE (type);
6338 } while (AGGREGATE_TYPE_P (type));
6340 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6341 align = MAX (align, TYPE_ALIGN (type));
6343 return align;
6346 /* Return 1 for an operand in small memory on V.4/eabi. */
6348 int
6349 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6350 machine_mode mode ATTRIBUTE_UNUSED)
6352 #if TARGET_ELF
6353 rtx sym_ref;
6355 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6356 return 0;
6358 if (DEFAULT_ABI != ABI_V4)
6359 return 0;
6361 /* Vector and float memory instructions have a limited offset on the
6362 SPE, so using a vector or float variable directly as an operand is
6363 not useful. */
6364 if (TARGET_SPE
6365 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6366 return 0;
6368 if (GET_CODE (op) == SYMBOL_REF)
6369 sym_ref = op;
6371 else if (GET_CODE (op) != CONST
6372 || GET_CODE (XEXP (op, 0)) != PLUS
6373 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6374 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6375 return 0;
6377 else
6379 rtx sum = XEXP (op, 0);
6380 HOST_WIDE_INT summand;
6382 /* We have to be careful here, because it is the referenced address
6383 that must be 32k from _SDA_BASE_, not just the symbol. */
6384 summand = INTVAL (XEXP (sum, 1));
6385 if (summand < 0 || summand > g_switch_value)
6386 return 0;
6388 sym_ref = XEXP (sum, 0);
6391 return SYMBOL_REF_SMALL_P (sym_ref);
6392 #else
6393 return 0;
6394 #endif
6397 /* Return true if either operand is a general purpose register. */
6399 bool
6400 gpr_or_gpr_p (rtx op0, rtx op1)
6402 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6403 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6406 /* Return true if this is a move direct operation between GPR registers and
6407 floating point/VSX registers. */
6409 bool
6410 direct_move_p (rtx op0, rtx op1)
6412 int regno0, regno1;
6414 if (!REG_P (op0) || !REG_P (op1))
6415 return false;
6417 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6418 return false;
6420 regno0 = REGNO (op0);
6421 regno1 = REGNO (op1);
6422 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6423 return false;
6425 if (INT_REGNO_P (regno0))
6426 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6428 else if (INT_REGNO_P (regno1))
6430 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6431 return true;
6433 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6434 return true;
6437 return false;
6440 /* Return true if this is a load or store quad operation. This function does
6441 not handle the atomic quad memory instructions. */
6443 bool
6444 quad_load_store_p (rtx op0, rtx op1)
6446 bool ret;
6448 if (!TARGET_QUAD_MEMORY)
6449 ret = false;
6451 else if (REG_P (op0) && MEM_P (op1))
6452 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6453 && quad_memory_operand (op1, GET_MODE (op1))
6454 && !reg_overlap_mentioned_p (op0, op1));
6456 else if (MEM_P (op0) && REG_P (op1))
6457 ret = (quad_memory_operand (op0, GET_MODE (op0))
6458 && quad_int_reg_operand (op1, GET_MODE (op1)));
6460 else
6461 ret = false;
6463 if (TARGET_DEBUG_ADDR)
6465 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6466 ret ? "true" : "false");
6467 debug_rtx (gen_rtx_SET (op0, op1));
6470 return ret;
6473 /* Given an address, return a constant offset term if one exists. */
6475 static rtx
6476 address_offset (rtx op)
6478 if (GET_CODE (op) == PRE_INC
6479 || GET_CODE (op) == PRE_DEC)
6480 op = XEXP (op, 0);
6481 else if (GET_CODE (op) == PRE_MODIFY
6482 || GET_CODE (op) == LO_SUM)
6483 op = XEXP (op, 1);
6485 if (GET_CODE (op) == CONST)
6486 op = XEXP (op, 0);
6488 if (GET_CODE (op) == PLUS)
6489 op = XEXP (op, 1);
6491 if (CONST_INT_P (op))
6492 return op;
6494 return NULL_RTX;
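/* Editorial examples: (plus (reg) (const_int 16)) yields (const_int 16);
   (lo_sum (reg) (const (plus (symbol_ref) (const_int 8)))) yields
   (const_int 8); a bare (reg) yields NULL_RTX.  */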
6497 /* Return true if the MEM operand is a memory operand suitable for use
6498 with a (full width, possibly multiple) gpr load/store. On
6499 powerpc64 this means the offset must be divisible by 4.
6500 Implements 'Y' constraint.
6502 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6503 a constraint function we know the operand has satisfied a suitable
6504 memory predicate. Also accept some odd rtl generated by reload
6505 (see rs6000_legitimize_reload_address for various forms). It is
6506 important that reload rtl be accepted by appropriate constraints
6507 but not by the operand predicate.
6509 Offsetting a lo_sum should not be allowed, except where we know by
6510 alignment that a 32k boundary is not crossed, but see the ???
6511 comment in rs6000_legitimize_reload_address. Note that by
6512 "offsetting" here we mean a further offset to access parts of the
6513 MEM. It's fine to have a lo_sum where the inner address is offset
6514 from a sym, since the same sym+offset will appear in the high part
6515 of the address calculation. */
6517 bool
6518 mem_operand_gpr (rtx op, machine_mode mode)
6520 unsigned HOST_WIDE_INT offset;
6521 int extra;
6522 rtx addr = XEXP (op, 0);
6524 op = address_offset (addr);
6525 if (op == NULL_RTX)
6526 return true;
6528 offset = INTVAL (op);
6529 if (TARGET_POWERPC64 && (offset & 3) != 0)
6530 return false;
6532 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
6533 if (extra < 0)
6534 extra = 0;
6536 if (GET_CODE (addr) == LO_SUM)
6537 /* For lo_sum addresses, we must allow any offset except one that
6538 causes a wrap, so test only the low 16 bits. */
6539 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
6541 return offset + 0x8000 < 0x10000u - extra;
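/* Worked example (editorial; assumes -m64, so UNITS_PER_WORD == 8): for
   a TImode access, extra == 8, so reg+32752 is accepted while reg+32760
   is rejected because the second doubleword at +32768 would overflow
   the 16-bit displacement; any offset not a multiple of 4 is rejected
   outright on powerpc64.  */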
6544 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
6546 static bool
6547 reg_offset_addressing_ok_p (machine_mode mode)
6549 switch (mode)
6551 case V16QImode:
6552 case V8HImode:
6553 case V4SFmode:
6554 case V4SImode:
6555 case V2DFmode:
6556 case V2DImode:
6557 case V1TImode:
6558 case TImode:
6559 case TFmode:
6560 case KFmode:
6561 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6562 TImode is not a vector mode, if we want to use the VSX registers to
6563 move it around, we need to restrict ourselves to reg+reg addressing.
6564 Similarly for IEEE 128-bit floating point that is passed in a single
6565 vector register. */
6566 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6567 return false;
6568 break;
6570 case V4HImode:
6571 case V2SImode:
6572 case V1DImode:
6573 case V2SFmode:
6574 /* Paired vector modes. Only reg+reg addressing is valid. */
6575 if (TARGET_PAIRED_FLOAT)
6576 return false;
6577 break;
6579 case SDmode:
6580 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6581 addressing for the LFIWZX and STFIWX instructions. */
6582 if (TARGET_NO_SDMODE_STACK)
6583 return false;
6584 break;
6586 default:
6587 break;
6590 return true;
6593 static bool
6594 virtual_stack_registers_memory_p (rtx op)
6596 int regnum;
6598 if (GET_CODE (op) == REG)
6599 regnum = REGNO (op);
6601 else if (GET_CODE (op) == PLUS
6602 && GET_CODE (XEXP (op, 0)) == REG
6603 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6604 regnum = REGNO (XEXP (op, 0));
6606 else
6607 return false;
6609 return (regnum >= FIRST_VIRTUAL_REGISTER
6610 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
6613 /* Return true if a MODE sized memory accesses to OP plus OFFSET
6614 is known to not straddle a 32k boundary. This function is used
6615 to determine whether -mcmodel=medium code can use TOC pointer
6616 relative addressing for OP. This means the alignment of the TOC
6617 pointer must also be taken into account, and unfortunately that is
6618 only 8 bytes. */
6620 #ifndef POWERPC64_TOC_POINTER_ALIGNMENT
6621 #define POWERPC64_TOC_POINTER_ALIGNMENT 8
6622 #endif
6624 static bool
6625 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
6626 machine_mode mode)
6628 tree decl;
6629 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
6631 if (GET_CODE (op) != SYMBOL_REF)
6632 return false;
6634 dsize = GET_MODE_SIZE (mode);
6635 decl = SYMBOL_REF_DECL (op);
6636 if (!decl)
6638 if (dsize == 0)
6639 return false;
6641 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
6642 replacing memory addresses with an anchor plus offset. We
6643 could find the decl by rummaging around in the block->objects
6644 VEC for the given offset but that seems like too much work. */
6645 dalign = BITS_PER_UNIT;
6646 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
6647 && SYMBOL_REF_ANCHOR_P (op)
6648 && SYMBOL_REF_BLOCK (op) != NULL)
6650 struct object_block *block = SYMBOL_REF_BLOCK (op);
6652 dalign = block->alignment;
6653 offset += SYMBOL_REF_BLOCK_OFFSET (op);
6655 else if (CONSTANT_POOL_ADDRESS_P (op))
6657 /* It would be nice to have get_pool_align().. */
6658 machine_mode cmode = get_pool_mode (op);
6660 dalign = GET_MODE_ALIGNMENT (cmode);
6663 else if (DECL_P (decl))
6665 dalign = DECL_ALIGN (decl);
6667 if (dsize == 0)
6669 /* Allow BLKmode when the entire object is known to not
6670 cross a 32k boundary. */
6671 if (!DECL_SIZE_UNIT (decl))
6672 return false;
6674 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
6675 return false;
6677 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
6678 if (dsize > 32768)
6679 return false;
6681 dalign /= BITS_PER_UNIT;
6682 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
6683 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
6684 return dalign >= dsize;
6687 else
6688 gcc_unreachable ();
6690 /* Find how many bits of the alignment we know for this access. */
6691 dalign /= BITS_PER_UNIT;
6692 if (dalign > POWERPC64_TOC_POINTER_ALIGNMENT)
6693 dalign = POWERPC64_TOC_POINTER_ALIGNMENT;
6694 mask = dalign - 1;
6695 lsb = offset & -offset;
6696 mask &= lsb - 1;
6697 dalign = mask + 1;
6699 return dalign >= dsize;
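/* Worked example (editorial) of the alignment computation above: with
   dalign == 8 (the TOC pointer guarantee) and offset == 4, lsb == 4, so
   the known alignment of op+offset drops to 4 and an 8-byte access may
   straddle a 32k boundary (returns false); with offset == 8 it stays 8
   and a DImode access is accepted.  */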
6702 static bool
6703 constant_pool_expr_p (rtx op)
6705 rtx base, offset;
6707 split_const (op, &base, &offset);
6708 return (GET_CODE (base) == SYMBOL_REF
6709 && CONSTANT_POOL_ADDRESS_P (base)
6710 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
6713 static const_rtx tocrel_base, tocrel_offset;
6715 /* Return true if OP is a toc pointer relative address (the output
6716 of create_TOC_reference). If STRICT, do not match high part or
6717 non-split -mcmodel=large/medium toc pointer relative addresses. */
6719 bool
6720 toc_relative_expr_p (const_rtx op, bool strict)
6722 if (!TARGET_TOC)
6723 return false;
6725 if (TARGET_CMODEL != CMODEL_SMALL)
6727 /* Only match the low part. */
6728 if (GET_CODE (op) == LO_SUM
6729 && REG_P (XEXP (op, 0))
6730 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
6731 op = XEXP (op, 1);
6732 else if (strict)
6733 return false;
6736 tocrel_base = op;
6737 tocrel_offset = const0_rtx;
6738 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
6740 tocrel_base = XEXP (op, 0);
6741 tocrel_offset = XEXP (op, 1);
6744 return (GET_CODE (tocrel_base) == UNSPEC
6745 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
6748 /* Return true if X is a constant pool address, and also for cmodel=medium
6749 if X is a toc-relative address known to be offsettable within MODE. */
6751 bool
6752 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
6753 bool strict)
6755 return (toc_relative_expr_p (x, strict)
6756 && (TARGET_CMODEL != CMODEL_MEDIUM
6757 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
6758 || mode == QImode
6759 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
6760 INTVAL (tocrel_offset), mode)));
6763 static bool
6764 legitimate_small_data_p (machine_mode mode, rtx x)
6766 return (DEFAULT_ABI == ABI_V4
6767 && !flag_pic && !TARGET_TOC
6768 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
6769 && small_data_operand (x, mode));
6772 /* SPE offset addressing is limited to 5 bits' worth of doublewords. */
6773 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
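/* That is, the accepted offsets are 0, 8, 16, ..., 248: a 5-bit count
   of doublewords scaled by 8 (only bits 3-7 may be set).  */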
6775 bool
6776 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
6777 bool strict, bool worst_case)
6779 unsigned HOST_WIDE_INT offset;
6780 unsigned int extra;
6782 if (GET_CODE (x) != PLUS)
6783 return false;
6784 if (!REG_P (XEXP (x, 0)))
6785 return false;
6786 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6787 return false;
6788 if (!reg_offset_addressing_ok_p (mode))
6789 return virtual_stack_registers_memory_p (x);
6790 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
6791 return true;
6792 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6793 return false;
6795 offset = INTVAL (XEXP (x, 1));
6796 extra = 0;
6797 switch (mode)
6799 case V4HImode:
6800 case V2SImode:
6801 case V1DImode:
6802 case V2SFmode:
6803 /* SPE vector modes. */
6804 return SPE_CONST_OFFSET_OK (offset);
6806 case DFmode:
6807 case DDmode:
6808 case DImode:
6809 /* On e500v2, we may have:
6811 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
6813 Which gets addressed with evldd instructions. */
6814 if (TARGET_E500_DOUBLE)
6815 return SPE_CONST_OFFSET_OK (offset);
6817 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
6818 addressing. */
6819 if (VECTOR_MEM_VSX_P (mode))
6820 return false;
6822 if (!worst_case)
6823 break;
6824 if (!TARGET_POWERPC64)
6825 extra = 4;
6826 else if (offset & 3)
6827 return false;
6828 break;
6830 case TFmode:
6831 case IFmode:
6832 case KFmode:
6833 if (TARGET_E500_DOUBLE)
6834 return (SPE_CONST_OFFSET_OK (offset)
6835 && SPE_CONST_OFFSET_OK (offset + 8));
6836 /* fall through */
6838 case TDmode:
6839 case TImode:
6840 case PTImode:
6841 extra = 8;
6842 if (!worst_case)
6843 break;
6844 if (!TARGET_POWERPC64)
6845 extra = 12;
6846 else if (offset & 3)
6847 return false;
6848 break;
6850 default:
6851 break;
6854 offset += 0x8000;
6855 return offset < 0x10000 - extra;
6858 bool
6859 legitimate_indexed_address_p (rtx x, int strict)
6861 rtx op0, op1;
6863 if (GET_CODE (x) != PLUS)
6864 return false;
6866 op0 = XEXP (x, 0);
6867 op1 = XEXP (x, 1);
6869 /* Recognize the rtl generated by reload which we know will later be
6870 replaced with proper base and index regs. */
6871 if (!strict
6872 && reload_in_progress
6873 && (REG_P (op0) || GET_CODE (op0) == PLUS)
6874 && REG_P (op1))
6875 return true;
6877 return (REG_P (op0) && REG_P (op1)
6878 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
6879 && INT_REG_OK_FOR_INDEX_P (op1, strict))
6880 || (INT_REG_OK_FOR_BASE_P (op1, strict)
6881 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
6884 bool
6885 avoiding_indexed_address_p (machine_mode mode)
6887 /* Avoid indexed addressing for modes that have non-indexed
6888 load/store instruction forms. */
6889 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
6892 bool
6893 legitimate_indirect_address_p (rtx x, int strict)
6895 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
6898 bool
6899 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
6901 if (!TARGET_MACHO || !flag_pic
6902 || mode != SImode || GET_CODE (x) != MEM)
6903 return false;
6904 x = XEXP (x, 0);
6906 if (GET_CODE (x) != LO_SUM)
6907 return false;
6908 if (GET_CODE (XEXP (x, 0)) != REG)
6909 return false;
6910 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
6911 return false;
6912 x = XEXP (x, 1);
6914 return CONSTANT_P (x);
6917 static bool
6918 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
6920 if (GET_CODE (x) != LO_SUM)
6921 return false;
6922 if (GET_CODE (XEXP (x, 0)) != REG)
6923 return false;
6924 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6925 return false;
6926 /* Restrict addressing for DI because of our SUBREG hackery. */
6927 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
6928 return false;
6929 x = XEXP (x, 1);
6931 if (TARGET_ELF || TARGET_MACHO)
6933 bool large_toc_ok;
6935 if (DEFAULT_ABI == ABI_V4 && flag_pic)
6936 return false;
6937 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as that usually calls
6938 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
6939 recognizes some LO_SUM addresses as valid although this
6940 function says the opposite. In most cases LRA can, through
6941 different transformations, generate correct code for address
6942 reloads. It cannot manage only some LO_SUM cases. So we need
6943 to add code here, analogous to that in
6944 rs6000_legitimize_reload_address for LO_SUM, saying that some
addresses are still valid. */
6945 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
6946 && small_toc_ref (x, VOIDmode));
6947 if (TARGET_TOC && ! large_toc_ok)
6948 return false;
6949 if (GET_MODE_NUNITS (mode) != 1)
6950 return false;
6951 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6952 && !(/* ??? Assume floating point reg based on mode? */
6953 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
6954 && (mode == DFmode || mode == DDmode)))
6955 return false;
6957 return CONSTANT_P (x) || large_toc_ok;
6960 return false;
6964 /* Try machine-dependent ways of modifying an illegitimate address
6965 to be legitimate. If we find one, return the new, valid address.
6966 This is used from only one place: `memory_address' in explow.c.
6968 OLDX is the address as it was before break_out_memory_refs was
6969 called. In some cases it is useful to look at this to decide what
6970 needs to be done.
6972 It is always safe for this function to do nothing. It exists to
6973 recognize opportunities to optimize the output.
6975 On RS/6000, first check for the sum of a register with a constant
6976 integer that is out of range. If so, generate code to add the
6977 constant with the low-order 16 bits masked to the register and force
6978 this result into another register (this can be done with `cau').
6979 Then generate an address of REG+(CONST&0xffff), allowing for the
6980 possibility of bit 16 being a one.
6982 Then check for the sum of a register and something not constant, try to
6983 load the other things into a register and return the sum. */
6985 static rtx
6986 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
6987 machine_mode mode)
6989 unsigned int extra;
6991 if (!reg_offset_addressing_ok_p (mode))
6993 if (virtual_stack_registers_memory_p (x))
6994 return x;
6996 /* In theory we should not be seeing addresses of the form reg+0,
6997 but just in case it is generated, optimize it away. */
6998 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
6999 return force_reg (Pmode, XEXP (x, 0));
7001 /* For TImode with load/store quad, restrict addresses to just a single
7002 pointer, so it works with both GPRs and VSX registers. */
7003 /* Make sure both operands are registers. */
7004 else if (GET_CODE (x) == PLUS
7005 && (mode != TImode || !TARGET_QUAD_MEMORY))
7006 return gen_rtx_PLUS (Pmode,
7007 force_reg (Pmode, XEXP (x, 0)),
7008 force_reg (Pmode, XEXP (x, 1)));
7009 else
7010 return force_reg (Pmode, x);
7012 if (GET_CODE (x) == SYMBOL_REF)
7014 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
7015 if (model != 0)
7016 return rs6000_legitimize_tls_address (x, model);
7019 extra = 0;
7020 switch (mode)
7022 case TFmode:
7023 case TDmode:
7024 case TImode:
7025 case PTImode:
7026 case IFmode:
7027 case KFmode:
7028 /* As in legitimate_offset_address_p we do not assume
7029 worst-case. The mode here is just a hint as to the registers
7030 used. A TImode is usually in gprs, but may actually be in
7031 fprs. Leave worst-case scenario for reload to handle via
7032 insn constraints. PTImode is only GPRs. */
7033 extra = 8;
7034 break;
7035 default:
7036 break;
7039 if (GET_CODE (x) == PLUS
7040 && GET_CODE (XEXP (x, 0)) == REG
7041 && GET_CODE (XEXP (x, 1)) == CONST_INT
7042 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
7043 >= 0x10000 - extra)
7044 && !(SPE_VECTOR_MODE (mode)
7045 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
7047 HOST_WIDE_INT high_int, low_int;
7048 rtx sum;
7049 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
7050 if (low_int >= 0x8000 - extra)
7051 low_int = 0;
7052 high_int = INTVAL (XEXP (x, 1)) - low_int;
7053 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
7054 GEN_INT (high_int)), 0);
7055 return plus_constant (Pmode, sum, low_int);
7057 else if (GET_CODE (x) == PLUS
7058 && GET_CODE (XEXP (x, 0)) == REG
7059 && GET_CODE (XEXP (x, 1)) != CONST_INT
7060 && GET_MODE_NUNITS (mode) == 1
7061 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7062 || (/* ??? Assume floating point reg based on mode? */
7063 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7064 && (mode == DFmode || mode == DDmode)))
7065 && !avoiding_indexed_address_p (mode))
7067 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
7068 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
7070 else if (SPE_VECTOR_MODE (mode)
7071 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
7073 if (mode == DImode)
7074 return x;
7075 /* We accept [reg + reg] and [reg + OFFSET]. */
7077 if (GET_CODE (x) == PLUS)
7079 rtx op1 = XEXP (x, 0);
7080 rtx op2 = XEXP (x, 1);
7081 rtx y;
7083 op1 = force_reg (Pmode, op1);
7085 if (GET_CODE (op2) != REG
7086 && (GET_CODE (op2) != CONST_INT
7087 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
7088 || (GET_MODE_SIZE (mode) > 8
7089 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
7090 op2 = force_reg (Pmode, op2);
7092 /* We can't always do [reg + reg] for these, because [reg +
7093 reg + offset] is not a legitimate addressing mode. */
7094 y = gen_rtx_PLUS (Pmode, op1, op2);
7096 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
7097 return force_reg (Pmode, y);
7098 else
7099 return y;
7102 return force_reg (Pmode, x);
7104 else if ((TARGET_ELF
7105 #if TARGET_MACHO
7106 || !MACHO_DYNAMIC_NO_PIC_P
7107 #endif
7109 && TARGET_32BIT
7110 && TARGET_NO_TOC
7111 && ! flag_pic
7112 && GET_CODE (x) != CONST_INT
7113 && GET_CODE (x) != CONST_WIDE_INT
7114 && GET_CODE (x) != CONST_DOUBLE
7115 && CONSTANT_P (x)
7116 && GET_MODE_NUNITS (mode) == 1
7117 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
7118 || (/* ??? Assume floating point reg based on mode? */
7119 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7120 && (mode == DFmode || mode == DDmode))))
7122 rtx reg = gen_reg_rtx (Pmode);
7123 if (TARGET_ELF)
7124 emit_insn (gen_elf_high (reg, x));
7125 else
7126 emit_insn (gen_macho_high (reg, x));
7127 return gen_rtx_LO_SUM (Pmode, reg, x);
7129 else if (TARGET_TOC
7130 && GET_CODE (x) == SYMBOL_REF
7131 && constant_pool_expr_p (x)
7132 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
7133 return create_TOC_reference (x, NULL_RTX);
7134 else
7135 return x;
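/* Worked example (editorial) of the high/low split above: for
   reg + 0x12345 with extra == 0, low_int == 0x2345 and
   high_int == 0x10000, so an addis-style add materializes reg+0x10000
   and the returned address (plus (reg') 0x2345) fits a signed 16-bit
   displacement.  */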
7138 /* Debug version of rs6000_legitimize_address. */
7139 static rtx
7140 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
7142 rtx ret;
7143 rtx_insn *insns;
7145 start_sequence ();
7146 ret = rs6000_legitimize_address (x, oldx, mode);
7147 insns = get_insns ();
7148 end_sequence ();
7150 if (ret != x)
7152 fprintf (stderr,
7153 "\nrs6000_legitimize_address: mode %s, old code %s, "
7154 "new code %s, modified\n",
7155 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
7156 GET_RTX_NAME (GET_CODE (ret)));
7158 fprintf (stderr, "Original address:\n");
7159 debug_rtx (x);
7161 fprintf (stderr, "oldx:\n");
7162 debug_rtx (oldx);
7164 fprintf (stderr, "New address:\n");
7165 debug_rtx (ret);
7167 if (insns)
7169 fprintf (stderr, "Insns added:\n");
7170 debug_rtx_list (insns, 20);
7173 else
7175 fprintf (stderr,
7176 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
7177 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
7179 debug_rtx (x);
7182 if (insns)
7183 emit_insn (insns);
7185 return ret;
7188 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7189 We need to emit DTP-relative relocations. */
7191 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7192 static void
7193 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
7195 switch (size)
7197 case 4:
7198 fputs ("\t.long\t", file);
7199 break;
7200 case 8:
7201 fputs (DOUBLE_INT_ASM_OP, file);
7202 break;
7203 default:
7204 gcc_unreachable ();
7206 output_addr_const (file, x);
7207 fputs ("@dtprel+0x8000", file);
7210 /* Return true if X is a symbol that refers to real (rather than emulated)
7211 TLS. */
7213 static bool
7214 rs6000_real_tls_symbol_ref_p (rtx x)
7216 return (GET_CODE (x) == SYMBOL_REF
7217 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
7220 /* In the name of slightly smaller debug output, and to cater to
7221 general assembler lossage, recognize various UNSPEC sequences
7222 and turn them back into a direct symbol reference. */
7224 static rtx
7225 rs6000_delegitimize_address (rtx orig_x)
7227 rtx x, y, offset;
7229 orig_x = delegitimize_mem_from_attrs (orig_x);
7230 x = orig_x;
7231 if (MEM_P (x))
7232 x = XEXP (x, 0);
7234 y = x;
7235 if (TARGET_CMODEL != CMODEL_SMALL
7236 && GET_CODE (y) == LO_SUM)
7237 y = XEXP (y, 1);
7239 offset = NULL_RTX;
7240 if (GET_CODE (y) == PLUS
7241 && GET_MODE (y) == Pmode
7242 && CONST_INT_P (XEXP (y, 1)))
7244 offset = XEXP (y, 1);
7245 y = XEXP (y, 0);
7248 if (GET_CODE (y) == UNSPEC
7249 && XINT (y, 1) == UNSPEC_TOCREL)
7251 y = XVECEXP (y, 0, 0);
7253 #ifdef HAVE_AS_TLS
7254 /* Do not associate thread-local symbols with the original
7255 constant pool symbol. */
7256 if (TARGET_XCOFF
7257 && GET_CODE (y) == SYMBOL_REF
7258 && CONSTANT_POOL_ADDRESS_P (y)
7259 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
7260 return orig_x;
7261 #endif
7263 if (offset != NULL_RTX)
7264 y = gen_rtx_PLUS (Pmode, y, offset);
7265 if (!MEM_P (orig_x))
7266 return y;
7267 else
7268 return replace_equiv_address_nv (orig_x, y);
7271 if (TARGET_MACHO
7272 && GET_CODE (orig_x) == LO_SUM
7273 && GET_CODE (XEXP (orig_x, 1)) == CONST)
7275 y = XEXP (XEXP (orig_x, 1), 0);
7276 if (GET_CODE (y) == UNSPEC
7277 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
7278 return XVECEXP (y, 0, 0);
7281 return orig_x;
7284 /* Return true if X shouldn't be emitted into the debug info.
7285 The linker doesn't like .toc section references from
7286 .debug_* sections, so reject .toc section symbols. */
7288 static bool
7289 rs6000_const_not_ok_for_debug_p (rtx x)
7291 if (GET_CODE (x) == SYMBOL_REF
7292 && CONSTANT_POOL_ADDRESS_P (x))
7294 rtx c = get_pool_constant (x);
7295 machine_mode cmode = get_pool_mode (x);
7296 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7297 return true;
7300 return false;
7303 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7305 static GTY(()) rtx rs6000_tls_symbol;
7306 static rtx
7307 rs6000_tls_get_addr (void)
7309 if (!rs6000_tls_symbol)
7310 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7312 return rs6000_tls_symbol;
7315 /* Construct the SYMBOL_REF for TLS GOT references. */
7317 static GTY(()) rtx rs6000_got_symbol;
7318 static rtx
7319 rs6000_got_sym (void)
7321 if (!rs6000_got_symbol)
7323 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7324 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7325 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7328 return rs6000_got_symbol;
7331 /* AIX Thread-Local Address support. */
7333 static rtx
7334 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7336 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7337 const char *name;
7338 char *tlsname;
7340 name = XSTR (addr, 0);
7341 /* Append the TLS CSECT qualifier, unless the symbol is already qualified
7342 or will be placed in the TLS private data section. */
7343 if (name[strlen (name) - 1] != ']'
7344 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7345 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7347 tlsname = XALLOCAVEC (char, strlen (name) + 5);
7348 strcpy (tlsname, name);
7349 strcat (tlsname,
7350 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7351 tlsaddr = copy_rtx (addr);
7352 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7354 else
7355 tlsaddr = addr;
7357 /* Place addr into TOC constant pool. */
7358 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7360 /* Output the TOC entry and create the MEM referencing the value. */
7361 if (constant_pool_expr_p (XEXP (sym, 0))
7362 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7364 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7365 mem = gen_const_mem (Pmode, tocref);
7366 set_mem_alias_set (mem, get_TOC_alias_set ());
7368 else
7369 return sym;
7371 /* Use global-dynamic for local-dynamic. */
7372 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7373 || model == TLS_MODEL_LOCAL_DYNAMIC)
7375 /* Create new TOC reference for @m symbol. */
7376 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7377 tlsname = XALLOCAVEC (char, strlen (name) + 2);
7378 strcpy (tlsname, "*LCM");
7379 strcat (tlsname, name + 3);
7380 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7381 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7382 tocref = create_TOC_reference (modaddr, NULL_RTX);
7383 rtx modmem = gen_const_mem (Pmode, tocref);
7384 set_mem_alias_set (modmem, get_TOC_alias_set ());
7386 rtx modreg = gen_reg_rtx (Pmode);
7387 emit_insn (gen_rtx_SET (modreg, modmem));
7389 tmpreg = gen_reg_rtx (Pmode);
7390 emit_insn (gen_rtx_SET (tmpreg, mem));
7392 dest = gen_reg_rtx (Pmode);
7393 if (TARGET_32BIT)
7394 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7395 else
7396 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7397 return dest;
7399 /* Obtain the TLS pointer: 32-bit call or 64-bit GPR 13. */
7400 else if (TARGET_32BIT)
7402 tlsreg = gen_reg_rtx (SImode);
7403 emit_insn (gen_tls_get_tpointer (tlsreg));
7405 else
7406 tlsreg = gen_rtx_REG (DImode, 13);
7408 /* Load the TOC value into temporary register. */
7409 tmpreg = gen_reg_rtx (Pmode);
7410 emit_insn (gen_rtx_SET (tmpreg, mem));
7411 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7412 gen_rtx_MINUS (Pmode, addr, tlsreg));
7414 /* Add TOC symbol value to TLS pointer. */
7415 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7417 return dest;
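/* Sketch of the name rewriting above (illustrative; exact label
   spellings are assembler-dependent): if the TOC entry for the TLS
   symbol is named "*LC..1", the module-handle entry becomes "*LCM..1",
   so the two TOC slots pair up for the __tls_get_addr call.  */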
7420 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7421 this (thread-local) address. */
7423 static rtx
7424 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7426 rtx dest, insn;
7428 if (TARGET_XCOFF)
7429 return rs6000_legitimize_tls_address_aix (addr, model);
7431 dest = gen_reg_rtx (Pmode);
7432 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7434 rtx tlsreg;
7436 if (TARGET_64BIT)
7438 tlsreg = gen_rtx_REG (Pmode, 13);
7439 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7441 else
7443 tlsreg = gen_rtx_REG (Pmode, 2);
7444 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7446 emit_insn (insn);
7448 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7450 rtx tlsreg, tmp;
7452 tmp = gen_reg_rtx (Pmode);
7453 if (TARGET_64BIT)
7455 tlsreg = gen_rtx_REG (Pmode, 13);
7456 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7458 else
7460 tlsreg = gen_rtx_REG (Pmode, 2);
7461 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7463 emit_insn (insn);
7464 if (TARGET_64BIT)
7465 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7466 else
7467 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7468 emit_insn (insn);
7470 else
7472 rtx r3, got, tga, tmp1, tmp2, call_insn;
7474 /* We currently use relocations like @got@tlsgd for tls, which
7475 means the linker will handle allocation of tls entries, placing
7476 them in the .got section. So use a pointer to the .got section,
7477 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7478 or to secondary GOT sections used by 32-bit -fPIC. */
7479 if (TARGET_64BIT)
7480 got = gen_rtx_REG (Pmode, 2);
7481 else
7483 if (flag_pic == 1)
7484 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7485 else
7487 rtx gsym = rs6000_got_sym ();
7488 got = gen_reg_rtx (Pmode);
7489 if (flag_pic == 0)
7490 rs6000_emit_move (got, gsym, Pmode);
7491 else
7493 rtx mem, lab, last;
7495 tmp1 = gen_reg_rtx (Pmode);
7496 tmp2 = gen_reg_rtx (Pmode);
7497 mem = gen_const_mem (Pmode, tmp1);
7498 lab = gen_label_rtx ();
7499 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7500 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7501 if (TARGET_LINK_STACK)
7502 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7503 emit_move_insn (tmp2, mem);
7504 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7505 set_unique_reg_note (last, REG_EQUAL, gsym);
7510 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7512 tga = rs6000_tls_get_addr ();
7513 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7514 1, const0_rtx, Pmode);
7516 r3 = gen_rtx_REG (Pmode, 3);
7517 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7519 if (TARGET_64BIT)
7520 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7521 else
7522 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7524 else if (DEFAULT_ABI == ABI_V4)
7525 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7526 else
7527 gcc_unreachable ();
7528 call_insn = last_call_insn ();
7529 PATTERN (call_insn) = insn;
7530 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7531 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7532 pic_offset_table_rtx);
7534 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7536 tga = rs6000_tls_get_addr ();
7537 tmp1 = gen_reg_rtx (Pmode);
7538 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7539 1, const0_rtx, Pmode);
7541 r3 = gen_rtx_REG (Pmode, 3);
7542 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7544 if (TARGET_64BIT)
7545 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7546 else
7547 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7549 else if (DEFAULT_ABI == ABI_V4)
7550 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7551 else
7552 gcc_unreachable ();
7553 call_insn = last_call_insn ();
7554 PATTERN (call_insn) = insn;
7555 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7556 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7557 pic_offset_table_rtx);
7559 if (rs6000_tls_size == 16)
7561 if (TARGET_64BIT)
7562 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7563 else
7564 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7566 else if (rs6000_tls_size == 32)
7568 tmp2 = gen_reg_rtx (Pmode);
7569 if (TARGET_64BIT)
7570 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7571 else
7572 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7573 emit_insn (insn);
7574 if (TARGET_64BIT)
7575 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7576 else
7577 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7579 else
7581 tmp2 = gen_reg_rtx (Pmode);
7582 if (TARGET_64BIT)
7583 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7584 else
7585 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7586 emit_insn (insn);
7587 insn = gen_rtx_SET (dest, gen_rtx_PLUS (Pmode, tmp2, tmp1));
7589 emit_insn (insn);
7591 else
7593 /* IE, or 64-bit offset LE. */
7594 tmp2 = gen_reg_rtx (Pmode);
7595 if (TARGET_64BIT)
7596 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7597 else
7598 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7599 emit_insn (insn);
7600 if (TARGET_64BIT)
7601 insn = gen_tls_tls_64 (dest, tmp2, addr);
7602 else
7603 insn = gen_tls_tls_32 (dest, tmp2, addr);
7604 emit_insn (insn);
7608 return dest;
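/* As a rough sketch of the code generated above (illustrative; the
   actual insns come from the tls_tprel_* and tls_got_tprel_* patterns
   in rs6000.md), the 16-bit local-exec case on a 64-bit target is a
   single instruction:

	addi dest, 13, sym@tprel

   using GPR 13 as the thread pointer; the 32-bit local-exec form uses
   GPR 2 instead.  */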
7611 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7613 static bool
7614 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7616 if (GET_CODE (x) == HIGH
7617 && GET_CODE (XEXP (x, 0)) == UNSPEC)
7618 return true;
7620 /* A TLS symbol in the TOC cannot contain a sum. */
7621 if (GET_CODE (x) == CONST
7622 && GET_CODE (XEXP (x, 0)) == PLUS
7623 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7624 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
7625 return true;
7627 /* Do not place an ELF TLS symbol in the constant pool. */
7628 return TARGET_ELF && tls_referenced_p (x);
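/* For instance (illustrative), (const (plus (symbol_ref "v")
   (const_int 4))) is rejected above when "v" has a TLS model: a TLS
   TOC entry cannot express the sum, so the move must be legitimized
   some other way.  */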
7631 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
7632 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
7633 can be addressed relative to the toc pointer. */
7635 static bool
7636 use_toc_relative_ref (rtx sym, machine_mode mode)
7638 return ((constant_pool_expr_p (sym)
7639 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
7640 get_pool_mode (sym)))
7641 || (TARGET_CMODEL == CMODEL_MEDIUM
7642 && SYMBOL_REF_LOCAL_P (sym)
7643 && GET_MODE_SIZE (mode) <= POWERPC64_TOC_POINTER_ALIGNMENT));
7646 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
7647 replace the input X, or the original X if no replacement is called for.
7648 The output parameter *WIN is 1 if the calling macro should goto WIN,
7649 0 if it should not.
7651 For RS/6000, we wish to handle large displacements off a base
7652 register by splitting the addend across an addi/addis and the mem insn.
7653 This cuts the number of extra insns needed from 3 to 1.
7655 On Darwin, we use this to generate code for floating point constants.
7656 A movsf_low is generated so we wind up with 2 instructions rather than 3.
7657 The Darwin code is inside #if TARGET_MACHO because only then are the
7658 machopic_* functions defined. */
7659 static rtx
7660 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
7661 int opnum, int type,
7662 int ind_levels ATTRIBUTE_UNUSED, int *win)
7664 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7666 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
7667 DFmode/DImode MEM. */
7668 if (reg_offset_p
7669 && opnum == 1
7670 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
7671 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
7672 reg_offset_p = false;
7674 /* We must recognize output that we have already generated ourselves. */
7675 if (GET_CODE (x) == PLUS
7676 && GET_CODE (XEXP (x, 0)) == PLUS
7677 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7678 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7679 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7681 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7682 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7683 opnum, (enum reload_type) type);
7684 *win = 1;
7685 return x;
7688 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
7689 if (GET_CODE (x) == LO_SUM
7690 && GET_CODE (XEXP (x, 0)) == HIGH)
7692 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7693 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7694 opnum, (enum reload_type) type);
7695 *win = 1;
7696 return x;
7699 #if TARGET_MACHO
7700 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
7701 && GET_CODE (x) == LO_SUM
7702 && GET_CODE (XEXP (x, 0)) == PLUS
7703 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
7704 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
7705 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
7706 && machopic_operand_p (XEXP (x, 1)))
7708 /* Result of previous invocation of this function on Darwin
7709 floating point constant. */
7710 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7711 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7712 opnum, (enum reload_type) type);
7713 *win = 1;
7714 return x;
7716 #endif
7718 if (TARGET_CMODEL != CMODEL_SMALL
7719 && reg_offset_p
7720 && small_toc_ref (x, VOIDmode))
7722 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
7723 x = gen_rtx_LO_SUM (Pmode, hi, x);
7724 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7725 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7726 opnum, (enum reload_type) type);
7727 *win = 1;
7728 return x;
7731 if (GET_CODE (x) == PLUS
7732 && GET_CODE (XEXP (x, 0)) == REG
7733 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
7734 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
7735 && GET_CODE (XEXP (x, 1)) == CONST_INT
7736 && reg_offset_p
7737 && !SPE_VECTOR_MODE (mode)
7738 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7739 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
7741 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
7742 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
7743 HOST_WIDE_INT high
7744 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
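      /* Worked example (illustrative): for val = 70000 (0x11170), low
	 sign-extends to 0x1170 = 4464 and high becomes 0x10000 = 65536,
	 so high + low == val; high goes into an addis and low becomes
	 the 16-bit mem offset.  Values with the 0x8000 bit set make low
	 negative, e.g. val = 0x9000 yields low = -0x7000 and
	 high = 0x10000.  */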
7746 /* Check for 32-bit overflow. */
7747 if (high + low != val)
7749 *win = 0;
7750 return x;
7753 /* Reload the high part into a base reg; leave the low part
7754 in the mem directly. */
7756 x = gen_rtx_PLUS (GET_MODE (x),
7757 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
7758 GEN_INT (high)),
7759 GEN_INT (low));
7761 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7762 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7763 opnum, (enum reload_type) type);
7764 *win = 1;
7765 return x;
7768 if (GET_CODE (x) == SYMBOL_REF
7769 && reg_offset_p
7770 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
7771 && !SPE_VECTOR_MODE (mode)
7772 #if TARGET_MACHO
7773 && DEFAULT_ABI == ABI_DARWIN
7774 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
7775 && machopic_symbol_defined_p (x)
7776 #else
7777 && DEFAULT_ABI == ABI_V4
7778 && !flag_pic
7779 #endif
7780 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
7781 The same goes for DImode without 64-bit gprs and DFmode and DDmode
7782 without fprs.
7783 ??? Assume floating point reg based on mode? This assumption is
7784 violated by e.g. the powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
7785 where reload ends up doing a DFmode load of a constant from
7786 mem using two gprs. Unfortunately, at this point reload
7787 hasn't yet selected regs so poking around in reload data
7788 won't help and even if we could figure out the regs reliably,
7789 we'd still want to allow this transformation when the mem is
7790 naturally aligned. Since we say the address is good here, we
7791 can't disable offsets from LO_SUMs in mem_operand_gpr.
7792 FIXME: Allow offset from lo_sum for other modes too, when
7793 mem is sufficiently aligned.
7795 Also disallow this if the type can go in VMX/Altivec registers, since
7796 those registers do not have d-form (reg+offset) address modes. */
7797 && !reg_addr[mode].scalar_in_vmx_p
7798 && mode != TFmode
7799 && mode != TDmode
7800 && mode != IFmode
7801 && mode != KFmode
7802 && (mode != TImode || !TARGET_VSX_TIMODE)
7803 && mode != PTImode
7804 && (mode != DImode || TARGET_POWERPC64)
7805 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
7806 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
7808 #if TARGET_MACHO
7809 if (flag_pic)
7811 rtx offset = machopic_gen_offset (x);
7812 x = gen_rtx_LO_SUM (GET_MODE (x),
7813 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
7814 gen_rtx_HIGH (Pmode, offset)), offset);
7816 else
7817 #endif
7818 x = gen_rtx_LO_SUM (GET_MODE (x),
7819 gen_rtx_HIGH (Pmode, x), x);
7821 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7822 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7823 opnum, (enum reload_type) type);
7824 *win = 1;
7825 return x;
7828 /* Reload an offset address wrapped by an AND that represents the
7829 masking of the lower bits. Strip the outer AND and let reload
7830 convert the offset address into an indirect address. For VSX,
7831 force reload to create the address with an AND in a separate
7832 register, because we can't guarantee an altivec register will
7833 be used. */
7834 if (VECTOR_MEM_ALTIVEC_P (mode)
7835 && GET_CODE (x) == AND
7836 && GET_CODE (XEXP (x, 0)) == PLUS
7837 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7838 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7839 && GET_CODE (XEXP (x, 1)) == CONST_INT
7840 && INTVAL (XEXP (x, 1)) == -16)
7842 x = XEXP (x, 0);
7843 *win = 1;
7844 return x;
7847 if (TARGET_TOC
7848 && reg_offset_p
7849 && GET_CODE (x) == SYMBOL_REF
7850 && use_toc_relative_ref (x, mode))
7852 x = create_TOC_reference (x, NULL_RTX);
7853 if (TARGET_CMODEL != CMODEL_SMALL)
7854 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7855 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7856 opnum, (enum reload_type) type);
7857 *win = 1;
7858 return x;
7860 *win = 0;
7861 return x;
7864 /* Debug version of rs6000_legitimize_reload_address. */
7865 static rtx
7866 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
7867 int opnum, int type,
7868 int ind_levels, int *win)
7870 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
7871 ind_levels, win);
7872 fprintf (stderr,
7873 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
7874 "type = %d, ind_levels = %d, win = %d, original addr:\n",
7875 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
7876 debug_rtx (x);
7878 if (x == ret)
7879 fprintf (stderr, "Same address returned\n");
7880 else if (!ret)
7881 fprintf (stderr, "NULL returned\n");
7882 else
7884 fprintf (stderr, "New address:\n");
7885 debug_rtx (ret);
7888 return ret;
7891 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
7892 that is a valid memory address for an instruction.
7893 The MODE argument is the machine mode for the MEM expression
7894 that wants to use this address.
7896 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
7897 refers to a constant pool entry of an address (or the sum of it
7898 plus a constant), a short (16-bit signed) constant plus a register,
7899 the sum of two registers, or a register indirect, possibly with an
7900 auto-increment. For DFmode, DDmode and DImode with a constant plus
7901 register, we must ensure that both words are addressable, or that
7902 this is PowerPC64 with a word-aligned offset.
7904 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
7905 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
7906 because adjacent memory cells are accessed by adding word-sized offsets
7907 during assembly output. */
7908 static bool
7909 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
7911 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7913 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
7914 if (VECTOR_MEM_ALTIVEC_P (mode)
7915 && GET_CODE (x) == AND
7916 && GET_CODE (XEXP (x, 1)) == CONST_INT
7917 && INTVAL (XEXP (x, 1)) == -16)
7918 x = XEXP (x, 0);
7920 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
7921 return 0;
7922 if (legitimate_indirect_address_p (x, reg_ok_strict))
7923 return 1;
7924 if (TARGET_UPDATE
7925 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
7926 && mode_supports_pre_incdec_p (mode)
7927 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
7928 return 1;
7929 if (virtual_stack_registers_memory_p (x))
7930 return 1;
7931 if (reg_offset_p && legitimate_small_data_p (mode, x))
7932 return 1;
7933 if (reg_offset_p
7934 && legitimate_constant_pool_address_p (x, mode,
7935 reg_ok_strict || lra_in_progress))
7936 return 1;
7937 /* For TImode, if we have load/store quad and TImode in VSX registers, only
7938 allow register indirect addresses. This will allow the values to go in
7939 either GPRs or VSX registers without reloading. The vector types would
7940 tend to go into VSX registers, so we allow REG+REG, while TImode seems
7941 somewhat split, in that some uses are GPR based, and some VSX based. */
7942 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
7943 return 0;
7944 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
7945 if (! reg_ok_strict
7946 && reg_offset_p
7947 && GET_CODE (x) == PLUS
7948 && GET_CODE (XEXP (x, 0)) == REG
7949 && (XEXP (x, 0) == virtual_stack_vars_rtx
7950 || XEXP (x, 0) == arg_pointer_rtx)
7951 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7952 return 1;
7953 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
7954 return 1;
7955 if (!FLOAT128_2REG_P (mode)
7956 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7957 || TARGET_POWERPC64
7958 || (mode != DFmode && mode != DDmode)
7959 || (TARGET_E500_DOUBLE && mode != DDmode))
7960 && (TARGET_POWERPC64 || mode != DImode)
7961 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
7962 && mode != PTImode
7963 && !avoiding_indexed_address_p (mode)
7964 && legitimate_indexed_address_p (x, reg_ok_strict))
7965 return 1;
7966 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
7967 && mode_supports_pre_modify_p (mode)
7968 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
7969 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
7970 reg_ok_strict, false)
7971 || (!avoiding_indexed_address_p (mode)
7972 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
7973 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7974 return 1;
7975 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
7976 return 1;
7977 return 0;
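/* A few illustrative cases (a sketch, assuming SImode and non-strict
   checking):

     (reg 9)				register indirect: valid
     (plus (reg 9) (const_int 4))	reg + 16-bit offset: valid
     (plus (reg 9) (reg 10))		indexed: valid unless avoided
     (plus (reg 9) (const_int 0x12345))	offset too large: invalid,
					left for reload to split  */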
7980 /* Debug version of rs6000_legitimate_address_p. */
7981 static bool
7982 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
7983 bool reg_ok_strict)
7985 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
7986 fprintf (stderr,
7987 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
7988 "strict = %d, reload = %s, code = %s\n",
7989 ret ? "true" : "false",
7990 GET_MODE_NAME (mode),
7991 reg_ok_strict,
7992 (reload_completed
7993 ? "after"
7994 : (reload_in_progress ? "progress" : "before")),
7995 GET_RTX_NAME (GET_CODE (x)));
7996 debug_rtx (x);
7998 return ret;
8001 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
8003 static bool
8004 rs6000_mode_dependent_address_p (const_rtx addr,
8005 addr_space_t as ATTRIBUTE_UNUSED)
8007 return rs6000_mode_dependent_address_ptr (addr);
8010 /* Go to LABEL if ADDR (a legitimate address expression)
8011 has an effect that depends on the machine mode it is used for.
8013 On the RS/6000 this is true of all integral offsets (since AltiVec
8014 and VSX modes don't allow them) and of any pre-increment or decrement.
8016 ??? Except that due to conceptual problems in offsettable_address_p
8017 we can't really report the problems of integral offsets. So leave
8018 this assuming that the adjustable offset must be valid for the
8019 sub-words of a TFmode operand, which is what we had before. */
8021 static bool
8022 rs6000_mode_dependent_address (const_rtx addr)
8024 switch (GET_CODE (addr))
8026 case PLUS:
8027 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
8028 is considered a legitimate address before reload, so there
8029 are no offset restrictions in that case. Note that this
8030 condition is safe in strict mode because any address involving
8031 virtual_stack_vars_rtx or arg_pointer_rtx would already have
8032 been rejected as illegitimate. */
8033 if (XEXP (addr, 0) != virtual_stack_vars_rtx
8034 && XEXP (addr, 0) != arg_pointer_rtx
8035 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
8037 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
8038 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
8040 break;
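      /* Sketch of the bound above: a multi-word access may add up to
	 8 bytes (64-bit words) or 12 bytes (three extra 32-bit words of
	 a 16-byte mode) to the displacement, so VAL is safe only while
	 VAL + 0x8000 stays below 0x10000 minus that slack.  E.g. with
	 TARGET_POWERPC64, VAL = 0x7ff7 is independent of mode but
	 VAL = 0x7ff8 is mode-dependent.  */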
8042 case LO_SUM:
8043 /* Anything in the constant pool is sufficiently aligned that
8044 all bytes have the same high part address. */
8045 return !legitimate_constant_pool_address_p (addr, QImode, false);
8047 /* Auto-increment cases are now treated generically in recog.c. */
8048 case PRE_MODIFY:
8049 return TARGET_UPDATE;
8051 /* AND is only allowed in Altivec loads. */
8052 case AND:
8053 return true;
8055 default:
8056 break;
8059 return false;
8062 /* Debug version of rs6000_mode_dependent_address. */
8063 static bool
8064 rs6000_debug_mode_dependent_address (const_rtx addr)
8066 bool ret = rs6000_mode_dependent_address (addr);
8068 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
8069 ret ? "true" : "false");
8070 debug_rtx (addr);
8072 return ret;
8075 /* Implement FIND_BASE_TERM. */
8077 rtx
8078 rs6000_find_base_term (rtx op)
8080 rtx base;
8082 base = op;
8083 if (GET_CODE (base) == CONST)
8084 base = XEXP (base, 0);
8085 if (GET_CODE (base) == PLUS)
8086 base = XEXP (base, 0);
8087 if (GET_CODE (base) == UNSPEC)
8088 switch (XINT (base, 1))
8090 case UNSPEC_TOCREL:
8091 case UNSPEC_MACHOPIC_OFFSET:
8092 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
8093 for aliasing purposes. */
8094 return XVECEXP (base, 0, 0);
8097 return op;
8100 /* More elaborate version of recog's offsettable_memref_p predicate
8101 that works around the ??? note of rs6000_mode_dependent_address.
8102 In particular it accepts
8104 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
8106 in 32-bit mode, which the recog predicate rejects.
8108 static bool
8109 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
8111 bool worst_case;
8113 if (!MEM_P (op))
8114 return false;
8116 /* First mimic offsettable_memref_p. */
8117 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
8118 return true;
8120 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
8121 the latter predicate knows nothing about the mode of the memory
8122 reference and, therefore, assumes that it is the largest supported
8123 mode (TFmode). As a consequence, legitimate offsettable memory
8124 references are rejected. rs6000_legitimate_offset_address_p contains
8125 the correct logic for the PLUS case of rs6000_mode_dependent_address,
8126 at least with a little bit of help here given that we know the
8127 actual registers used. */
8128 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
8129 || GET_MODE_SIZE (reg_mode) == 4);
8130 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
8131 true, worst_case);
8134 /* Change register usage conditional on target flags. */
8135 static void
8136 rs6000_conditional_register_usage (void)
8138 int i;
8140 if (TARGET_DEBUG_TARGET)
8141 fprintf (stderr, "rs6000_conditional_register_usage called\n");
8143 /* Set MQ register fixed (already call_used) so that it will not be
8144 allocated. */
8145 fixed_regs[64] = 1;
8147 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
8148 if (TARGET_64BIT)
8149 fixed_regs[13] = call_used_regs[13]
8150 = call_really_used_regs[13] = 1;
8152 /* Conditionally disable FPRs. */
8153 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
8154 for (i = 32; i < 64; i++)
8155 fixed_regs[i] = call_used_regs[i]
8156 = call_really_used_regs[i] = 1;
8158 /* The TOC register is not killed across calls in a way that is
8159 visible to the compiler. */
8160 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
8161 call_really_used_regs[2] = 0;
8163 if (DEFAULT_ABI == ABI_V4
8164 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8165 && flag_pic == 2)
8166 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8168 if (DEFAULT_ABI == ABI_V4
8169 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
8170 && flag_pic == 1)
8171 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8172 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8173 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8175 if (DEFAULT_ABI == ABI_DARWIN
8176 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
8177 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8178 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8179 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8181 if (TARGET_TOC && TARGET_MINIMAL_TOC)
8182 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
8183 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
8185 if (TARGET_SPE)
8187 global_regs[SPEFSCR_REGNO] = 1;
8188 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
8189 registers in prologues and epilogues. We no longer use r14
8190 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
8191 pool for link-compatibility with older versions of GCC. Once
8192 "old" code has died out, we can return r14 to the allocation
8193 pool. */
8194 fixed_regs[14]
8195 = call_used_regs[14]
8196 = call_really_used_regs[14] = 1;
8199 if (!TARGET_ALTIVEC && !TARGET_VSX)
8201 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
8202 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8203 call_really_used_regs[VRSAVE_REGNO] = 1;
8206 if (TARGET_ALTIVEC || TARGET_VSX)
8207 global_regs[VSCR_REGNO] = 1;
8209 if (TARGET_ALTIVEC_ABI)
8211 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
8212 call_used_regs[i] = call_really_used_regs[i] = 1;
8214 /* AIX reserves VR20:31 in non-extended ABI mode. */
8215 if (TARGET_XCOFF)
8216 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8217 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8222 /* Output insns to set DEST equal to the constant SOURCE as a series of
8223 lis, ori and shl instructions and return TRUE. */
8225 bool
8226 rs6000_emit_set_const (rtx dest, rtx source)
8228 machine_mode mode = GET_MODE (dest);
8229 rtx temp, set;
8230 rtx_insn *insn;
8231 HOST_WIDE_INT c;
8233 gcc_checking_assert (CONST_INT_P (source));
8234 c = INTVAL (source);
8235 switch (mode)
8237 case QImode:
8238 case HImode:
8239 emit_insn (gen_rtx_SET (dest, source));
8240 return true;
8242 case SImode:
8243 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8245 emit_insn (gen_rtx_SET (copy_rtx (temp),
8246 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8247 emit_insn (gen_rtx_SET (dest,
8248 gen_rtx_IOR (SImode, copy_rtx (temp),
8249 GEN_INT (c & 0xffff))));
8250 break;
8252 case DImode:
8253 if (!TARGET_POWERPC64)
8255 rtx hi, lo;
8257 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8258 DImode);
8259 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8260 DImode);
8261 emit_move_insn (hi, GEN_INT (c >> 32));
8262 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8263 emit_move_insn (lo, GEN_INT (c));
8265 else
8266 rs6000_emit_set_long_const (dest, c);
8267 break;
8269 default:
8270 gcc_unreachable ();
8273 insn = get_last_insn ();
8274 set = single_set (insn);
8275 if (! CONSTANT_P (SET_SRC (set)))
8276 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8278 return true;
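/* For example (a sketch of the SImode path above), loading 0x12345678
   expands to two insns:

	lis  tmp, 0x1234	# tmp  = 0x12340000
	ori  dest, tmp, 0x5678	# dest = 0x12345678

   with a REG_EQUAL note recording the full constant on the last insn.  */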
8281 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8282 Output insns to set DEST equal to the constant C as a series of
8283 lis, ori and shl instructions. */
8285 static void
8286 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8288 rtx temp;
8289 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8291 ud1 = c & 0xffff;
8292 c = c >> 16;
8293 ud2 = c & 0xffff;
8294 c = c >> 16;
8295 ud3 = c & 0xffff;
8296 c = c >> 16;
8297 ud4 = c & 0xffff;
8299 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8300 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8301 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8303 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8304 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8306 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8308 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8309 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8310 if (ud1 != 0)
8311 emit_move_insn (dest,
8312 gen_rtx_IOR (DImode, copy_rtx (temp),
8313 GEN_INT (ud1)));
8315 else if (ud3 == 0 && ud4 == 0)
8317 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8319 gcc_assert (ud2 & 0x8000);
8320 emit_move_insn (copy_rtx (temp),
8321 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8322 if (ud1 != 0)
8323 emit_move_insn (copy_rtx (temp),
8324 gen_rtx_IOR (DImode, copy_rtx (temp),
8325 GEN_INT (ud1)));
8326 emit_move_insn (dest,
8327 gen_rtx_ZERO_EXTEND (DImode,
8328 gen_lowpart (SImode,
8329 copy_rtx (temp))));
8331 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8332 || (ud4 == 0 && ! (ud3 & 0x8000)))
8334 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8336 emit_move_insn (copy_rtx (temp),
8337 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8338 if (ud2 != 0)
8339 emit_move_insn (copy_rtx (temp),
8340 gen_rtx_IOR (DImode, copy_rtx (temp),
8341 GEN_INT (ud2)));
8342 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8343 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8344 GEN_INT (16)));
8345 if (ud1 != 0)
8346 emit_move_insn (dest,
8347 gen_rtx_IOR (DImode, copy_rtx (temp),
8348 GEN_INT (ud1)));
8350 else
8352 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8354 emit_move_insn (copy_rtx (temp),
8355 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8356 if (ud3 != 0)
8357 emit_move_insn (copy_rtx (temp),
8358 gen_rtx_IOR (DImode, copy_rtx (temp),
8359 GEN_INT (ud3)));
8361 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8362 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8363 GEN_INT (32)));
8364 if (ud2 != 0)
8365 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8366 gen_rtx_IOR (DImode, copy_rtx (temp),
8367 GEN_INT (ud2 << 16)));
8368 if (ud1 != 0)
8369 emit_move_insn (dest,
8370 gen_rtx_IOR (DImode, copy_rtx (temp),
8371 GEN_INT (ud1)));
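/* Worked example for the final case above (illustrative): the constant
   0x123456789abcdef0 splits into ud4..ud1 = 0x1234, 0x5678, 0x9abc,
   0xdef0 and builds up as:

	lis   tmp, 0x1234
	ori   tmp, tmp, 0x5678
	sldi  tmp, tmp, 32
	oris  tmp, tmp, 0x9abc
	ori   dest, tmp, 0xdef0  */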
8375 /* Helper for the following. Get rid of [r+r] memory refs
8376 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
8378 static void
8379 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8381 if (reload_in_progress)
8382 return;
8384 if (GET_CODE (operands[0]) == MEM
8385 && GET_CODE (XEXP (operands[0], 0)) != REG
8386 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8387 GET_MODE (operands[0]), false))
8388 operands[0]
8389 = replace_equiv_address (operands[0],
8390 copy_addr_to_reg (XEXP (operands[0], 0)));
8392 if (GET_CODE (operands[1]) == MEM
8393 && GET_CODE (XEXP (operands[1], 0)) != REG
8394 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8395 GET_MODE (operands[1]), false))
8396 operands[1]
8397 = replace_equiv_address (operands[1],
8398 copy_addr_to_reg (XEXP (operands[1], 0)));
8401 /* Generate a vector of constants to permute MODE for a little-endian
8402 storage operation by swapping the two halves of a vector. */
8403 static rtvec
8404 rs6000_const_vec (machine_mode mode)
8406 int i, subparts;
8407 rtvec v;
8409 switch (mode)
8411 case V1TImode:
8412 subparts = 1;
8413 break;
8414 case V2DFmode:
8415 case V2DImode:
8416 subparts = 2;
8417 break;
8418 case V4SFmode:
8419 case V4SImode:
8420 subparts = 4;
8421 break;
8422 case V8HImode:
8423 subparts = 8;
8424 break;
8425 case V16QImode:
8426 subparts = 16;
8427 break;
8428 default:
8429 gcc_unreachable();
8432 v = rtvec_alloc (subparts);
8434 for (i = 0; i < subparts / 2; ++i)
8435 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8436 for (i = subparts / 2; i < subparts; ++i)
8437 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8439 return v;
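/* E.g. for V4SImode this yields the selector { 2, 3, 0, 1 }: the two
   doubleword halves of the vector swap places, which is exactly the
   reordering that lxvd2x/stxvd2x perform on little-endian.  */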
8442 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8443 for a VSX load or store operation. */
8444 rtx
8445 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8447 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8448 return gen_rtx_VEC_SELECT (mode, source, par);
8451 /* Emit a little-endian load from vector memory location SOURCE to VSX
8452 register DEST in mode MODE. The load is done with two permuting
8453 insns that represent an lxvd2x and xxpermdi.
8454 void
8455 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8457 rtx tmp, permute_mem, permute_reg;
8459 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8460 V1TImode). */
8461 if (mode == TImode || mode == V1TImode)
8463 mode = V2DImode;
8464 dest = gen_lowpart (V2DImode, dest);
8465 source = adjust_address (source, V2DImode, 0);
8468 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8469 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8470 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8471 emit_insn (gen_rtx_SET (tmp, permute_mem));
8472 emit_insn (gen_rtx_SET (dest, permute_reg));
8475 /* Emit a little-endian store to vector memory location DEST from VSX
8476 register SOURCE in mode MODE. The store is done with two permuting
8477 insns that represent an xxpermdi and an stxvd2x.
8478 void
8479 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8481 rtx tmp, permute_src, permute_tmp;
8483 /* This should never be called during or after reload, because it does
8484 not re-permute the source register. It is intended only for use
8485 during expand. */
8486 gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
8488 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8489 V1TImode). */
8490 if (mode == TImode || mode == V1TImode)
8492 mode = V2DImode;
8493 dest = adjust_address (dest, V2DImode, 0);
8494 source = gen_lowpart (V2DImode, source);
8497 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8498 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8499 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8500 emit_insn (gen_rtx_SET (tmp, permute_src));
8501 emit_insn (gen_rtx_SET (dest, permute_tmp));
8504 /* Emit a sequence representing a little-endian VSX load or store,
8505 moving data from SOURCE to DEST in mode MODE. This is done
8506 separately from rs6000_emit_move to ensure it is called only
8507 during expand. LE VSX loads and stores introduced later are
8508 handled with a split. The expand-time RTL generation allows
8509 us to optimize away redundant pairs of register-permutes. */
8510 void
8511 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
8513 gcc_assert (!BYTES_BIG_ENDIAN
8514 && VECTOR_MEM_VSX_P (mode)
8515 && !gpr_or_gpr_p (dest, source)
8516 && (MEM_P (source) ^ MEM_P (dest)));
8518 if (MEM_P (source))
8520 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8521 rs6000_emit_le_vsx_load (dest, source, mode);
8523 else
8525 if (!REG_P (source))
8526 source = force_reg (mode, source);
8527 rs6000_emit_le_vsx_store (dest, source, mode);
8531 /* Emit a move from SOURCE to DEST in mode MODE. */
8532 void
8533 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
8535 rtx operands[2];
8536 operands[0] = dest;
8537 operands[1] = source;
8539 if (TARGET_DEBUG_ADDR)
8541 fprintf (stderr,
8542 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8543 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8544 GET_MODE_NAME (mode),
8545 reload_in_progress,
8546 reload_completed,
8547 can_create_pseudo_p ());
8548 debug_rtx (dest);
8549 fprintf (stderr, "source:\n");
8550 debug_rtx (source);
8553 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
8554 if (CONST_WIDE_INT_P (operands[1])
8555 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8557 /* This should be fixed with the introduction of CONST_WIDE_INT. */
8558 gcc_unreachable ();
8561 /* Check if GCC is setting up a block move that will end up using FP
8562 registers as temporaries. We must make sure this is acceptable. */
8563 if (GET_CODE (operands[0]) == MEM
8564 && GET_CODE (operands[1]) == MEM
8565 && mode == DImode
8566 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8567 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8568 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8569 ? 32 : MEM_ALIGN (operands[0])))
8570 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8571 ? 32
8572 : MEM_ALIGN (operands[1]))))
8573 && ! MEM_VOLATILE_P (operands [0])
8574 && ! MEM_VOLATILE_P (operands [1]))
8576 emit_move_insn (adjust_address (operands[0], SImode, 0),
8577 adjust_address (operands[1], SImode, 0));
8578 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8579 adjust_address (copy_rtx (operands[1]), SImode, 4));
8580 return;
8583 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8584 && !gpc_reg_operand (operands[1], mode))
8585 operands[1] = force_reg (mode, operands[1]);
8587 /* Recognize the case where operand[1] is a reference to thread-local
8588 data and load its address to a register. */
8589 if (tls_referenced_p (operands[1]))
8591 enum tls_model model;
8592 rtx tmp = operands[1];
8593 rtx addend = NULL;
8595 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8597 addend = XEXP (XEXP (tmp, 0), 1);
8598 tmp = XEXP (XEXP (tmp, 0), 0);
8601 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8602 model = SYMBOL_REF_TLS_MODEL (tmp);
8603 gcc_assert (model != 0);
8605 tmp = rs6000_legitimize_tls_address (tmp, model);
8606 if (addend)
8608 tmp = gen_rtx_PLUS (mode, tmp, addend);
8609 tmp = force_operand (tmp, operands[0]);
8611 operands[1] = tmp;
8614 /* Handle the case where reload calls us with an invalid address. */
8615 if (reload_in_progress && mode == Pmode
8616 && (! general_operand (operands[1], mode)
8617 || ! nonimmediate_operand (operands[0], mode)))
8618 goto emit_set;
8620 /* 128-bit constant floating-point values on Darwin should really be loaded
8621 as two parts. However, this premature splitting is a problem when DFmode
8622 values can go into Altivec registers. */
8623 if (FLOAT128_IBM_P (mode) && !reg_addr[DFmode].scalar_in_vmx_p
8624 && GET_CODE (operands[1]) == CONST_DOUBLE)
8626 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
8627 simplify_gen_subreg (DFmode, operands[1], mode, 0),
8628 DFmode);
8629 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
8630 GET_MODE_SIZE (DFmode)),
8631 simplify_gen_subreg (DFmode, operands[1], mode,
8632 GET_MODE_SIZE (DFmode)),
8633 DFmode);
8634 return;
8637 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
8638 cfun->machine->sdmode_stack_slot =
8639 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
8642 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
8643 p1:SD) if p1 is not of floating point class and p0 is spilled as
8644 we can have no analogous movsd_store for this. */
8645 if (lra_in_progress && mode == DDmode
8646 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8647 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8648 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
8649 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
8651 enum reg_class cl;
8652 int regno = REGNO (SUBREG_REG (operands[1]));
8654 if (regno >= FIRST_PSEUDO_REGISTER)
8656 cl = reg_preferred_class (regno);
8657 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
8659 if (regno >= 0 && ! FP_REGNO_P (regno))
8661 mode = SDmode;
8662 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
8663 operands[1] = SUBREG_REG (operands[1]);
8666 if (lra_in_progress
8667 && mode == SDmode
8668 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8669 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8670 && (REG_P (operands[1])
8671 || (GET_CODE (operands[1]) == SUBREG
8672 && REG_P (SUBREG_REG (operands[1])))))
8674 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
8675 ? SUBREG_REG (operands[1]) : operands[1]);
8676 enum reg_class cl;
8678 if (regno >= FIRST_PSEUDO_REGISTER)
8680 cl = reg_preferred_class (regno);
8681 gcc_assert (cl != NO_REGS);
8682 regno = ira_class_hard_regs[cl][0];
8684 if (FP_REGNO_P (regno))
8686 if (GET_MODE (operands[0]) != DDmode)
8687 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
8688 emit_insn (gen_movsd_store (operands[0], operands[1]));
8690 else if (INT_REGNO_P (regno))
8691 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8692 else
8693 gcc_unreachable();
8694 return;
8696 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
8697 p:DD)) if p0 is not of floating point class and p1 is spilled as
8698 we can have no analogous movsd_load for this. */
8699 if (lra_in_progress && mode == DDmode
8700 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
8701 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
8702 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8703 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8705 enum reg_class cl;
8706 int regno = REGNO (SUBREG_REG (operands[0]));
8708 if (regno >= FIRST_PSEUDO_REGISTER)
8710 cl = reg_preferred_class (regno);
8711 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
8713 if (regno >= 0 && ! FP_REGNO_P (regno))
8715 mode = SDmode;
8716 operands[0] = SUBREG_REG (operands[0]);
8717 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
8720 if (lra_in_progress
8721 && mode == SDmode
8722 && (REG_P (operands[0])
8723 || (GET_CODE (operands[0]) == SUBREG
8724 && REG_P (SUBREG_REG (operands[0]))))
8725 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8726 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8728 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
8729 ? SUBREG_REG (operands[0]) : operands[0]);
8730 enum reg_class cl;
8732 if (regno >= FIRST_PSEUDO_REGISTER)
8734 cl = reg_preferred_class (regno);
8735 gcc_assert (cl != NO_REGS);
8736 regno = ira_class_hard_regs[cl][0];
8738 if (FP_REGNO_P (regno))
8740 if (GET_MODE (operands[1]) != DDmode)
8741 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
8742 emit_insn (gen_movsd_load (operands[0], operands[1]));
8744 else if (INT_REGNO_P (regno))
8745 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8746 else
8747 gcc_unreachable();
8748 return;
8751 if (reload_in_progress
8752 && mode == SDmode
8753 && cfun->machine->sdmode_stack_slot != NULL_RTX
8754 && MEM_P (operands[0])
8755 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
8756 && REG_P (operands[1]))
8758 if (FP_REGNO_P (REGNO (operands[1])))
8760 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
8761 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8762 emit_insn (gen_movsd_store (mem, operands[1]));
8764 else if (INT_REGNO_P (REGNO (operands[1])))
8766 rtx mem = operands[0];
8767 if (BYTES_BIG_ENDIAN)
8768 mem = adjust_address_nv (mem, mode, 4);
8769 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8770 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
8772 else
8773 gcc_unreachable();
8774 return;
8776 if (reload_in_progress
8777 && mode == SDmode
8778 && REG_P (operands[0])
8779 && MEM_P (operands[1])
8780 && cfun->machine->sdmode_stack_slot != NULL_RTX
8781 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
8783 if (FP_REGNO_P (REGNO (operands[0])))
8785 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
8786 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8787 emit_insn (gen_movsd_load (operands[0], mem));
8789 else if (INT_REGNO_P (REGNO (operands[0])))
8791 rtx mem = operands[1];
8792 if (BYTES_BIG_ENDIAN)
8793 mem = adjust_address_nv (mem, mode, 4);
8794 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8795 emit_insn (gen_movsd_hardfloat (operands[0], mem));
8797 else
8798 gcc_unreachable();
8799 return;
8802 /* FIXME: In the long term, this switch statement should go away
8803 and be replaced by a sequence of tests based on things like
8804 mode == Pmode. */
8805 switch (mode)
8807 case HImode:
8808 case QImode:
8809 if (CONSTANT_P (operands[1])
8810 && GET_CODE (operands[1]) != CONST_INT)
8811 operands[1] = force_const_mem (mode, operands[1]);
8812 break;
8814 case TFmode:
8815 case TDmode:
8816 case IFmode:
8817 case KFmode:
8818 if (FLOAT128_2REG_P (mode))
8819 rs6000_eliminate_indexed_memrefs (operands);
8820 /* fall through */
8822 case DFmode:
8823 case DDmode:
8824 case SFmode:
8825 case SDmode:
8826 if (CONSTANT_P (operands[1])
8827 && ! easy_fp_constant (operands[1], mode))
8828 operands[1] = force_const_mem (mode, operands[1]);
8829 break;
8831 case V16QImode:
8832 case V8HImode:
8833 case V4SFmode:
8834 case V4SImode:
8835 case V4HImode:
8836 case V2SFmode:
8837 case V2SImode:
8838 case V1DImode:
8839 case V2DFmode:
8840 case V2DImode:
8841 case V1TImode:
8842 if (CONSTANT_P (operands[1])
8843 && !easy_vector_constant (operands[1], mode))
8844 operands[1] = force_const_mem (mode, operands[1]);
8845 break;
8847 case SImode:
8848 case DImode:
8849 /* Use the default pattern for the address of ELF small data. */
8850 if (TARGET_ELF
8851 && mode == Pmode
8852 && DEFAULT_ABI == ABI_V4
8853 && (GET_CODE (operands[1]) == SYMBOL_REF
8854 || GET_CODE (operands[1]) == CONST)
8855 && small_data_operand (operands[1], mode))
8857 emit_insn (gen_rtx_SET (operands[0], operands[1]));
8858 return;
8861 if (DEFAULT_ABI == ABI_V4
8862 && mode == Pmode && mode == SImode
8863 && flag_pic == 1 && got_operand (operands[1], mode))
8865 emit_insn (gen_movsi_got (operands[0], operands[1]));
8866 return;
8869 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
8870 && TARGET_NO_TOC
8871 && ! flag_pic
8872 && mode == Pmode
8873 && CONSTANT_P (operands[1])
8874 && GET_CODE (operands[1]) != HIGH
8875 && GET_CODE (operands[1]) != CONST_INT)
8877 rtx target = (!can_create_pseudo_p ()
8878 ? operands[0]
8879 : gen_reg_rtx (mode));
8881 /* If this is a function address on -mcall-aixdesc,
8882 convert it to the address of the descriptor. */
8883 if (DEFAULT_ABI == ABI_AIX
8884 && GET_CODE (operands[1]) == SYMBOL_REF
8885 && XSTR (operands[1], 0)[0] == '.')
8887 const char *name = XSTR (operands[1], 0);
8888 rtx new_ref;
8889 while (*name == '.')
8890 name++;
8891 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
8892 CONSTANT_POOL_ADDRESS_P (new_ref)
8893 = CONSTANT_POOL_ADDRESS_P (operands[1]);
8894 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
8895 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
8896 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
8897 operands[1] = new_ref;
8900 if (DEFAULT_ABI == ABI_DARWIN)
8902 #if TARGET_MACHO
8903 if (MACHO_DYNAMIC_NO_PIC_P)
8905 /* Take care of any required data indirection. */
8906 operands[1] = rs6000_machopic_legitimize_pic_address (
8907 operands[1], mode, operands[0]);
8908 if (operands[0] != operands[1])
8909 emit_insn (gen_rtx_SET (operands[0], operands[1]));
8910 return;
8912 #endif
8913 emit_insn (gen_macho_high (target, operands[1]));
8914 emit_insn (gen_macho_low (operands[0], target, operands[1]));
8915 return;
8918 emit_insn (gen_elf_high (target, operands[1]));
8919 emit_insn (gen_elf_low (operands[0], target, operands[1]));
8920 return;
8923 /* If this is a SYMBOL_REF that refers to a constant pool entry,
8924 and we have put it in the TOC, we just need to make a TOC-relative
8925 reference to it. */
8926 if (TARGET_TOC
8927 && GET_CODE (operands[1]) == SYMBOL_REF
8928 && use_toc_relative_ref (operands[1], mode))
8929 operands[1] = create_TOC_reference (operands[1], operands[0]);
8930 else if (mode == Pmode
8931 && CONSTANT_P (operands[1])
8932 && GET_CODE (operands[1]) != HIGH
8933 && ((GET_CODE (operands[1]) != CONST_INT
8934 && ! easy_fp_constant (operands[1], mode))
8935 || (GET_CODE (operands[1]) == CONST_INT
8936 && (num_insns_constant (operands[1], mode)
8937 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
8938 || (GET_CODE (operands[0]) == REG
8939 && FP_REGNO_P (REGNO (operands[0]))))
8940 && !toc_relative_expr_p (operands[1], false)
8941 && (TARGET_CMODEL == CMODEL_SMALL
8942 || can_create_pseudo_p ()
8943 || (REG_P (operands[0])
8944 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
8947 #if TARGET_MACHO
8948 /* Darwin uses a special PIC legitimizer. */
8949 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
8951 operands[1] =
8952 rs6000_machopic_legitimize_pic_address (operands[1], mode,
8953 operands[0]);
8954 if (operands[0] != operands[1])
8955 emit_insn (gen_rtx_SET (operands[0], operands[1]));
8956 return;
8958 #endif
8960 /* If we are to limit the number of things we put in the TOC and
8961 this is a symbol plus a constant we can add in one insn,
8962 just put the symbol in the TOC and add the constant. Don't do
8963 this if reload is in progress. */
8964 if (GET_CODE (operands[1]) == CONST
8965 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
8966 && GET_CODE (XEXP (operands[1], 0)) == PLUS
8967 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
8968 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
8969 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
8970 && ! side_effects_p (operands[0]))
8972 rtx sym =
8973 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
8974 rtx other = XEXP (XEXP (operands[1], 0), 1);
8976 sym = force_reg (mode, sym);
8977 emit_insn (gen_add3_insn (operands[0], sym, other));
8978 return;
8981 operands[1] = force_const_mem (mode, operands[1]);
8983 if (TARGET_TOC
8984 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8985 && constant_pool_expr_p (XEXP (operands[1], 0))
8986 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
8987 get_pool_constant (XEXP (operands[1], 0)),
8988 get_pool_mode (XEXP (operands[1], 0))))
8990 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
8991 operands[0]);
8992 operands[1] = gen_const_mem (mode, tocref);
8993 set_mem_alias_set (operands[1], get_TOC_alias_set ());
8996 break;
8998 case TImode:
8999 if (!VECTOR_MEM_VSX_P (TImode))
9000 rs6000_eliminate_indexed_memrefs (operands);
9001 break;
9003 case PTImode:
9004 rs6000_eliminate_indexed_memrefs (operands);
9005 break;
9007 default:
9008 fatal_insn ("bad move", gen_rtx_SET (dest, source));
9011 /* Above, we may have called force_const_mem which may have returned
9012 an invalid address. If we can, fix this up; otherwise, reload will
9013 have to deal with it. */
9014 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
9015 operands[1] = validize_mem (operands[1]);
9017 emit_set:
9018 emit_insn (gen_rtx_SET (operands[0], operands[1]));
9021 /* Return true if a structure, union or array containing FIELD should be
9022 accessed using `BLKMODE'.
9024 For the SPE, simd types are V2SI, and gcc can be tempted to put the
9025 entire thing in a DI and use subregs to access the internals.
9026 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
9027 back-end. Because a single GPR can hold a V2SI, but not a DI, the
9028 best thing to do is set structs to BLKmode and avoid Severe Tire
9029 Damage.
9031 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
9032 fit into 1, whereas DI still needs two. */
9034 static bool
9035 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
9037 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
9038 || (TARGET_E500_DOUBLE && mode == DFmode));
9041 /* Nonzero if we can use a floating-point register to pass this arg. */
9042 #define USE_FP_FOR_ARG_P(CUM,MODE) \
9043 (SCALAR_FLOAT_MODE_NOT_VECTOR_P (MODE) \
9044 && (CUM)->fregno <= FP_ARG_MAX_REG \
9045 && TARGET_HARD_FLOAT && TARGET_FPRS)
9047 /* Nonzero if we can use an AltiVec register to pass this arg. */
9048 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
9049 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
9050 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
9051 && TARGET_ALTIVEC_ABI \
9052 && (NAMED))
9054 /* Walk down the type tree of TYPE counting consecutive base elements.
9055 If *MODEP is VOIDmode, then set it to the first valid floating point
9056 or vector type. If a non-floating point or vector type is found, or
9057 if a floating point or vector type that doesn't match a non-VOIDmode
9058 *MODEP is found, then return -1, otherwise return the count in the
9059 sub-tree. */
9061 static int
9062 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
9064 machine_mode mode;
9065 HOST_WIDE_INT size;
9067 switch (TREE_CODE (type))
9069 case REAL_TYPE:
9070 mode = TYPE_MODE (type);
9071 if (!SCALAR_FLOAT_MODE_P (mode))
9072 return -1;
9074 if (*modep == VOIDmode)
9075 *modep = mode;
9077 if (*modep == mode)
9078 return 1;
9080 break;
9082 case COMPLEX_TYPE:
9083 mode = TYPE_MODE (TREE_TYPE (type));
9084 if (!SCALAR_FLOAT_MODE_P (mode))
9085 return -1;
9087 if (*modep == VOIDmode)
9088 *modep = mode;
9090 if (*modep == mode)
9091 return 2;
9093 break;
9095 case VECTOR_TYPE:
9096 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
9097 return -1;
9099 /* Use V4SImode as representative of all 128-bit vector types. */
9100 size = int_size_in_bytes (type);
9101 switch (size)
9103 case 16:
9104 mode = V4SImode;
9105 break;
9106 default:
9107 return -1;
9110 if (*modep == VOIDmode)
9111 *modep = mode;
9113 /* Vector modes are considered to be opaque: two vectors are
9114 equivalent for the purposes of being homogeneous aggregates
9115 if they are the same size. */
9116 if (*modep == mode)
9117 return 1;
9119 break;
9121 case ARRAY_TYPE:
9123 int count;
9124 tree index = TYPE_DOMAIN (type);
9126 /* Can't handle incomplete types or sizes that are not
9127 fixed. */
9128 if (!COMPLETE_TYPE_P (type)
9129 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9130 return -1;
9132 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
9133 if (count == -1
9134 || !index
9135 || !TYPE_MAX_VALUE (index)
9136 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
9137 || !TYPE_MIN_VALUE (index)
9138 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
9139 || count < 0)
9140 return -1;
9142 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
9143 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
9145 /* There must be no padding. */
9146 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9147 return -1;
9149 return count;
9152 case RECORD_TYPE:
9154 int count = 0;
9155 int sub_count;
9156 tree field;
9158 /* Can't handle incomplete types or sizes that are not
9159 fixed. */
9160 if (!COMPLETE_TYPE_P (type)
9161 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9162 return -1;
9164 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9166 if (TREE_CODE (field) != FIELD_DECL)
9167 continue;
9169 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9170 if (sub_count < 0)
9171 return -1;
9172 count += sub_count;
9175 /* There must be no padding. */
9176 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9177 return -1;
9179 return count;
9182 case UNION_TYPE:
9183 case QUAL_UNION_TYPE:
9185 /* These aren't very interesting except in a degenerate case. */
9186 int count = 0;
9187 int sub_count;
9188 tree field;
9190 /* Can't handle incomplete types or sizes that are not
9191 fixed. */
9192 if (!COMPLETE_TYPE_P (type)
9193 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
9194 return -1;
9196 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
9198 if (TREE_CODE (field) != FIELD_DECL)
9199 continue;
9201 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
9202 if (sub_count < 0)
9203 return -1;
9204 count = count > sub_count ? count : sub_count;
9207 /* There must be no padding. */
9208 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
9209 return -1;
9211 return count;
9214 default:
9215 break;
9218 return -1;
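/* Worked examples for the recursion above (with *MODEP starting as
   VOIDmode), given here for illustration:

       double                           -> 1   (*modep = DFmode)
       _Complex double                  -> 2
       double[3]                        -> 3   (1 * (max - min + 1))
       struct { double a, b; }          -> 2   (fields summed)
       union { double a; double b[2]; } -> 2   (max over members)
       struct { double d; int i; }      -> -1  (int is not float/vector)
       struct { double d; float f; }    -> -1  (SFmode mismatches DFmode)

   The wi::ne_p tests reject any type whose size is not exactly
   count * GET_MODE_BITSIZE (*modep), i.e. any type with padding.  */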
9221 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
9222 float or vector aggregate that shall be passed in FP/vector registers
9223 according to the ELFv2 ABI, return the homogeneous element mode in
9224 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9226 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
9228 static bool
9229 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9230 machine_mode *elt_mode,
9231 int *n_elts)
9233 /* Note that we do not accept complex types at the top level as
9234 homogeneous aggregates; these types are handled via the
9235 targetm.calls.split_complex_arg mechanism. Complex types
9236 can be elements of homogeneous aggregates, however. */
9237 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9239 machine_mode field_mode = VOIDmode;
9240 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9242 if (field_count > 0)
9244 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
9245 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9247 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9248 up to AGGR_ARG_NUM_REG registers. */
9249 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9251 if (elt_mode)
9252 *elt_mode = field_mode;
9253 if (n_elts)
9254 *n_elts = field_count;
9255 return true;
9260 if (elt_mode)
9261 *elt_mode = mode;
9262 if (n_elts)
9263 *n_elts = 1;
9264 return false;
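/* As an illustration of the arithmetic: for struct { double a, b, c, d; }
   under ELFv2, field_mode is DFmode and field_count is 4; n_regs is
   (8 + 7) >> 3 = 1 FPR per element, and 4 * 1 <= AGGR_ARG_NUM_REG (8),
   so the struct is a homogeneous aggregate passed or returned in four
   consecutive FPRs.  A struct of nine floats fails the test (9 * 1 > 8)
   and is treated like any other aggregate.  */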
9267 /* Return a nonzero value to say to return the function value in
9268 memory, just as large structures are always returned. TYPE will be
9269 the data type of the value, and FNTYPE will be the type of the
9270 function doing the returning, or @code{NULL} for libcalls.
9272 The AIX ABI for the RS/6000 specifies that all structures are
9273 returned in memory. The Darwin ABI does the same.
9275 For the Darwin 64 Bit ABI, a function result can be returned in
9276 registers or in memory, depending on the size of the return data
9277 type. If it is returned in registers, the value occupies the same
9278 registers as it would if it were the first and only function
9279 argument. Otherwise, the function places its result in memory at
9280 the location pointed to by GPR3.
9282 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9283 but a draft put them in memory, and GCC used to implement the draft
9284 instead of the final standard. Therefore, aix_struct_return
9285 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9286 compatibility can change DRAFT_V4_STRUCT_RET to override the
9287 default, and -m switches get the final word. See
9288 rs6000_option_override_internal for more details.
9290 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9291 long double support is enabled. These values are returned in memory.
9293 int_size_in_bytes returns -1 for variable size objects, which go in
9294 memory always. The cast to unsigned makes -1 > 8. */
9296 static bool
9297 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9299 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9300 if (TARGET_MACHO
9301 && rs6000_darwin64_abi
9302 && TREE_CODE (type) == RECORD_TYPE
9303 && int_size_in_bytes (type) > 0)
9305 CUMULATIVE_ARGS valcum;
9306 rtx valret;
9308 valcum.words = 0;
9309 valcum.fregno = FP_ARG_MIN_REG;
9310 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9311 /* Do a trial code generation as if this were going to be passed
9312 as an argument; if any part goes in memory, we return NULL. */
9313 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9314 if (valret)
9315 return false;
9316 /* Otherwise fall through to more conventional ABI rules. */
9319 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
9320 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9321 NULL, NULL))
9322 return false;
9324 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
9325 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9326 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9327 return false;
9329 if (AGGREGATE_TYPE_P (type)
9330 && (aix_struct_return
9331 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9332 return true;
9334 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9335 modes only exist for GCC vector types if -maltivec. */
9336 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9337 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9338 return false;
9340 /* Return synthetic vectors in memory. */
9341 if (TREE_CODE (type) == VECTOR_TYPE
9342 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9344 static bool warned_for_return_big_vectors = false;
9345 if (!warned_for_return_big_vectors)
9347 warning (0, "GCC vector returned by reference: "
9348 "non-standard ABI extension with no compatibility guarantee");
9349 warned_for_return_big_vectors = true;
9351 return true;
9354 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
9355 && FLOAT128_IEEE_P (TYPE_MODE (type)))
9356 return true;
9358 return false;
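/* Some illustrative outcomes of the tests above:

       struct { double a, b, c; }  ELFv2: homogeneous      -> registers
       struct { char c[16]; }      ELFv2: size <= 16       -> registers
       struct { char c[24]; }      ELFv2: > 16, not homog. -> memory
       any aggregate               with aix_struct_return  -> memory  */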
9361 /* Specify whether values returned in registers should be at the most
9362 significant end of a register. We want aggregates returned by
9363 value to match the way aggregates are passed to functions. */
9365 static bool
9366 rs6000_return_in_msb (const_tree valtype)
9368 return (DEFAULT_ABI == ABI_ELFv2
9369 && BYTES_BIG_ENDIAN
9370 && AGGREGATE_TYPE_P (valtype)
9371 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9374 #ifdef HAVE_AS_GNU_ATTRIBUTE
9375 /* Return TRUE if a call to function FNDECL may be one that
9376 potentially affects the function calling ABI of the object file. */
9378 static bool
9379 call_ABI_of_interest (tree fndecl)
9381 if (symtab->state == EXPANSION)
9383 struct cgraph_node *c_node;
9385 /* Libcalls are always interesting. */
9386 if (fndecl == NULL_TREE)
9387 return true;
9389 /* Any call to an external function is interesting. */
9390 if (DECL_EXTERNAL (fndecl))
9391 return true;
9393 /* Interesting functions that we are emitting in this object file. */
9394 c_node = cgraph_node::get (fndecl);
9395 c_node = c_node->ultimate_alias_target ();
9396 return !c_node->only_called_directly_p ();
9398 return false;
9400 #endif
9402 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9403 for a call to a function whose data type is FNTYPE.
9404 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
9406 For incoming args we set the number of arguments in the prototype large enough
9407 so we never return a PARALLEL. */
9409 void
9410 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9411 rtx libname ATTRIBUTE_UNUSED, int incoming,
9412 int libcall, int n_named_args,
9413 tree fndecl ATTRIBUTE_UNUSED,
9414 machine_mode return_mode ATTRIBUTE_UNUSED)
9416 static CUMULATIVE_ARGS zero_cumulative;
9418 *cum = zero_cumulative;
9419 cum->words = 0;
9420 cum->fregno = FP_ARG_MIN_REG;
9421 cum->vregno = ALTIVEC_ARG_MIN_REG;
9422 cum->prototype = (fntype && prototype_p (fntype));
9423 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9424 ? CALL_LIBCALL : CALL_NORMAL);
9425 cum->sysv_gregno = GP_ARG_MIN_REG;
9426 cum->stdarg = stdarg_p (fntype);
9428 cum->nargs_prototype = 0;
9429 if (incoming || cum->prototype)
9430 cum->nargs_prototype = n_named_args;
9432 /* Check for a longcall attribute. */
9433 if ((!fntype && rs6000_default_long_calls)
9434 || (fntype
9435 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9436 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9437 cum->call_cookie |= CALL_LONG;
9439 if (TARGET_DEBUG_ARG)
9441 fprintf (stderr, "\ninit_cumulative_args:");
9442 if (fntype)
9444 tree ret_type = TREE_TYPE (fntype);
9445 fprintf (stderr, " ret code = %s,",
9446 get_tree_code_name (TREE_CODE (ret_type)));
9449 if (cum->call_cookie & CALL_LONG)
9450 fprintf (stderr, " longcall,");
9452 fprintf (stderr, " proto = %d, nargs = %d\n",
9453 cum->prototype, cum->nargs_prototype);
9456 #ifdef HAVE_AS_GNU_ATTRIBUTE
9457 if (DEFAULT_ABI == ABI_V4)
9459 cum->escapes = call_ABI_of_interest (fndecl);
9460 if (cum->escapes)
9462 tree return_type;
9464 if (fntype)
9466 return_type = TREE_TYPE (fntype);
9467 return_mode = TYPE_MODE (return_type);
9469 else
9470 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9472 if (return_type != NULL)
9474 if (TREE_CODE (return_type) == RECORD_TYPE
9475 && TYPE_TRANSPARENT_AGGR (return_type))
9477 return_type = TREE_TYPE (first_field (return_type));
9478 return_mode = TYPE_MODE (return_type);
9480 if (AGGREGATE_TYPE_P (return_type)
9481 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9482 <= 8))
9483 rs6000_returns_struct = true;
9485 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (return_mode))
9486 rs6000_passes_float = true;
9487 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9488 || SPE_VECTOR_MODE (return_mode))
9489 rs6000_passes_vector = true;
9492 #endif
9494 if (fntype
9495 && !TARGET_ALTIVEC
9496 && TARGET_ALTIVEC_ABI
9497 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9499 error ("cannot return value in vector register because"
9500 " altivec instructions are disabled, use -maltivec"
9501 " to enable them");
9505 /* The mode the ABI uses for a word. This is not the same as word_mode
9506 for -m32 -mpowerpc64. This is used to implement various target hooks. */
9508 static machine_mode
9509 rs6000_abi_word_mode (void)
9511 return TARGET_32BIT ? SImode : DImode;
9514 /* On rs6000, function arguments are promoted, as are function return
9515 values. */
9517 static machine_mode
9518 rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
9519 machine_mode mode,
9520 int *punsignedp ATTRIBUTE_UNUSED,
9521 const_tree, int)
9523 PROMOTE_MODE (mode, *punsignedp, type);
9525 return mode;
9528 /* Return true if TYPE must be passed on the stack and not in registers. */
9530 static bool
9531 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
9533 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9534 return must_pass_in_stack_var_size (mode, type);
9535 else
9536 return must_pass_in_stack_var_size_or_pad (mode, type);
9539 /* If defined, a C expression which determines whether, and in which
9540 direction, to pad out an argument with extra space. The value
9541 should be of type `enum direction': either `upward' to pad above
9542 the argument, `downward' to pad below, or `none' to inhibit
9543 padding.
9545 For the AIX ABI structs are always stored left shifted in their
9546 argument slot. */
9548 enum direction
9549 function_arg_padding (machine_mode mode, const_tree type)
9551 #ifndef AGGREGATE_PADDING_FIXED
9552 #define AGGREGATE_PADDING_FIXED 0
9553 #endif
9554 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9555 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9556 #endif
9558 if (!AGGREGATE_PADDING_FIXED)
9560 /* GCC used to pass structures of the same size as integer types as
9561 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
9562 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9563 passed padded downward, except that -mstrict-align further
9564 muddied the water in that multi-component structures of 2 and 4
9565 bytes in size were passed padded upward.
9567 The following arranges for best compatibility with previous
9568 versions of gcc, but removes the -mstrict-align dependency. */
9569 if (BYTES_BIG_ENDIAN)
9571 HOST_WIDE_INT size = 0;
9573 if (mode == BLKmode)
9575 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
9576 size = int_size_in_bytes (type);
9578 else
9579 size = GET_MODE_SIZE (mode);
9581 if (size == 1 || size == 2 || size == 4)
9582 return downward;
9584 return upward;
9587 if (AGGREGATES_PAD_UPWARD_ALWAYS)
9589 if (type != 0 && AGGREGATE_TYPE_P (type))
9590 return upward;
9593 /* Fall back to the default. */
9594 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
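/* Illustrative cases for big-endian without AGGREGATE_PADDING_FIXED:

       struct { char a, b; }     size 2 -> downward (low-order end of
                                           the slot, like a short)
       struct { char a, b, c; }  size 3 -> upward
       struct { int a, b; }      size 8 -> upward

   matching the historical integer-like treatment described above.  */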
9597 /* If defined, a C expression that gives the alignment boundary, in bits,
9598 of an argument with the specified mode and type. If it is not defined,
9599 PARM_BOUNDARY is used for all arguments.
9601 V.4 wants long longs and doubles to be double word aligned. Just
9602 testing the mode size is a boneheaded way to do this as it means
9603 that other types such as complex int are also double word aligned.
9604 However, we're stuck with this because changing the ABI might break
9605 existing library interfaces.
9607 Doubleword align SPE vectors.
9608 Quadword align Altivec/VSX vectors.
9609 Quadword align large synthetic vector types. */
9611 static unsigned int
9612 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
9614 machine_mode elt_mode;
9615 int n_elts;
9617 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9619 if (DEFAULT_ABI == ABI_V4
9620 && (GET_MODE_SIZE (mode) == 8
9621 || (TARGET_HARD_FLOAT
9622 && TARGET_FPRS
9623 && FLOAT128_2REG_P (mode))))
9624 return 64;
9625 else if (FLOAT128_VECTOR_P (mode))
9626 return 128;
9627 else if (SPE_VECTOR_MODE (mode)
9628 || (type && TREE_CODE (type) == VECTOR_TYPE
9629 && int_size_in_bytes (type) >= 8
9630 && int_size_in_bytes (type) < 16))
9631 return 64;
9632 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9633 || (type && TREE_CODE (type) == VECTOR_TYPE
9634 && int_size_in_bytes (type) >= 16))
9635 return 128;
9637 /* Aggregate types that need > 8 byte alignment are quadword-aligned
9638 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
9639 -mcompat-align-parm is used. */
9640 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
9641 || DEFAULT_ABI == ABI_ELFv2)
9642 && type && TYPE_ALIGN (type) > 64)
9644 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
9645 or homogeneous float/vector aggregates here. We already handled
9646 vector aggregates above, but still need to check for float here. */
9647 bool aggregate_p = (AGGREGATE_TYPE_P (type)
9648 && !SCALAR_FLOAT_MODE_P (elt_mode));
9650 /* We used to check for BLKmode instead of the above aggregate type
9651 check. Warn when this results in any difference to the ABI. */
9652 if (aggregate_p != (mode == BLKmode))
9654 static bool warned;
9655 if (!warned && warn_psabi)
9657 warned = true;
9658 inform (input_location,
9659 "the ABI of passing aggregates with %d-byte alignment"
9660 " has changed in GCC 5",
9661 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
9665 if (aggregate_p)
9666 return 128;
9669 /* Similar for the Darwin64 ABI. Note that for historical reasons we
9670 implement the "aggregate type" check as a BLKmode check here; this
9671 means certain aggregate types are in fact not aligned. */
9672 if (TARGET_MACHO && rs6000_darwin64_abi
9673 && mode == BLKmode
9674 && type && TYPE_ALIGN (type) > 64)
9675 return 128;
9677 return PARM_BOUNDARY;
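/* Illustrative results of the rules above:

       long long under V.4                  -> 64
       double under V.4 with hard float     -> 64
       AltiVec/VSX vector                   -> 128
       ELFv2 struct with 32-byte alignment  -> 128 (plus a -Wpsabi note
                                               when this differs from
                                               the old BLKmode rule)
       anything else                        -> PARM_BOUNDARY  */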
9680 /* The offset in words to the start of the parameter save area. */
9682 static unsigned int
9683 rs6000_parm_offset (void)
9685 return (DEFAULT_ABI == ABI_V4 ? 2
9686 : DEFAULT_ABI == ABI_ELFv2 ? 4
9687 : 6);
9690 /* For a function parm of MODE and TYPE, return the starting word in
9691 the parameter area. NWORDS of the parameter area are already used. */
9693 static unsigned int
9694 rs6000_parm_start (machine_mode mode, const_tree type,
9695 unsigned int nwords)
9697 unsigned int align;
9699 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
9700 return nwords + (-(rs6000_parm_offset () + nwords) & align);
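/* A worked example: on 64-bit ELFv2 (rs6000_parm_offset () == 4,
   PARM_BOUNDARY == 64), a 128-bit-aligned argument following one
   doubleword of earlier arguments gives

       align = 128 / 64 - 1 = 1
       start = 1 + (-(4 + 1) & 1) = 1 + 1 = 2

   and rs6000_parm_offset () + start = 6 doublewords = 48 bytes, which
   is 16-byte aligned as required.  */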
9703 /* Compute the size (in words) of a function argument. */
9705 static unsigned long
9706 rs6000_arg_size (machine_mode mode, const_tree type)
9708 unsigned long size;
9710 if (mode != BLKmode)
9711 size = GET_MODE_SIZE (mode);
9712 else
9713 size = int_size_in_bytes (type);
9715 if (TARGET_32BIT)
9716 return (size + 3) >> 2;
9717 else
9718 return (size + 7) >> 3;
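/* Worked arithmetic: a 10-byte BLKmode argument occupies
   (10 + 3) >> 2 = 3 words on 32-bit targets and (10 + 7) >> 3 = 2
   doublewords on 64-bit targets; sizes always round up to whole
   parameter words.  */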
9721 /* Use this to flush pending int fields. */
9723 static void
9724 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
9725 HOST_WIDE_INT bitpos, int final)
9727 unsigned int startbit, endbit;
9728 int intregs, intoffset;
9729 machine_mode mode;
9731 /* Handle the situations where a float is taking up the first half
9732 of the GPR, and the other half is empty (typically due to
9733 alignment restrictions). We can detect this by an 8-byte-aligned
9734 int field, or by seeing that this is the final flush for this
9735 argument. Count the word and continue on. */
9736 if (cum->floats_in_gpr == 1
9737 && (cum->intoffset % 64 == 0
9738 || (cum->intoffset == -1 && final)))
9740 cum->words++;
9741 cum->floats_in_gpr = 0;
9744 if (cum->intoffset == -1)
9745 return;
9747 intoffset = cum->intoffset;
9748 cum->intoffset = -1;
9749 cum->floats_in_gpr = 0;
9751 if (intoffset % BITS_PER_WORD != 0)
9753 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9754 MODE_INT, 0);
9755 if (mode == BLKmode)
9757 /* We couldn't find an appropriate mode, which happens,
9758 e.g., in packed structs when there are 3 bytes to load.
9759 Back intoffset back to the beginning of the word in this
9760 case. */
9761 intoffset = intoffset & -BITS_PER_WORD;
9765 startbit = intoffset & -BITS_PER_WORD;
9766 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9767 intregs = (endbit - startbit) / BITS_PER_WORD;
9768 cum->words += intregs;
9769 /* words should be unsigned. */
9770 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
9772 int pad = (endbit/BITS_PER_WORD) - cum->words;
9773 cum->words += pad;
9777 /* The darwin64 ABI calls for us to recurse down through structs,
9778 looking for elements passed in registers. Unfortunately, we have
9779 to track int register count here also because of misalignments
9780 in powerpc alignment mode. */
9782 static void
9783 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
9784 const_tree type,
9785 HOST_WIDE_INT startbitpos)
9787 tree f;
9789 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9790 if (TREE_CODE (f) == FIELD_DECL)
9792 HOST_WIDE_INT bitpos = startbitpos;
9793 tree ftype = TREE_TYPE (f);
9794 machine_mode mode;
9795 if (ftype == error_mark_node)
9796 continue;
9797 mode = TYPE_MODE (ftype);
9799 if (DECL_SIZE (f) != 0
9800 && tree_fits_uhwi_p (bit_position (f)))
9801 bitpos += int_bit_position (f);
9803 /* ??? FIXME: else assume zero offset. */
9805 if (TREE_CODE (ftype) == RECORD_TYPE)
9806 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
9807 else if (USE_FP_FOR_ARG_P (cum, mode))
9809 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
9810 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9811 cum->fregno += n_fpregs;
9812 /* Single-precision floats present a special problem for
9813 us, because they are smaller than an 8-byte GPR, and so
9814 the structure-packing rules combined with the standard
9815 varargs behavior mean that we want to pack float/float
9816 and float/int combinations into a single register's
9817 space. This is complicated by the arg advance flushing,
9818 which works on arbitrarily large groups of int-type
9819 fields. */
9820 if (mode == SFmode)
9822 if (cum->floats_in_gpr == 1)
9824 /* Two floats in a word; count the word and reset
9825 the float count. */
9826 cum->words++;
9827 cum->floats_in_gpr = 0;
9829 else if (bitpos % 64 == 0)
9831 /* A float at the beginning of an 8-byte word;
9832 count it and put off adjusting cum->words until
9833 we see if an arg advance flush is going to do it
9834 for us. */
9835 cum->floats_in_gpr++;
9837 else
9839 /* The float is at the end of a word, preceded
9840 by integer fields, so the arg advance flush
9841 just above has already set cum->words and
9842 everything is taken care of. */
9845 else
9846 cum->words += n_fpregs;
9848 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9850 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9851 cum->vregno++;
9852 cum->words += 2;
9854 else if (cum->intoffset == -1)
9855 cum->intoffset = bitpos;
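/* An illustrative trace for struct { float f; float g; }: the first
   SFmode field starts at bit 0, so floats_in_gpr becomes 1; the second
   field finds floats_in_gpr == 1, counts the shared word (cum->words++)
   and resets the counter.  The two floats thus share one GPR-sized slot
   while each still consumes its own FPR via cum->fregno.  */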
9859 /* Check for an item that needs to be considered specially under the darwin 64
9860 bit ABI. These are record types where the mode is BLK or the structure is
9861 8 bytes in size. */
9862 static int
9863 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
9865 return rs6000_darwin64_abi
9866 && ((mode == BLKmode
9867 && TREE_CODE (type) == RECORD_TYPE
9868 && int_size_in_bytes (type) > 0)
9869 || (type && TREE_CODE (type) == RECORD_TYPE
9870 && int_size_in_bytes (type) == 8)) ? 1 : 0;
9873 /* Update the data in CUM to advance over an argument
9874 of mode MODE and data type TYPE.
9875 (TYPE is null for libcalls where that information may not be available.)
9877 Note that for args passed by reference, function_arg will be called
9878 with MODE and TYPE set to that of the pointer to the arg, not the arg
9879 itself. */
9881 static void
9882 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
9883 const_tree type, bool named, int depth)
9885 machine_mode elt_mode;
9886 int n_elts;
9888 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9890 /* Only tick off an argument if we're not recursing. */
9891 if (depth == 0)
9892 cum->nargs_prototype--;
9894 #ifdef HAVE_AS_GNU_ATTRIBUTE
9895 if (DEFAULT_ABI == ABI_V4
9896 && cum->escapes)
9898 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode))
9899 rs6000_passes_float = true;
9900 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
9901 rs6000_passes_vector = true;
9902 else if (SPE_VECTOR_MODE (mode)
9903 && !cum->stdarg
9904 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9905 rs6000_passes_vector = true;
9907 #endif
9909 if (TARGET_ALTIVEC_ABI
9910 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9911 || (type && TREE_CODE (type) == VECTOR_TYPE
9912 && int_size_in_bytes (type) == 16)))
9914 bool stack = false;
9916 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
9918 cum->vregno += n_elts;
9920 if (!TARGET_ALTIVEC)
9921 error ("cannot pass argument in vector register because"
9922 " altivec instructions are disabled, use -maltivec"
9923 " to enable them");
9925 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
9926 even if it is going to be passed in a vector register.
9927 Darwin does the same for variable-argument functions. */
9928 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9929 && TARGET_64BIT)
9930 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
9931 stack = true;
9933 else
9934 stack = true;
9936 if (stack)
9938 int align;
9940 /* Vector parameters must be 16-byte aligned. In 32-bit
9941 mode this means we need to take into account the offset
9942 to the parameter save area. In 64-bit mode, they just
9943 have to start on an even word, since the parameter save
9944 area is 16-byte aligned. */
9945 if (TARGET_32BIT)
9946 align = -(rs6000_parm_offset () + cum->words) & 3;
9947 else
9948 align = cum->words & 1;
9949 cum->words += align + rs6000_arg_size (mode, type);
9951 if (TARGET_DEBUG_ARG)
9953 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
9954 cum->words, align);
9955 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
9956 cum->nargs_prototype, cum->prototype,
9957 GET_MODE_NAME (mode));
9961 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
9962 && !cum->stdarg
9963 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9964 cum->sysv_gregno++;
9966 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9968 int size = int_size_in_bytes (type);
9969 /* Variable sized types have size == -1 and are
9970 treated as if consisting entirely of ints.
9971 Pad to 16 byte boundary if needed. */
9972 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9973 && (cum->words % 2) != 0)
9974 cum->words++;
9975 /* For varargs, we can just go up by the size of the struct. */
9976 if (!named)
9977 cum->words += (size + 7) / 8;
9978 else
9980 /* It is tempting to say int register count just goes up by
9981 sizeof(type)/8, but this is wrong in a case such as
9982 { int; double; int; } [powerpc alignment]. We have to
9983 grovel through the fields for these too. */
9984 cum->intoffset = 0;
9985 cum->floats_in_gpr = 0;
9986 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
9987 rs6000_darwin64_record_arg_advance_flush (cum,
9988 size * BITS_PER_UNIT, 1);
9990 if (TARGET_DEBUG_ARG)
9992 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
9993 cum->words, TYPE_ALIGN (type), size);
9994 fprintf (stderr,
9995 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
9996 cum->nargs_prototype, cum->prototype,
9997 GET_MODE_NAME (mode));
10000 else if (DEFAULT_ABI == ABI_V4)
10002 if (TARGET_HARD_FLOAT && TARGET_FPRS
10003 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10004 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10005 || FLOAT128_2REG_P (mode)
10006 || DECIMAL_FLOAT_MODE_P (mode)))
10008 /* _Decimal128 must use an even/odd register pair. This assumes
10009 that the register number is odd when fregno is odd. */
10010 if (mode == TDmode && (cum->fregno % 2) == 1)
10011 cum->fregno++;
10013 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
10014 <= FP_ARG_V4_MAX_REG)
10015 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
10016 else
10018 cum->fregno = FP_ARG_V4_MAX_REG + 1;
10019 if (mode == DFmode || FLOAT128_IBM_P (mode)
10020 || mode == DDmode || mode == TDmode)
10021 cum->words += cum->words & 1;
10022 cum->words += rs6000_arg_size (mode, type);
10025 else
10027 int n_words = rs6000_arg_size (mode, type);
10028 int gregno = cum->sysv_gregno;
10030 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10031 (r7,r8) or (r9,r10). As does any other 2 word item such
10032 as complex int due to a historical mistake. */
10033 if (n_words == 2)
10034 gregno += (1 - gregno) & 1;
10036 /* Multi-reg args are not split between registers and stack. */
10037 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10039 /* Long long and SPE vectors are aligned on the stack.
10040 So are other 2 word items such as complex int due to
10041 a historical mistake. */
10042 if (n_words == 2)
10043 cum->words += cum->words & 1;
10044 cum->words += n_words;
10047 /* Note: we keep accumulating gregno even after we have started
10048 spilling to the stack; this tells expand_builtin_saveregs that
10049 spilling has begun. */
10050 cum->sysv_gregno = gregno + n_words;
10053 if (TARGET_DEBUG_ARG)
10055 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10056 cum->words, cum->fregno);
10057 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
10058 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
10059 fprintf (stderr, "mode = %4s, named = %d\n",
10060 GET_MODE_NAME (mode), named);
10063 else
10065 int n_words = rs6000_arg_size (mode, type);
10066 int start_words = cum->words;
10067 int align_words = rs6000_parm_start (mode, type, start_words);
10069 cum->words = align_words + n_words;
10071 if (SCALAR_FLOAT_MODE_P (elt_mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
10073 /* _Decimal128 must be passed in an even/odd float register pair.
10074 This assumes that the register number is odd when fregno is
10075 odd. */
10076 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10077 cum->fregno++;
10078 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
10081 if (TARGET_DEBUG_ARG)
10083 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
10084 cum->words, cum->fregno);
10085 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
10086 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
10087 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
10088 named, align_words - start_words, depth);
10093 static void
10094 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
10095 const_tree type, bool named)
10097 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
10101 static rtx
10102 spe_build_register_parallel (machine_mode mode, int gregno)
10104 rtx r1, r3, r5, r7;
10106 switch (mode)
10108 case DFmode:
10109 r1 = gen_rtx_REG (DImode, gregno);
10110 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10111 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
10113 case DCmode:
10114 case TFmode:
10115 r1 = gen_rtx_REG (DImode, gregno);
10116 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10117 r3 = gen_rtx_REG (DImode, gregno + 2);
10118 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10119 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
10121 case TCmode:
10122 r1 = gen_rtx_REG (DImode, gregno);
10123 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
10124 r3 = gen_rtx_REG (DImode, gregno + 2);
10125 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
10126 r5 = gen_rtx_REG (DImode, gregno + 4);
10127 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
10128 r7 = gen_rtx_REG (DImode, gregno + 6);
10129 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
10130 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
10132 default:
10133 gcc_unreachable ();
10137 /* Determine where to put a SIMD argument on the SPE. */
10138 static rtx
10139 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
10140 const_tree type)
10142 int gregno = cum->sysv_gregno;
10144 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
10145 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
10146 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
10147 || mode == DCmode || mode == TCmode))
10149 int n_words = rs6000_arg_size (mode, type);
10151 /* Doubles go in an odd/even register pair (r5/r6, etc). */
10152 if (mode == DFmode)
10153 gregno += (1 - gregno) & 1;
10155 /* Multi-reg args are not split between registers and stack. */
10156 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10157 return NULL_RTX;
10159 return spe_build_register_parallel (mode, gregno);
10161 if (cum->stdarg)
10163 int n_words = rs6000_arg_size (mode, type);
10165 /* SPE vectors are put in odd registers. */
10166 if (n_words == 2 && (gregno & 1) == 0)
10167 gregno += 1;
10169 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
10171 rtx r1, r2;
10172 machine_mode m = SImode;
10174 r1 = gen_rtx_REG (m, gregno);
10175 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
10176 r2 = gen_rtx_REG (m, gregno + 1);
10177 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
10178 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
10180 else
10181 return NULL_RTX;
10183 else
10185 if (gregno <= GP_ARG_MAX_REG)
10186 return gen_rtx_REG (mode, gregno);
10187 else
10188 return NULL_RTX;
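/* A worked example: with TARGET_E500_DOUBLE, a DFmode argument arriving
   when cum->sysv_gregno is 4 (r4) is bumped to the odd register by
   "gregno += (1 - 4) & 1" (-3 & 1 == 1, so gregno becomes 5), and
   spe_build_register_parallel then describes it over the r5/r6 pair
   for ABI purposes.  */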
10192 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
10193 structure between cum->intoffset and bitpos to integer registers. */
10195 static void
10196 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
10197 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
10199 machine_mode mode;
10200 unsigned int regno;
10201 unsigned int startbit, endbit;
10202 int this_regno, intregs, intoffset;
10203 rtx reg;
10205 if (cum->intoffset == -1)
10206 return;
10208 intoffset = cum->intoffset;
10209 cum->intoffset = -1;
10211 /* If this is the trailing part of a word, try to only load that
10212 much into the register. Otherwise load the whole register. Note
10213 that in the latter case we may pick up unwanted bits. It's not a
10214 problem at the moment, but we may wish to revisit this. */
10216 if (intoffset % BITS_PER_WORD != 0)
10218 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
10219 MODE_INT, 0);
10220 if (mode == BLKmode)
10222 /* We couldn't find an appropriate mode, which happens,
10223 e.g., in packed structs when there are 3 bytes to load.
10224 Back intoffset back to the beginning of the word in this
10225 case. */
10226 intoffset = intoffset & -BITS_PER_WORD;
10227 mode = word_mode;
10230 else
10231 mode = word_mode;
10233 startbit = intoffset & -BITS_PER_WORD;
10234 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
10235 intregs = (endbit - startbit) / BITS_PER_WORD;
10236 this_regno = cum->words + intoffset / BITS_PER_WORD;
10238 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
10239 cum->use_stack = 1;
10241 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
10242 if (intregs <= 0)
10243 return;
10245 intoffset /= BITS_PER_UNIT;
10248 regno = GP_ARG_MIN_REG + this_regno;
10249 reg = gen_rtx_REG (mode, regno);
10250 rvec[(*k)++] =
10251 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10253 this_regno += 1;
10254 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
10255 mode = word_mode;
10256 intregs -= 1;
10258 while (intregs > 0);
10261 /* Recursive workhorse for the following. */
10263 static void
10264 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10265 HOST_WIDE_INT startbitpos, rtx rvec[],
10266 int *k)
10268 tree f;
10270 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10271 if (TREE_CODE (f) == FIELD_DECL)
10273 HOST_WIDE_INT bitpos = startbitpos;
10274 tree ftype = TREE_TYPE (f);
10275 machine_mode mode;
10276 if (ftype == error_mark_node)
10277 continue;
10278 mode = TYPE_MODE (ftype);
10280 if (DECL_SIZE (f) != 0
10281 && tree_fits_uhwi_p (bit_position (f)))
10282 bitpos += int_bit_position (f);
10284 /* ??? FIXME: else assume zero offset. */
10286 if (TREE_CODE (ftype) == RECORD_TYPE)
10287 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10288 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10290 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10291 #if 0
10292 switch (mode)
10294 case SCmode: mode = SFmode; break;
10295 case DCmode: mode = DFmode; break;
10296 case TCmode: mode = TFmode; break;
10297 default: break;
10299 #endif
10300 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10301 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10303 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10304 && (mode == TFmode || mode == TDmode));
10305 /* Long double or _Decimal128 split over regs and memory. */
10306 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10307 cum->use_stack = 1;
10309 rvec[(*k)++]
10310 = gen_rtx_EXPR_LIST (VOIDmode,
10311 gen_rtx_REG (mode, cum->fregno++),
10312 GEN_INT (bitpos / BITS_PER_UNIT));
10313 if (FLOAT128_2REG_P (mode))
10314 cum->fregno++;
10316 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10318 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10319 rvec[(*k)++]
10320 = gen_rtx_EXPR_LIST (VOIDmode,
10321 gen_rtx_REG (mode, cum->vregno++),
10322 GEN_INT (bitpos / BITS_PER_UNIT));
10324 else if (cum->intoffset == -1)
10325 cum->intoffset = bitpos;
10329 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10330 the register(s) to be used for each field and subfield of a struct
10331 being passed by value, along with the offset of where the
10332 register's value may be found in the block. FP fields go in FP
10333 register, vector fields go in vector registers, and everything
10334 else goes in int registers, packed as in memory.
10336 This code is also used for function return values. RETVAL indicates
10337 whether this is the case.
10339 Much of this is taken from the SPARC V9 port, which has a similar
10340 calling convention. */
10342 static rtx
10343 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10344 bool named, bool retval)
10346 rtx rvec[FIRST_PSEUDO_REGISTER];
10347 int k = 1, kbase = 1;
10348 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10349 /* This is a copy; modifications are not visible to our caller. */
10350 CUMULATIVE_ARGS copy_cum = *orig_cum;
10351 CUMULATIVE_ARGS *cum = &copy_cum;
10353 /* Pad to 16 byte boundary if needed. */
10354 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10355 && (cum->words % 2) != 0)
10356 cum->words++;
10358 cum->intoffset = 0;
10359 cum->use_stack = 0;
10360 cum->named = named;
10362 /* Put entries into rvec[] for individual FP and vector fields, and
10363 for the chunks of memory that go in int regs. Note we start at
10364 element 1; 0 is reserved for an indication of using memory, and
10365 may or may not be filled in below. */
10366 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10367 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10369 /* If any part of the struct went on the stack put all of it there.
10370 This hack is because the generic code for
10371 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10372 parts of the struct are not at the beginning. */
10373 if (cum->use_stack)
10375 if (retval)
10376 return NULL_RTX; /* doesn't go in registers at all */
10377 kbase = 0;
10378 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10380 if (k > 1 || cum->use_stack)
10381 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10382 else
10383 return NULL_RTX;
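/* An illustrative sketch: for a named struct { double d; long i; } at
   the start of the argument list, the recursion above would build a
   PARALLEL along the lines of

       (parallel:BLK [(expr_list (reg:DF f1) (const_int 0))
                      (expr_list (reg:DI r4) (const_int 8))])

   with the double in an FPR and the integer chunk flushed to a GPR at
   byte offset 8.  When use_stack is set, element 0 instead holds
   NULL_RTX to mark that the whole struct also lives in memory.  */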
10386 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10388 static rtx
10389 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10390 int align_words)
10392 int n_units;
10393 int i, k;
10394 rtx rvec[GP_ARG_NUM_REG + 1];
10396 if (align_words >= GP_ARG_NUM_REG)
10397 return NULL_RTX;
10399 n_units = rs6000_arg_size (mode, type);
10401 /* Optimize the simple case where the arg fits in one gpr, except in
10402 the case of BLKmode due to assign_parms assuming that registers are
10403 BITS_PER_WORD wide. */
10404 if (n_units == 0
10405 || (n_units == 1 && mode != BLKmode))
10406 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10408 k = 0;
10409 if (align_words + n_units > GP_ARG_NUM_REG)
10410 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10411 using a magic NULL_RTX component.
10412 This is not strictly correct. Only some of the arg belongs in
10413 memory, not all of it. However, the normal scheme using
10414 function_arg_partial_nregs can result in unusual subregs, eg.
10415 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10416 store the whole arg to memory is often more efficient than code
10417 to store pieces, and we know that space is available in the right
10418 place for the whole arg. */
10419 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10421 i = 0;
10424 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10425 rtx off = GEN_INT (i++ * 4);
10426 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10428 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10430 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
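/* As an illustration: an 8-byte argument starting at align_words == 7
   (r10, the last argument GPR) yields a PARALLEL of the NULL_RTX memory
   marker plus one (reg:SI r10) element at offset 0; the remaining four
   bytes live only in memory.  */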
10433 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10434 but must also be copied into the parameter save area starting at
10435 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10436 to the GPRs and/or memory. Return the number of elements used. */
10438 static int
10439 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10440 int align_words, rtx *rvec)
10442 int k = 0;
10444 if (align_words < GP_ARG_NUM_REG)
10446 int n_words = rs6000_arg_size (mode, type);
10448 if (align_words + n_words > GP_ARG_NUM_REG
10449 || mode == BLKmode
10450 || (TARGET_32BIT && TARGET_POWERPC64))
10452 /* If this is partially on the stack, then we only
10453 include the portion actually in registers here. */
10454 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10455 int i = 0;
10457 if (align_words + n_words > GP_ARG_NUM_REG)
10459 /* Not all of the arg fits in gprs. Say that it goes in memory
10460 too, using a magic NULL_RTX component. Also see comment in
10461 rs6000_mixed_function_arg for why the normal
10462 function_arg_partial_nregs scheme doesn't work in this case. */
10463 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10468 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10469 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10470 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10472 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10474 else
10476 /* The whole arg fits in gprs. */
10477 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10478 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10481 else
10483 /* It's entirely in memory. */
10484 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10487 return k;
10490 /* RVEC is a vector of K components of an argument of mode MODE.
10491 Construct the final function_arg return value from it. */
10493 static rtx
10494 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
10496 gcc_assert (k >= 1);
10498 /* Avoid returning a PARALLEL in the trivial cases. */
10499 if (k == 1)
10501 if (XEXP (rvec[0], 0) == NULL_RTX)
10502 return NULL_RTX;
10504 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10505 return XEXP (rvec[0], 0);
10508 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10511 /* Determine where to put an argument to a function.
10512 Value is zero to push the argument on the stack,
10513 or a hard register in which to store the argument.
10515 MODE is the argument's machine mode.
10516 TYPE is the data type of the argument (as a tree).
10517 This is null for libcalls where that information may
10518 not be available.
10519 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10520 the preceding args and about the function being called. It is
10521 not modified in this routine.
10522 NAMED is nonzero if this argument is a named parameter
10523 (otherwise it is an extra parameter matching an ellipsis).
10525 On RS/6000 the first eight words of non-FP are normally in registers
10526 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10527 Under V.4, the first 8 FP args are in registers.
10529 If this is floating-point and no prototype is specified, we use
10530 both an FP and integer register (or possibly FP reg and stack). Library
10531 functions (when CALL_LIBCALL is set) always have the proper types for args,
10532 so we can pass the FP value just in one register. emit_library_call
10533 doesn't support PARALLEL anyway.
10535 Note that for args passed by reference, function_arg will be called
10536 with MODE and TYPE set to that of the pointer to the arg, not the arg
10537 itself. */
10539 static rtx
10540 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
10541 const_tree type, bool named)
10543 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10544 enum rs6000_abi abi = DEFAULT_ABI;
10545 machine_mode elt_mode;
10546 int n_elts;
10548 /* Return a marker indicating whether the bit in CR1 that V.4 uses to
10549 say FP args were passed in registers needs to be set or cleared.
10550 Assume that we don't need the marker for software floating point,
10551 or compiler generated library calls. */
10552 if (mode == VOIDmode)
10554 if (abi == ABI_V4
10555 && (cum->call_cookie & CALL_LIBCALL) == 0
10556 && (cum->stdarg
10557 || (cum->nargs_prototype < 0
10558 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10560 /* For the SPE, we need to crxor CR6 always. */
10561 if (TARGET_SPE_ABI)
10562 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10563 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10564 return GEN_INT (cum->call_cookie
10565 | ((cum->fregno == FP_ARG_MIN_REG)
10566 ? CALL_V4_SET_FP_ARGS
10567 : CALL_V4_CLEAR_FP_ARGS));
10570 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
10573 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10575 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10577 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
10578 if (rslt != NULL_RTX)
10579 return rslt;
10580 /* Else fall through to usual handling. */
10583 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10585 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10586 rtx r, off;
10587 int i, k = 0;
10589 /* Do we also need to pass this argument in the parameter
10590 save area? */
10591 if (TARGET_64BIT && ! cum->prototype)
10593 int align_words = (cum->words + 1) & ~1;
10594 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10597 /* Describe where this argument goes in the vector registers. */
10598 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
10600 r = gen_rtx_REG (elt_mode, cum->vregno + i);
10601 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10602 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10605 return rs6000_finish_function_arg (mode, rvec, k);
10607 else if (TARGET_ALTIVEC_ABI
10608 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
10609 || (type && TREE_CODE (type) == VECTOR_TYPE
10610 && int_size_in_bytes (type) == 16)))
10612 if (named || abi == ABI_V4)
10613 return NULL_RTX;
10614 else
10616 /* Vector parameters to varargs functions under AIX or Darwin
10617 get passed in memory and possibly also in GPRs. */
10618 int align, align_words, n_words;
10619 machine_mode part_mode;
10621 /* Vector parameters must be 16-byte aligned. In 32-bit
10622 mode this means we need to take into account the offset
10623 to the parameter save area. In 64-bit mode, they just
10624 have to start on an even word, since the parameter save
10625 area is 16-byte aligned. */
10626 if (TARGET_32BIT)
10627 align = -(rs6000_parm_offset () + cum->words) & 3;
10628 else
10629 align = cum->words & 1;
10630 align_words = cum->words + align;
10632 /* Out of registers? Memory, then. */
10633 if (align_words >= GP_ARG_NUM_REG)
10634 return NULL_RTX;
10636 if (TARGET_32BIT && TARGET_POWERPC64)
10637 return rs6000_mixed_function_arg (mode, type, align_words);
10639 /* The vector value goes in GPRs. Only the part of the
10640 value in GPRs is reported here. */
10641 part_mode = mode;
10642 n_words = rs6000_arg_size (mode, type);
10643 if (align_words + n_words > GP_ARG_NUM_REG)
10644 /* Fortunately, there are only two possibilities, the value
10645 is either wholly in GPRs or half in GPRs and half not. */
10646 part_mode = DImode;
10648 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
10651 else if (TARGET_SPE_ABI && TARGET_SPE
10652 && (SPE_VECTOR_MODE (mode)
10653 || (TARGET_E500_DOUBLE && (mode == DFmode
10654 || mode == DCmode
10655 || mode == TFmode
10656 || mode == TCmode))))
10657 return rs6000_spe_function_arg (cum, mode, type);
10659 else if (abi == ABI_V4)
10661 if (TARGET_HARD_FLOAT && TARGET_FPRS
10662 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10663 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10664 || FLOAT128_2REG_P (mode)
10665 || DECIMAL_FLOAT_MODE_P (mode)))
10667 /* _Decimal128 must use an even/odd register pair. This assumes
10668 that the register number is odd when fregno is odd. */
10669 if (mode == TDmode && (cum->fregno % 2) == 1)
10670 cum->fregno++;
10672 if (cum->fregno + (FLOAT128_2REG_P (mode) ? 1 : 0)
10673 <= FP_ARG_V4_MAX_REG)
10674 return gen_rtx_REG (mode, cum->fregno);
10675 else
10676 return NULL_RTX;
10678 else
10680 int n_words = rs6000_arg_size (mode, type);
10681 int gregno = cum->sysv_gregno;
10683 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10684 (r7,r8) or (r9,r10). As does any other 2 word item such
10685 as complex int due to a historical mistake. */
10686 if (n_words == 2)
10687 gregno += (1 - gregno) & 1;
10689 /* Multi-reg args are not split between registers and stack. */
10690 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10691 return NULL_RTX;
10693 if (TARGET_32BIT && TARGET_POWERPC64)
10694 return rs6000_mixed_function_arg (mode, type,
10695 gregno - GP_ARG_MIN_REG);
10696 return gen_rtx_REG (mode, gregno);
10699 else
10701 int align_words = rs6000_parm_start (mode, type, cum->words);
10703 /* _Decimal128 must be passed in an even/odd float register pair.
10704 This assumes that the register number is odd when fregno is odd. */
10705 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10706 cum->fregno++;
10708 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10710 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10711 rtx r, off;
10712 int i, k = 0;
10713 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10714 int fpr_words;
10716 /* Do we also need to pass this argument in the parameter
10717 save area? */
10718 if (type && (cum->nargs_prototype <= 0
10719 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10720 && TARGET_XL_COMPAT
10721 && align_words >= GP_ARG_NUM_REG)))
10722 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10724 /* Describe where this argument goes in the fprs. */
10725 for (i = 0; i < n_elts
10726 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
10728 /* Check if the argument is split over registers and memory.
10729 This can only ever happen for long double or _Decimal128;
10730 complex types are handled via split_complex_arg. */
10731 machine_mode fmode = elt_mode;
10732 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
10734 gcc_assert (FLOAT128_2REG_P (fmode));
10735 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
10738 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
10739 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10740 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10743 /* If there were not enough FPRs to hold the argument, the rest
10744 usually goes into memory. However, if the current position
10745 is still within the register parameter area, a portion may
10746 actually have to go into GPRs.
10748 Note that it may happen that the portion of the argument
10749 passed in the first "half" of the first GPR was already
10750 passed in the last FPR as well.
10752 For unnamed arguments, we already set up GPRs to cover the
10753 whole argument in rs6000_psave_function_arg, so there is
10754 nothing further to do at this point. */
10755 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
10756 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
10757 && cum->nargs_prototype > 0)
10759 static bool warned;
10761 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10762 int n_words = rs6000_arg_size (mode, type);
10764 align_words += fpr_words;
10765 n_words -= fpr_words;
10769 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10770 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
10771 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10773 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10775 if (!warned && warn_psabi)
10777 warned = true;
10778 inform (input_location,
10779 "the ABI of passing homogeneous float aggregates"
10780 " has changed in GCC 5");
10784 return rs6000_finish_function_arg (mode, rvec, k);
10786 else if (align_words < GP_ARG_NUM_REG)
10788 if (TARGET_32BIT && TARGET_POWERPC64)
10789 return rs6000_mixed_function_arg (mode, type, align_words);
10791 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10793 else
10794 return NULL_RTX;
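/* A worked example for the FPR path above: the first named double
   argument of a prototyped function on 64-bit ELFv2 goes in f1
   (cum->fregno starts at FP_ARG_MIN_REG) with no GPR copy; for an
   unprototyped caller, rs6000_psave_function_arg additionally
   describes the same value in r3, yielding a PARALLEL of both.  */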
10798 /* For an arg passed partly in registers and partly in memory, this is
10799 the number of bytes passed in registers. For args passed entirely in
10800 registers or entirely in memory, zero. When an arg is described by a
10801 PARALLEL, perhaps using more than one register type, this function
10802 returns the number of bytes used by the first element of the PARALLEL. */
10804 static int
10805 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
10806 tree type, bool named)
10808 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10809 bool passed_in_gprs = true;
10810 int ret = 0;
10811 int align_words;
10812 machine_mode elt_mode;
10813 int n_elts;
10815 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10817 if (DEFAULT_ABI == ABI_V4)
10818 return 0;
10820 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10822 /* If we are passing this arg in the fixed parameter save area
10823 (gprs or memory) as well as VRs, we do not use the partial
10824 bytes mechanism; instead, rs6000_function_arg will return a
10825 PARALLEL including a memory element as necessary. */
10826 if (TARGET_64BIT && ! cum->prototype)
10827 return 0;
10829 /* Otherwise, we pass in VRs only. Check for partial copies. */
10830 passed_in_gprs = false;
10831 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
10832 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
10835 /* In this complicated case we just disable the partial_nregs code. */
10836 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10837 return 0;
10839 align_words = rs6000_parm_start (mode, type, cum->words);
10841 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10843 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10845 /* If we are passing this arg in the fixed parameter save area
10846 (gprs or memory) as well as FPRs, we do not use the partial
10847 bytes mechanism; instead, rs6000_function_arg will return a
10848 PARALLEL including a memory element as necessary. */
10849 if (type
10850 && (cum->nargs_prototype <= 0
10851 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10852 && TARGET_XL_COMPAT
10853 && align_words >= GP_ARG_NUM_REG)))
10854 return 0;
10856 /* Otherwise, we pass in FPRs only. Check for partial copies. */
10857 passed_in_gprs = false;
10858 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
10860 /* Compute number of bytes / words passed in FPRs. If there
10861 is still space available in the register parameter area
10862 *after* that amount, a part of the argument will be passed
10863 in GPRs. In that case, the total amount passed in any
10864 registers is equal to the amount that would have been passed
10865 in GPRs if everything were passed there, so we fall back to
10866 the GPR code below to compute the appropriate value. */
10867 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
10868 * MIN (8, GET_MODE_SIZE (elt_mode)));
10869 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
10871 if (align_words + fpr_words < GP_ARG_NUM_REG)
10872 passed_in_gprs = true;
10873 else
10874 ret = fpr;
10878 if (passed_in_gprs
10879 && align_words < GP_ARG_NUM_REG
10880 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
10881 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
10883 if (ret != 0 && TARGET_DEBUG_ARG)
10884 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
10886 return ret;
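/* A worked example: on a 64-bit target, a two-doubleword argument
   starting at align_words == 7 has only one GPR left
   (GP_ARG_NUM_REG == 8), so ret = (8 - 7) * 8 = 8 bytes are reported
   as passed in registers and the second doubleword goes to the
   parameter save area.  */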
10889 /* A C expression that indicates when an argument must be passed by
10890 reference. If nonzero for an argument, a copy of that argument is
10891 made in memory and a pointer to the argument is passed instead of
10892 the argument itself. The pointer is passed in whatever way is
10893 appropriate for passing a pointer to that type.
10895 Under V.4, aggregates and long double are passed by reference.
10897 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
10898 reference unless the AltiVec vector extension ABI is in force.
10900 As an extension to all ABIs, variable sized types are passed by
10901 reference. */
10903 static bool
10904 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
10905 machine_mode mode, const_tree type,
10906 bool named ATTRIBUTE_UNUSED)
10908 if (!type)
10909 return 0;
10911 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD
10912 && FLOAT128_IEEE_P (TYPE_MODE (type)))
10914 if (TARGET_DEBUG_ARG)
10915 fprintf (stderr, "function_arg_pass_by_reference: V4 IEEE 128-bit\n");
10916 return 1;
10919 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
10921 if (TARGET_DEBUG_ARG)
10922 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
10923 return 1;
10926 if (int_size_in_bytes (type) < 0)
10928 if (TARGET_DEBUG_ARG)
10929 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
10930 return 1;
10933 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10934 modes only exist for GCC vector types if -maltivec. */
10935 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
10937 if (TARGET_DEBUG_ARG)
10938 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
10939 return 1;
10942 /* Pass synthetic vectors in memory. */
10943 if (TREE_CODE (type) == VECTOR_TYPE
10944 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10946 static bool warned_for_pass_big_vectors = false;
10947 if (TARGET_DEBUG_ARG)
10948 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
10949 if (!warned_for_pass_big_vectors)
10951 warning (0, "GCC vector passed by reference: "
10952 "non-standard ABI extension with no compatibility guarantee");
10953 warned_for_pass_big_vectors = true;
10955 return 1;
10958 return 0;
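/* Editorial examples (hypothetical declarations) of the rules above under
   the 32-bit SVR4 (V.4) ABI:

     struct pair { int a, b; };
     void f (struct pair p);     // aggregate: the caller builds a copy and
                                 // passes its address instead

     typedef int v8si __attribute__ ((vector_size (32)));
     void g (v8si v);            // 32-byte synthetic vector: passed by
                                 // reference, with the one-time warning  */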
10961 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
10962 already processed. Return true if the parameter must be passed
10963 (fully or partially) on the stack. */
10965 static bool
10966 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
10968 machine_mode mode;
10969 int unsignedp;
10970 rtx entry_parm;
10972 /* Catch errors. */
10973 if (type == NULL || type == error_mark_node)
10974 return true;
10976 /* Handle types with no storage requirement. */
10977 if (TYPE_MODE (type) == VOIDmode)
10978 return false;
10980 /* Handle complex types; the test is deliberately made twice, once per component, so ARGS_SO_FAR advances past both parts. */
10981 if (TREE_CODE (type) == COMPLEX_TYPE)
10982 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
10983 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
10985 /* Handle transparent aggregates. */
10986 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
10987 && TYPE_TRANSPARENT_AGGR (type))
10988 type = TREE_TYPE (first_field (type));
10990 /* See if this arg was passed by invisible reference. */
10991 if (pass_by_reference (get_cumulative_args (args_so_far),
10992 TYPE_MODE (type), type, true))
10993 type = build_pointer_type (type);
10995 /* Find mode as it is passed by the ABI. */
10996 unsignedp = TYPE_UNSIGNED (type);
10997 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
10999 /* If we must pass in stack, we need a stack. */
11000 if (rs6000_must_pass_in_stack (mode, type))
11001 return true;
11003 /* If there is no incoming register, we need a stack. */
11004 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
11005 if (entry_parm == NULL)
11006 return true;
11008 /* Likewise if we need to pass both in registers and on the stack. */
11009 if (GET_CODE (entry_parm) == PARALLEL
11010 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
11011 return true;
11013 /* Also true if we're partially in registers and partially not. */
11014 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
11015 return true;
11017 /* Update info on where next arg arrives in registers. */
11018 rs6000_function_arg_advance (args_so_far, mode, type, true);
11019 return false;
11022 /* Return true if FUN has no prototype, has a variable argument
11023 list, or passes any parameter in memory. */
11025 static bool
11026 rs6000_function_parms_need_stack (tree fun, bool incoming)
11028 tree fntype, result;
11029 CUMULATIVE_ARGS args_so_far_v;
11030 cumulative_args_t args_so_far;
11032 if (!fun)
11033 /* Must be a libcall, all of which only use reg parms. */
11034 return false;
11036 fntype = fun;
11037 if (!TYPE_P (fun))
11038 fntype = TREE_TYPE (fun);
11040 /* Varargs functions need the parameter save area. */
11041 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
11042 return true;
11044 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
11045 args_so_far = pack_cumulative_args (&args_so_far_v);
11047 /* When incoming, we will have been passed the function decl.
11048 It is necessary to use the decl to handle K&R style functions,
11049 where TYPE_ARG_TYPES may not be available. */
11050 if (incoming)
11052 gcc_assert (DECL_P (fun));
11053 result = DECL_RESULT (fun);
11055 else
11056 result = TREE_TYPE (fntype);
11058 if (result && aggregate_value_p (result, fntype))
11060 if (!TYPE_P (result))
11061 result = TREE_TYPE (result);
11062 result = build_pointer_type (result);
11063 rs6000_parm_needs_stack (args_so_far, result);
11066 if (incoming)
11068 tree parm;
11070 for (parm = DECL_ARGUMENTS (fun);
11071 parm && parm != void_list_node;
11072 parm = TREE_CHAIN (parm))
11073 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
11074 return true;
11076 else
11078 function_args_iterator args_iter;
11079 tree arg_type;
11081 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
11082 if (rs6000_parm_needs_stack (args_so_far, arg_type))
11083 return true;
11086 return false;
11089 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
11090 usually a constant depending on the ABI. However, in the ELFv2 ABI
11091 the register parameter area is optional when calling a function that
11092 has a prototype in scope, has no variable argument list, and passes
11093 all parameters in registers. */
11095 int
11096 rs6000_reg_parm_stack_space (tree fun, bool incoming)
11098 int reg_parm_stack_space;
11100 switch (DEFAULT_ABI)
11102 default:
11103 reg_parm_stack_space = 0;
11104 break;
11106 case ABI_AIX:
11107 case ABI_DARWIN:
11108 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11109 break;
11111 case ABI_ELFv2:
11112 /* ??? Recomputing this every time is a bit expensive. Is there
11113 a place to cache this information? */
11114 if (rs6000_function_parms_need_stack (fun, incoming))
11115 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
11116 else
11117 reg_parm_stack_space = 0;
11118 break;
11121 return reg_parm_stack_space;
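/* Editorial illustration: under ELFv2 a call such as

     extern int f (int a, int b);             // prototype in scope, no varargs
     int caller (void) { return f (1, 2); }   // both args fit in r3/r4

   needs no parameter save area, so 0 is returned; AIX and Darwin always
   reserve 64 bytes (64-bit) or 32 bytes (32-bit) for the 8 slots.  */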
11124 static void
11125 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
11127 int i;
11128 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
11130 if (nregs == 0)
11131 return;
11133 for (i = 0; i < nregs; i++)
11135 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
11136 if (reload_completed)
11138 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
11139 tem = NULL_RTX;
11140 else
11141 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
11142 i * GET_MODE_SIZE (reg_mode));
11144 else
11145 tem = replace_equiv_address (tem, XEXP (tem, 0));
11147 gcc_assert (tem);
11149 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
11153 /* Perform any actions needed for a function that is receiving a
11154 variable number of arguments.
11156 CUM is as above.
11158 MODE and TYPE are the mode and type of the current parameter.
11160 PRETEND_SIZE is a variable that should be set to the amount of stack
11161 that must be pushed by the prolog to pretend that our caller pushed it.
11164 Normally, this macro will push all remaining incoming registers on the
11165 stack and set PRETEND_SIZE to the length of the registers pushed. */
11167 static void
11168 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
11169 tree type, int *pretend_size ATTRIBUTE_UNUSED,
11170 int no_rtl)
11172 CUMULATIVE_ARGS next_cum;
11173 int reg_size = TARGET_32BIT ? 4 : 8;
11174 rtx save_area = NULL_RTX, mem;
11175 int first_reg_offset;
11176 alias_set_type set;
11178 /* Skip the last named argument. */
11179 next_cum = *get_cumulative_args (cum);
11180 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
11182 if (DEFAULT_ABI == ABI_V4)
11184 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
11186 if (! no_rtl)
11188 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
11189 HOST_WIDE_INT offset = 0;
11191 /* Try to optimize the size of the varargs save area.
11192 The ABI requires that ap.reg_save_area is doubleword
11193 aligned, but we don't need to allocate space for all
11194 the bytes, only those to which we actually will save
11195 anything. */
11196 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
11197 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
11198 if (TARGET_HARD_FLOAT && TARGET_FPRS
11199 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11200 && cfun->va_list_fpr_size)
11202 if (gpr_reg_num)
11203 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
11204 * UNITS_PER_FP_WORD;
11205 if (cfun->va_list_fpr_size
11206 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11207 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
11208 else
11209 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
11210 * UNITS_PER_FP_WORD;
11212 if (gpr_reg_num)
11214 offset = -((first_reg_offset * reg_size) & ~7);
11215 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
11217 gpr_reg_num = cfun->va_list_gpr_size;
11218 if (reg_size == 4 && (first_reg_offset & 1))
11219 gpr_reg_num++;
11221 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
11223 else if (fpr_size)
11224 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
11225 * UNITS_PER_FP_WORD
11226 - (int) (GP_ARG_NUM_REG * reg_size);
11228 if (gpr_size + fpr_size)
11230 rtx reg_save_area
11231 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
11232 gcc_assert (GET_CODE (reg_save_area) == MEM);
11233 reg_save_area = XEXP (reg_save_area, 0);
11234 if (GET_CODE (reg_save_area) == PLUS)
11236 gcc_assert (XEXP (reg_save_area, 0)
11237 == virtual_stack_vars_rtx);
11238 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
11239 offset += INTVAL (XEXP (reg_save_area, 1));
11241 else
11242 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
11245 cfun->machine->varargs_save_offset = offset;
11246 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
11249 else
11251 first_reg_offset = next_cum.words;
11252 save_area = crtl->args.internal_arg_pointer;
11254 if (targetm.calls.must_pass_in_stack (mode, type))
11255 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11258 set = get_varargs_alias_set ();
11259 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11260 && cfun->va_list_gpr_size)
11262 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11264 if (va_list_gpr_counter_field)
11265 /* V4 va_list_gpr_size counts number of registers needed. */
11266 n_gpr = cfun->va_list_gpr_size;
11267 else
11268 /* char * va_list instead counts number of bytes needed. */
11269 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11271 if (nregs > n_gpr)
11272 nregs = n_gpr;
11274 mem = gen_rtx_MEM (BLKmode,
11275 plus_constant (Pmode, save_area,
11276 first_reg_offset * reg_size));
11277 MEM_NOTRAP_P (mem) = 1;
11278 set_mem_alias_set (mem, set);
11279 set_mem_align (mem, BITS_PER_WORD);
11281 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11282 nregs);
11285 /* Save FP registers if needed. */
11286 if (DEFAULT_ABI == ABI_V4
11287 && TARGET_HARD_FLOAT && TARGET_FPRS
11288 && ! no_rtl
11289 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11290 && cfun->va_list_fpr_size)
11292 int fregno = next_cum.fregno, nregs;
11293 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11294 rtx lab = gen_label_rtx ();
11295 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11296 * UNITS_PER_FP_WORD);
11298 emit_jump_insn
11299 (gen_rtx_SET (pc_rtx,
11300 gen_rtx_IF_THEN_ELSE (VOIDmode,
11301 gen_rtx_NE (VOIDmode, cr1,
11302 const0_rtx),
11303 gen_rtx_LABEL_REF (VOIDmode, lab),
11304 pc_rtx)));
11306 for (nregs = 0;
11307 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11308 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11310 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11311 ? DFmode : SFmode,
11312 plus_constant (Pmode, save_area, off));
11313 MEM_NOTRAP_P (mem) = 1;
11314 set_mem_alias_set (mem, set);
11315 set_mem_align (mem, GET_MODE_ALIGNMENT (
11316 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11317 ? DFmode : SFmode));
11318 emit_move_insn (mem, gen_rtx_REG (
11319 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11320 ? DFmode : SFmode, fregno));
11323 emit_label (lab);
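/* Editorial sketch of the V.4 register save area built above, assuming all
   eight GPR and FPR argument registers are saved (byte offsets relative to
   reg_save_area):

     +  0:  r3 r4 r5 r6 r7 r8 r9 r10    8 GPRs * 4 bytes == 32
     + 32:  f1 f2 f3 f4 f5 f6 f7 f8     8 FPRs * 8 bytes == 64

   The conditional jump on CR1 implements the SVR4 convention that the
   caller sets CR bit 6 when FP arguments are passed in FPRs: when the bit
   is clear, the FPR stores are skipped because the FPRs hold no argument
   data.  */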
11327 /* Create the va_list data type. */
11329 static tree
11330 rs6000_build_builtin_va_list (void)
11332 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11334 /* For AIX, prefer 'char *' because that's what the system
11335 header files like. */
11336 if (DEFAULT_ABI != ABI_V4)
11337 return build_pointer_type (char_type_node);
11339 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11340 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11341 get_identifier ("__va_list_tag"), record);
11343 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11344 unsigned_char_type_node);
11345 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11346 unsigned_char_type_node);
11347 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11348 every user file. */
11349 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11350 get_identifier ("reserved"), short_unsigned_type_node);
11351 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11352 get_identifier ("overflow_arg_area"),
11353 ptr_type_node);
11354 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11355 get_identifier ("reg_save_area"),
11356 ptr_type_node);
11358 va_list_gpr_counter_field = f_gpr;
11359 va_list_fpr_counter_field = f_fpr;
11361 DECL_FIELD_CONTEXT (f_gpr) = record;
11362 DECL_FIELD_CONTEXT (f_fpr) = record;
11363 DECL_FIELD_CONTEXT (f_res) = record;
11364 DECL_FIELD_CONTEXT (f_ovf) = record;
11365 DECL_FIELD_CONTEXT (f_sav) = record;
11367 TYPE_STUB_DECL (record) = type_decl;
11368 TYPE_NAME (record) = type_decl;
11369 TYPE_FIELDS (record) = f_gpr;
11370 DECL_CHAIN (f_gpr) = f_fpr;
11371 DECL_CHAIN (f_fpr) = f_res;
11372 DECL_CHAIN (f_res) = f_ovf;
11373 DECL_CHAIN (f_ovf) = f_sav;
11375 layout_type (record);
11377 /* The correct type is an array type of one element. */
11378 return build_array_type (record, build_index_type (size_zero_node));
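/* For reference, the record built above corresponds to this C type (an
   editorial sketch; the field widths follow the SVR4 ABI):

     typedef struct __va_list_tag
     {
       unsigned char gpr;          // index of the next saved GPR (0..8)
       unsigned char fpr;          // index of the next saved FPR (0..8)
       unsigned short reserved;    // named padding, see comment above
       void *overflow_arg_area;    // next argument passed on the stack
       void *reg_save_area;        // base of the register save block
     } __va_list_tag;
     typedef __va_list_tag va_list[1];   // the one-element array type  */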
11381 /* Implement va_start. */
11383 static void
11384 rs6000_va_start (tree valist, rtx nextarg)
11386 HOST_WIDE_INT words, n_gpr, n_fpr;
11387 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11388 tree gpr, fpr, ovf, sav, t;
11390 /* Only SVR4 needs something special. */
11391 if (DEFAULT_ABI != ABI_V4)
11393 std_expand_builtin_va_start (valist, nextarg);
11394 return;
11397 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11398 f_fpr = DECL_CHAIN (f_gpr);
11399 f_res = DECL_CHAIN (f_fpr);
11400 f_ovf = DECL_CHAIN (f_res);
11401 f_sav = DECL_CHAIN (f_ovf);
11403 valist = build_simple_mem_ref (valist);
11404 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11405 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11406 f_fpr, NULL_TREE);
11407 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11408 f_ovf, NULL_TREE);
11409 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11410 f_sav, NULL_TREE);
11412 /* Count number of gp and fp argument registers used. */
11413 words = crtl->args.info.words;
11414 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11415 GP_ARG_NUM_REG);
11416 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11417 FP_ARG_NUM_REG);
11419 if (TARGET_DEBUG_ARG)
11420 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC", n_gpr = "
11421 HOST_WIDE_INT_PRINT_DEC", n_fpr = " HOST_WIDE_INT_PRINT_DEC"\n",
11422 words, n_gpr, n_fpr);
11424 if (cfun->va_list_gpr_size)
11426 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11427 build_int_cst (NULL_TREE, n_gpr));
11428 TREE_SIDE_EFFECTS (t) = 1;
11429 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11432 if (cfun->va_list_fpr_size)
11434 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11435 build_int_cst (NULL_TREE, n_fpr));
11436 TREE_SIDE_EFFECTS (t) = 1;
11437 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11439 #ifdef HAVE_AS_GNU_ATTRIBUTE
11440 if (call_ABI_of_interest (cfun->decl))
11441 rs6000_passes_float = true;
11442 #endif
11445 /* Find the overflow area. */
11446 t = make_tree (TREE_TYPE (ovf), crtl->args.internal_arg_pointer);
11447 if (words != 0)
11448 t = fold_build_pointer_plus_hwi (t, words * MIN_UNITS_PER_WORD);
11449 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11450 TREE_SIDE_EFFECTS (t) = 1;
11451 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11453 /* If there were no va_arg invocations, don't set up the register
11454 save area. */
11455 if (!cfun->va_list_gpr_size
11456 && !cfun->va_list_fpr_size
11457 && n_gpr < GP_ARG_NUM_REG
11458 && n_fpr < FP_ARG_V4_MAX_REG)
11459 return;
11461 /* Find the register save area. */
11462 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11463 if (cfun->machine->varargs_save_offset)
11464 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11465 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11466 TREE_SIDE_EFFECTS (t) = 1;
11467 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
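/* Editorial illustration: for `int f (int n, ...)` under V.4 the expansion
   above behaves roughly like the following C, where AP names the
   __va_list_tag and INCOMING_ARG_PTR / VARARGS_SAVE_OFFSET are
   placeholders for the rtx values used above:

     ap->gpr = 1;                  // r3 was consumed by N
     ap->fpr = 0;                  // no FP args precede the ellipsis
     ap->overflow_arg_area = INCOMING_ARG_PTR;
     ap->reg_save_area = frame_base + VARARGS_SAVE_OFFSET;  */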
11470 /* Implement va_arg. */
11472 static tree
11473 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11474 gimple_seq *post_p)
11476 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11477 tree gpr, fpr, ovf, sav, reg, t, u;
11478 int size, rsize, n_reg, sav_ofs, sav_scale;
11479 tree lab_false, lab_over, addr;
11480 int align;
11481 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11482 int regalign = 0;
11483 gimple stmt;
11485 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11487 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11488 return build_va_arg_indirect_ref (t);
11491 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11492 earlier version of gcc, with the property that it always applied alignment
11493 adjustments to the va-args (even for zero-sized types). The cheapest way
11494 to deal with this is to replicate the effect of the part of
11495 std_gimplify_va_arg_expr that carries out the align adjust, for the case
11496 of relevance.
11497 We don't need to check for pass-by-reference because of the test above.
11498 We can return a simplified answer, since we know there's no offset to add. */
11500 if (((TARGET_MACHO
11501 && rs6000_darwin64_abi)
11502 || DEFAULT_ABI == ABI_ELFv2
11503 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11504 && integer_zerop (TYPE_SIZE (type)))
11506 unsigned HOST_WIDE_INT align, boundary;
11507 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11508 align = PARM_BOUNDARY / BITS_PER_UNIT;
11509 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11510 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11511 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11512 boundary /= BITS_PER_UNIT;
11513 if (boundary > align)
11515 tree t;
11516 /* This updates arg ptr by the amount that would be necessary
11517 to align the zero-sized (but not zero-alignment) item. */
11518 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11519 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11520 gimplify_and_add (t, pre_p);
11522 t = fold_convert (sizetype, valist_tmp);
11523 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11524 fold_convert (TREE_TYPE (valist),
11525 fold_build2 (BIT_AND_EXPR, sizetype, t,
11526 size_int (-boundary))));
11527 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11528 gimplify_and_add (t, pre_p);
11530 /* Since it is zero-sized there's no increment for the item itself. */
11531 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11532 return build_va_arg_indirect_ref (valist_tmp);
11535 if (DEFAULT_ABI != ABI_V4)
11537 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11539 tree elem_type = TREE_TYPE (type);
11540 machine_mode elem_mode = TYPE_MODE (elem_type);
11541 int elem_size = GET_MODE_SIZE (elem_mode);
11543 if (elem_size < UNITS_PER_WORD)
11545 tree real_part, imag_part;
11546 gimple_seq post = NULL;
11548 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11549 &post);
11550 /* Copy the value into a temporary, lest the formal temporary
11551 be reused out from under us. */
11552 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11553 gimple_seq_add_seq (pre_p, post);
11555 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11556 post_p);
11558 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11562 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
11565 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11566 f_fpr = DECL_CHAIN (f_gpr);
11567 f_res = DECL_CHAIN (f_fpr);
11568 f_ovf = DECL_CHAIN (f_res);
11569 f_sav = DECL_CHAIN (f_ovf);
11571 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11572 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11573 f_fpr, NULL_TREE);
11574 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11575 f_ovf, NULL_TREE);
11576 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11577 f_sav, NULL_TREE);
11579 size = int_size_in_bytes (type);
11580 rsize = (size + 3) / 4;
11581 align = 1;
11583 if (TARGET_HARD_FLOAT && TARGET_FPRS
11584 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
11585 || (TARGET_DOUBLE_FLOAT
11586 && (TYPE_MODE (type) == DFmode
11587 || FLOAT128_2REG_P (TYPE_MODE (type))
11588 || DECIMAL_FLOAT_MODE_P (TYPE_MODE (type))))))
11590 /* FP args go in FP registers, if present. */
11591 reg = fpr;
11592 n_reg = (size + 7) / 8;
11593 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
11594 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
11595 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
11596 align = 8;
11598 else
11600 /* Otherwise into GP registers. */
11601 reg = gpr;
11602 n_reg = rsize;
11603 sav_ofs = 0;
11604 sav_scale = 4;
11605 if (n_reg == 2)
11606 align = 8;
11609 /* Pull the value out of the saved registers.... */
11611 lab_over = NULL;
11612 addr = create_tmp_var (ptr_type_node, "addr");
11614 /* AltiVec vectors never go in registers when -mabi=altivec. */
11615 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11616 align = 16;
11617 else
11619 lab_false = create_artificial_label (input_location);
11620 lab_over = create_artificial_label (input_location);
11622 /* Long long and SPE vectors are aligned in the registers.
11623 So is any other two-GPR item, such as complex int, due to a
11624 historical mistake. */
11625 u = reg;
11626 if (n_reg == 2 && reg == gpr)
11628 regalign = 1;
11629 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11630 build_int_cst (TREE_TYPE (reg), n_reg - 1));
11631 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
11632 unshare_expr (reg), u);
11634 /* _Decimal128 is passed in even/odd fpr pairs; the stored
11635 reg number is 0 for f1, so we want to make it odd. */
11636 else if (reg == fpr && TYPE_MODE (type) == TDmode)
11638 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11639 build_int_cst (TREE_TYPE (reg), 1));
11640 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
11643 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
11644 t = build2 (GE_EXPR, boolean_type_node, u, t);
11645 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11646 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11647 gimplify_and_add (t, pre_p);
11649 t = sav;
11650 if (sav_ofs)
11651 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11653 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11654 build_int_cst (TREE_TYPE (reg), n_reg));
11655 u = fold_convert (sizetype, u);
11656 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
11657 t = fold_build_pointer_plus (t, u);
11659 /* _Decimal32 varargs are located in the second word of the 64-bit
11660 FP register for 32-bit binaries. */
11661 if (TARGET_32BIT
11662 && TARGET_HARD_FLOAT && TARGET_FPRS
11663 && TYPE_MODE (type) == SDmode)
11664 t = fold_build_pointer_plus_hwi (t, size);
11666 gimplify_assign (addr, t, pre_p);
11668 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11670 stmt = gimple_build_label (lab_false);
11671 gimple_seq_add_stmt (pre_p, stmt);
11673 if ((n_reg == 2 && !regalign) || n_reg > 2)
11675 /* Ensure that we don't find any more args in regs.
11676 Alignment has been taken care of for the special cases. */
11677 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
11681 /* ... otherwise out of the overflow area. */
11683 /* Care for on-stack alignment if needed. */
11684 t = ovf;
11685 if (align != 1)
11687 t = fold_build_pointer_plus_hwi (t, align - 1);
11688 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
11689 build_int_cst (TREE_TYPE (t), -align));
11691 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11693 gimplify_assign (unshare_expr (addr), t, pre_p);
11695 t = fold_build_pointer_plus_hwi (t, size);
11696 gimplify_assign (unshare_expr (ovf), t, pre_p);
11698 if (lab_over)
11700 stmt = gimple_build_label (lab_over);
11701 gimple_seq_add_stmt (pre_p, stmt);
11704 if (STRICT_ALIGNMENT
11705 && (TYPE_ALIGN (type)
11706 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
11708 /* The value (of type complex double, for example) may not be
11709 aligned in memory in the saved registers, so copy via a
11710 temporary. (This is the same code as used for SPARC.) */
11711 tree tmp = create_tmp_var (type, "va_arg_tmp");
11712 tree dest_addr = build_fold_addr_expr (tmp);
11714 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
11715 3, dest_addr, addr, size_int (rsize * 4));
11717 gimplify_and_add (copy, pre_p);
11718 addr = dest_addr;
11721 addr = fold_convert (ptrtype, addr);
11722 return build_va_arg_indirect_ref (addr);
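/* Editorial sketch of the V.4 va_arg path above for a `double`, assuming
   hard float: reg == fpr, n_reg == 1, sav_ofs == 32, sav_scale == 8 and
   align == 8, so the generated GIMPLE behaves roughly like:

     if (ap->fpr >= 8)                       // all eight FPRs consumed
       goto overflow;
     addr = ap->reg_save_area + 32 + ap->fpr++ * 8;
     goto done;
   overflow:
     ap->overflow_arg_area =
       (ap->overflow_arg_area + 7) & -8;     // 8-byte align on the stack
     addr = ap->overflow_arg_area;
     ap->overflow_arg_area += 8;
   done:
     result = *(double *) addr;  */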
11725 /* Builtins. */
11727 static void
11728 def_builtin (const char *name, tree type, enum rs6000_builtins code)
11730 tree t;
11731 unsigned classify = rs6000_builtin_info[(int)code].attr;
11732 const char *attr_string = "";
11734 gcc_assert (name != NULL);
11735 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
11737 if (rs6000_builtin_decls[(int)code])
11738 fatal_error (input_location,
11739 "internal error: builtin function %s already processed", name);
11741 rs6000_builtin_decls[(int)code] = t =
11742 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
11744 /* Set any special attributes. */
11745 if ((classify & RS6000_BTC_CONST) != 0)
11747 /* const function, function only depends on the inputs. */
11748 TREE_READONLY (t) = 1;
11749 TREE_NOTHROW (t) = 1;
11750 attr_string = ", const";
11752 else if ((classify & RS6000_BTC_PURE) != 0)
11754 /* pure function, function can read global memory, but does not set any
11755 external state. */
11756 DECL_PURE_P (t) = 1;
11757 TREE_NOTHROW (t) = 1;
11758 attr_string = ", pure";
11760 else if ((classify & RS6000_BTC_FP) != 0)
11762 /* Function is a math function. If rounding mode is on, then treat the
11763 function as not reading global memory, but it can have arbitrary side
11764 effects. If it is off, then assume the function is a const function.
11765 This mimics the ATTR_MATHFN_FPROUNDING attribute in
11766 builtin-attrs.def that is used for the math functions. */
11767 TREE_NOTHROW (t) = 1;
11768 if (flag_rounding_math)
11770 DECL_PURE_P (t) = 1;
11771 DECL_IS_NOVOPS (t) = 1;
11772 attr_string = ", fp, pure";
11774 else
11776 TREE_READONLY (t) = 1;
11777 attr_string = ", fp, const";
11780 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
11781 gcc_unreachable ();
11783 if (TARGET_DEBUG_BUILTIN)
11784 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
11785 (int)code, name, attr_string);
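/* Editorial example of how the initialization code uses this helper (the
   type variable name is illustrative):

     def_builtin ("__builtin_altivec_vaddubm",
                  v16qi_ftype_v16qi_v16qi,    // function type built earlier
                  ALTIVEC_BUILTIN_VADDUBM);

   This registers the name with the front end and records the decl in
   rs6000_builtin_decls for later expansion.  */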
11788 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
11790 #undef RS6000_BUILTIN_1
11791 #undef RS6000_BUILTIN_2
11792 #undef RS6000_BUILTIN_3
11793 #undef RS6000_BUILTIN_A
11794 #undef RS6000_BUILTIN_D
11795 #undef RS6000_BUILTIN_E
11796 #undef RS6000_BUILTIN_H
11797 #undef RS6000_BUILTIN_P
11798 #undef RS6000_BUILTIN_Q
11799 #undef RS6000_BUILTIN_S
11800 #undef RS6000_BUILTIN_X
11802 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11803 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11804 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
11805 { MASK, ICODE, NAME, ENUM },
11807 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11808 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11809 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11810 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11811 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11812 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11813 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11814 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11816 static const struct builtin_description bdesc_3arg[] =
11818 #include "rs6000-builtin.def"
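/* Editorial note on the pattern used by this and the following tables:
   rs6000-builtin.def invokes one RS6000_BUILTIN_<class> macro per builtin
   (the "X-macro" technique), so each table defines only the macro for the
   class it collects and leaves the rest empty.  A minimal standalone
   sketch of the idea:

     #define BUILTIN_LIST  X(add) X(sub) X(mul)

     #define X(name) #name,
     static const char *names[] = { BUILTIN_LIST };  // {"add","sub","mul"}
     #undef X
*/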
11821 /* DST operations: void foo (void *, const int, const char). */
11823 #undef RS6000_BUILTIN_1
11824 #undef RS6000_BUILTIN_2
11825 #undef RS6000_BUILTIN_3
11826 #undef RS6000_BUILTIN_A
11827 #undef RS6000_BUILTIN_D
11828 #undef RS6000_BUILTIN_E
11829 #undef RS6000_BUILTIN_H
11830 #undef RS6000_BUILTIN_P
11831 #undef RS6000_BUILTIN_Q
11832 #undef RS6000_BUILTIN_S
11833 #undef RS6000_BUILTIN_X
11835 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11836 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11837 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11838 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11839 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
11840 { MASK, ICODE, NAME, ENUM },
11842 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11843 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11844 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11845 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11846 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11847 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11849 static const struct builtin_description bdesc_dst[] =
11851 #include "rs6000-builtin.def"
11854 /* Simple binary operations: VECc = foo (VECa, VECb). */
11856 #undef RS6000_BUILTIN_1
11857 #undef RS6000_BUILTIN_2
11858 #undef RS6000_BUILTIN_3
11859 #undef RS6000_BUILTIN_A
11860 #undef RS6000_BUILTIN_D
11861 #undef RS6000_BUILTIN_E
11862 #undef RS6000_BUILTIN_H
11863 #undef RS6000_BUILTIN_P
11864 #undef RS6000_BUILTIN_Q
11865 #undef RS6000_BUILTIN_S
11866 #undef RS6000_BUILTIN_X
11868 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11869 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
11870 { MASK, ICODE, NAME, ENUM },
11872 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11873 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11874 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11875 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11876 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11877 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11878 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11879 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11880 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11882 static const struct builtin_description bdesc_2arg[] =
11884 #include "rs6000-builtin.def"
11887 #undef RS6000_BUILTIN_1
11888 #undef RS6000_BUILTIN_2
11889 #undef RS6000_BUILTIN_3
11890 #undef RS6000_BUILTIN_A
11891 #undef RS6000_BUILTIN_D
11892 #undef RS6000_BUILTIN_E
11893 #undef RS6000_BUILTIN_H
11894 #undef RS6000_BUILTIN_P
11895 #undef RS6000_BUILTIN_Q
11896 #undef RS6000_BUILTIN_S
11897 #undef RS6000_BUILTIN_X
11899 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11900 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11901 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11902 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11903 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11904 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11905 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11906 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
11907 { MASK, ICODE, NAME, ENUM },
11909 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11910 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11911 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11913 /* AltiVec predicates. */
11915 static const struct builtin_description bdesc_altivec_preds[] =
11917 #include "rs6000-builtin.def"
11920 /* SPE predicates. */
11921 #undef RS6000_BUILTIN_1
11922 #undef RS6000_BUILTIN_2
11923 #undef RS6000_BUILTIN_3
11924 #undef RS6000_BUILTIN_A
11925 #undef RS6000_BUILTIN_D
11926 #undef RS6000_BUILTIN_E
11927 #undef RS6000_BUILTIN_H
11928 #undef RS6000_BUILTIN_P
11929 #undef RS6000_BUILTIN_Q
11930 #undef RS6000_BUILTIN_S
11931 #undef RS6000_BUILTIN_X
11933 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11934 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11935 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11936 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11937 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11938 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11939 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11940 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11941 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11942 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
11943 { MASK, ICODE, NAME, ENUM },
11945 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11947 static const struct builtin_description bdesc_spe_predicates[] =
11949 #include "rs6000-builtin.def"
11952 /* SPE evsel predicates. */
11953 #undef RS6000_BUILTIN_1
11954 #undef RS6000_BUILTIN_2
11955 #undef RS6000_BUILTIN_3
11956 #undef RS6000_BUILTIN_A
11957 #undef RS6000_BUILTIN_D
11958 #undef RS6000_BUILTIN_E
11959 #undef RS6000_BUILTIN_H
11960 #undef RS6000_BUILTIN_P
11961 #undef RS6000_BUILTIN_Q
11962 #undef RS6000_BUILTIN_S
11963 #undef RS6000_BUILTIN_X
11965 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11966 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11967 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11968 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11969 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11970 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
11971 { MASK, ICODE, NAME, ENUM },
11973 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11974 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11975 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11976 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11977 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11979 static const struct builtin_description bdesc_spe_evsel[] =
11981 #include "rs6000-builtin.def"
11984 /* PAIRED predicates. */
11985 #undef RS6000_BUILTIN_1
11986 #undef RS6000_BUILTIN_2
11987 #undef RS6000_BUILTIN_3
11988 #undef RS6000_BUILTIN_A
11989 #undef RS6000_BUILTIN_D
11990 #undef RS6000_BUILTIN_E
11991 #undef RS6000_BUILTIN_H
11992 #undef RS6000_BUILTIN_P
11993 #undef RS6000_BUILTIN_Q
11994 #undef RS6000_BUILTIN_S
11995 #undef RS6000_BUILTIN_X
11997 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11998 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11999 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12000 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12001 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12002 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12003 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12004 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12005 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
12006 { MASK, ICODE, NAME, ENUM },
12008 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12009 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12011 static const struct builtin_description bdesc_paired_preds[] =
12013 #include "rs6000-builtin.def"
12016 /* ABS* operations. */
12018 #undef RS6000_BUILTIN_1
12019 #undef RS6000_BUILTIN_2
12020 #undef RS6000_BUILTIN_3
12021 #undef RS6000_BUILTIN_A
12022 #undef RS6000_BUILTIN_D
12023 #undef RS6000_BUILTIN_E
12024 #undef RS6000_BUILTIN_H
12025 #undef RS6000_BUILTIN_P
12026 #undef RS6000_BUILTIN_Q
12027 #undef RS6000_BUILTIN_S
12028 #undef RS6000_BUILTIN_X
12030 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12031 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12032 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12033 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
12034 { MASK, ICODE, NAME, ENUM },
12036 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12037 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12038 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12039 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12040 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12041 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12042 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12044 static const struct builtin_description bdesc_abs[] =
12046 #include "rs6000-builtin.def"
12049 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
12050 foo (VECa). */
12052 #undef RS6000_BUILTIN_1
12053 #undef RS6000_BUILTIN_2
12054 #undef RS6000_BUILTIN_3
12055 #undef RS6000_BUILTIN_A
12056 #undef RS6000_BUILTIN_D
12057 #undef RS6000_BUILTIN_E
12058 #undef RS6000_BUILTIN_H
12059 #undef RS6000_BUILTIN_P
12060 #undef RS6000_BUILTIN_Q
12061 #undef RS6000_BUILTIN_S
12062 #undef RS6000_BUILTIN_X
12064 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
12065 { MASK, ICODE, NAME, ENUM },
12067 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12068 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12069 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12070 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12071 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12072 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
12073 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12074 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12075 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12076 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12078 static const struct builtin_description bdesc_1arg[] =
12080 #include "rs6000-builtin.def"
12083 /* HTM builtins. */
12084 #undef RS6000_BUILTIN_1
12085 #undef RS6000_BUILTIN_2
12086 #undef RS6000_BUILTIN_3
12087 #undef RS6000_BUILTIN_A
12088 #undef RS6000_BUILTIN_D
12089 #undef RS6000_BUILTIN_E
12090 #undef RS6000_BUILTIN_H
12091 #undef RS6000_BUILTIN_P
12092 #undef RS6000_BUILTIN_Q
12093 #undef RS6000_BUILTIN_S
12094 #undef RS6000_BUILTIN_X
12096 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
12097 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
12098 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
12099 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
12100 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
12101 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
12102 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
12103 { MASK, ICODE, NAME, ENUM },
12105 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
12106 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
12107 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
12108 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
12110 static const struct builtin_description bdesc_htm[] =
12112 #include "rs6000-builtin.def"
12115 #undef RS6000_BUILTIN_1
12116 #undef RS6000_BUILTIN_2
12117 #undef RS6000_BUILTIN_3
12118 #undef RS6000_BUILTIN_A
12119 #undef RS6000_BUILTIN_D
12120 #undef RS6000_BUILTIN_E
12121 #undef RS6000_BUILTIN_H
12122 #undef RS6000_BUILTIN_P
12123 #undef RS6000_BUILTIN_Q
12124 #undef RS6000_BUILTIN_S
12126 /* Return true if a builtin function is overloaded. */
12127 bool
12128 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
12130 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
12133 /* Expand an expression EXP that calls a builtin without arguments. */
12134 static rtx
12135 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
12137 rtx pat;
12138 machine_mode tmode = insn_data[icode].operand[0].mode;
12140 if (icode == CODE_FOR_nothing)
12141 /* Builtin not supported on this processor. */
12142 return 0;
12144 if (target == 0
12145 || GET_MODE (target) != tmode
12146 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12147 target = gen_reg_rtx (tmode);
12149 pat = GEN_FCN (icode) (target);
12150 if (! pat)
12151 return 0;
12152 emit_insn (pat);
12154 return target;
12158 static rtx
12159 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
12161 rtx pat;
12162 tree arg0 = CALL_EXPR_ARG (exp, 0);
12163 tree arg1 = CALL_EXPR_ARG (exp, 1);
12164 rtx op0 = expand_normal (arg0);
12165 rtx op1 = expand_normal (arg1);
12166 machine_mode mode0 = insn_data[icode].operand[0].mode;
12167 machine_mode mode1 = insn_data[icode].operand[1].mode;
12169 if (icode == CODE_FOR_nothing)
12170 /* Builtin not supported on this processor. */
12171 return 0;
12173 /* If we got invalid arguments bail out before generating bad rtl. */
12174 if (arg0 == error_mark_node || arg1 == error_mark_node)
12175 return const0_rtx;
12177 if (GET_CODE (op0) != CONST_INT
12178 || INTVAL (op0) > 255
12179 || INTVAL (op0) < 0)
12181 error ("argument 1 must be an 8-bit field value");
12182 return const0_rtx;
12185 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12186 op0 = copy_to_mode_reg (mode0, op0);
12188 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12189 op1 = copy_to_mode_reg (mode1, op1);
12191 pat = GEN_FCN (icode) (op0, op1);
12192 if (! pat)
12193 return const0_rtx;
12194 emit_insn (pat);
12196 return NULL_RTX;
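/* Editorial usage note (assuming the __builtin_mtfsf spelling): the check
   above means the mask operand must be a literal byte at the call site:

     __builtin_mtfsf (0xff, new_fpscr);      // OK: constant in 0..255
     __builtin_mtfsf (mask_var, new_fpscr);  // rejected: not a CONST_INT  */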
12200 static rtx
12201 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
12203 rtx pat;
12204 tree arg0 = CALL_EXPR_ARG (exp, 0);
12205 rtx op0 = expand_normal (arg0);
12206 machine_mode tmode = insn_data[icode].operand[0].mode;
12207 machine_mode mode0 = insn_data[icode].operand[1].mode;
12209 if (icode == CODE_FOR_nothing)
12210 /* Builtin not supported on this processor. */
12211 return 0;
12213 /* If we got invalid arguments bail out before generating bad rtl. */
12214 if (arg0 == error_mark_node)
12215 return const0_rtx;
12217 if (icode == CODE_FOR_altivec_vspltisb
12218 || icode == CODE_FOR_altivec_vspltish
12219 || icode == CODE_FOR_altivec_vspltisw
12220 || icode == CODE_FOR_spe_evsplatfi
12221 || icode == CODE_FOR_spe_evsplati)
12223 /* Only allow 5-bit *signed* literals. */
12224 if (GET_CODE (op0) != CONST_INT
12225 || INTVAL (op0) > 15
12226 || INTVAL (op0) < -16)
12228 error ("argument 1 must be a 5-bit signed literal");
12229 return const0_rtx;
12233 if (target == 0
12234 || GET_MODE (target) != tmode
12235 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12236 target = gen_reg_rtx (tmode);
12238 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12239 op0 = copy_to_mode_reg (mode0, op0);
12241 pat = GEN_FCN (icode) (target, op0);
12242 if (! pat)
12243 return 0;
12244 emit_insn (pat);
12246 return target;
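/* Editorial example of the 5-bit-signed-literal check above, using one of
   the splat builtins it guards:

     vector signed char a = __builtin_altivec_vspltisb (15);  // accepted
     vector signed char b = __builtin_altivec_vspltisb (16);  // error:
         // "argument 1 must be a 5-bit signed literal"  */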
12249 static rtx
12250 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12252 rtx pat, scratch1, scratch2;
12253 tree arg0 = CALL_EXPR_ARG (exp, 0);
12254 rtx op0 = expand_normal (arg0);
12255 machine_mode tmode = insn_data[icode].operand[0].mode;
12256 machine_mode mode0 = insn_data[icode].operand[1].mode;
12258 /* If we have invalid arguments, bail out before generating bad rtl. */
12259 if (arg0 == error_mark_node)
12260 return const0_rtx;
12262 if (target == 0
12263 || GET_MODE (target) != tmode
12264 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12265 target = gen_reg_rtx (tmode);
12267 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12268 op0 = copy_to_mode_reg (mode0, op0);
12270 scratch1 = gen_reg_rtx (mode0);
12271 scratch2 = gen_reg_rtx (mode0);
12273 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12274 if (! pat)
12275 return 0;
12276 emit_insn (pat);
12278 return target;
12281 static rtx
12282 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12284 rtx pat;
12285 tree arg0 = CALL_EXPR_ARG (exp, 0);
12286 tree arg1 = CALL_EXPR_ARG (exp, 1);
12287 rtx op0 = expand_normal (arg0);
12288 rtx op1 = expand_normal (arg1);
12289 machine_mode tmode = insn_data[icode].operand[0].mode;
12290 machine_mode mode0 = insn_data[icode].operand[1].mode;
12291 machine_mode mode1 = insn_data[icode].operand[2].mode;
12293 if (icode == CODE_FOR_nothing)
12294 /* Builtin not supported on this processor. */
12295 return 0;
12297 /* If we got invalid arguments bail out before generating bad rtl. */
12298 if (arg0 == error_mark_node || arg1 == error_mark_node)
12299 return const0_rtx;
12301 if (icode == CODE_FOR_altivec_vcfux
12302 || icode == CODE_FOR_altivec_vcfsx
12303 || icode == CODE_FOR_altivec_vctsxs
12304 || icode == CODE_FOR_altivec_vctuxs
12305 || icode == CODE_FOR_altivec_vspltb
12306 || icode == CODE_FOR_altivec_vsplth
12307 || icode == CODE_FOR_altivec_vspltw
12308 || icode == CODE_FOR_spe_evaddiw
12309 || icode == CODE_FOR_spe_evldd
12310 || icode == CODE_FOR_spe_evldh
12311 || icode == CODE_FOR_spe_evldw
12312 || icode == CODE_FOR_spe_evlhhesplat
12313 || icode == CODE_FOR_spe_evlhhossplat
12314 || icode == CODE_FOR_spe_evlhhousplat
12315 || icode == CODE_FOR_spe_evlwhe
12316 || icode == CODE_FOR_spe_evlwhos
12317 || icode == CODE_FOR_spe_evlwhou
12318 || icode == CODE_FOR_spe_evlwhsplat
12319 || icode == CODE_FOR_spe_evlwwsplat
12320 || icode == CODE_FOR_spe_evrlwi
12321 || icode == CODE_FOR_spe_evslwi
12322 || icode == CODE_FOR_spe_evsrwis
12323 || icode == CODE_FOR_spe_evsubifw
12324 || icode == CODE_FOR_spe_evsrwiu)
12326 /* Only allow 5-bit unsigned literals. */
12327 STRIP_NOPS (arg1);
12328 if (TREE_CODE (arg1) != INTEGER_CST
12329 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12331 error ("argument 2 must be a 5-bit unsigned literal");
12332 return const0_rtx;
12336 if (target == 0
12337 || GET_MODE (target) != tmode
12338 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12339 target = gen_reg_rtx (tmode);
12341 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12342 op0 = copy_to_mode_reg (mode0, op0);
12343 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12344 op1 = copy_to_mode_reg (mode1, op1);
12346 pat = GEN_FCN (icode) (target, op0, op1);
12347 if (! pat)
12348 return 0;
12349 emit_insn (pat);
12351 return target;
12354 static rtx
12355 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12357 rtx pat, scratch;
12358 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12359 tree arg0 = CALL_EXPR_ARG (exp, 1);
12360 tree arg1 = CALL_EXPR_ARG (exp, 2);
12361 rtx op0 = expand_normal (arg0);
12362 rtx op1 = expand_normal (arg1);
12363 machine_mode tmode = SImode;
12364 machine_mode mode0 = insn_data[icode].operand[1].mode;
12365 machine_mode mode1 = insn_data[icode].operand[2].mode;
12366 int cr6_form_int;
12368 if (TREE_CODE (cr6_form) != INTEGER_CST)
12370 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12371 return const0_rtx;
12373 else
12374 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12376 gcc_assert (mode0 == mode1);
12378 /* If we have invalid arguments, bail out before generating bad rtl. */
12379 if (arg0 == error_mark_node || arg1 == error_mark_node)
12380 return const0_rtx;
12382 if (target == 0
12383 || GET_MODE (target) != tmode
12384 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12385 target = gen_reg_rtx (tmode);
12387 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12388 op0 = copy_to_mode_reg (mode0, op0);
12389 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12390 op1 = copy_to_mode_reg (mode1, op1);
12392 scratch = gen_reg_rtx (mode0);
12394 pat = GEN_FCN (icode) (scratch, op0, op1);
12395 if (! pat)
12396 return 0;
12397 emit_insn (pat);
12399 /* The vec_any* and vec_all* predicates use the same opcodes for two
12400 different operations, but the bits in CR6 will be different
12401 depending on what information we want. So we have to play tricks
12402 with CR6 to get the right bits out.
12404 If you think this is disgusting, look at the specs for the
12405 AltiVec predicates. */
12407 switch (cr6_form_int)
12409 case 0:
12410 emit_insn (gen_cr6_test_for_zero (target));
12411 break;
12412 case 1:
12413 emit_insn (gen_cr6_test_for_zero_reverse (target));
12414 break;
12415 case 2:
12416 emit_insn (gen_cr6_test_for_lt (target));
12417 break;
12418 case 3:
12419 emit_insn (gen_cr6_test_for_lt_reverse (target));
12420 break;
12421 default:
12422 error ("argument 1 of __builtin_altivec_predicate is out of range");
12423 break;
12426 return target;
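/* Editorial note: the four CR6 tests correspond roughly to the AltiVec
   predicate intrinsics as follows (a sketch; see the AltiVec PIM for the
   exact CR6 bit assignments):

     vec_all_eq (a, b)  ->  vcmpequ*. then test the "all elements true" bit
     vec_any_eq (a, b)  ->  vcmpequ*. then test the complement of the
                            "no element true" bit  */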
12429 static rtx
12430 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12432 rtx pat, addr;
12433 tree arg0 = CALL_EXPR_ARG (exp, 0);
12434 tree arg1 = CALL_EXPR_ARG (exp, 1);
12435 machine_mode tmode = insn_data[icode].operand[0].mode;
12436 machine_mode mode0 = Pmode;
12437 machine_mode mode1 = Pmode;
12438 rtx op0 = expand_normal (arg0);
12439 rtx op1 = expand_normal (arg1);
12441 if (icode == CODE_FOR_nothing)
12442 /* Builtin not supported on this processor. */
12443 return 0;
12445 /* If we got invalid arguments bail out before generating bad rtl. */
12446 if (arg0 == error_mark_node || arg1 == error_mark_node)
12447 return const0_rtx;
12449 if (target == 0
12450 || GET_MODE (target) != tmode
12451 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12452 target = gen_reg_rtx (tmode);
12454 op1 = copy_to_mode_reg (mode1, op1);
12456 if (op0 == const0_rtx)
12458 addr = gen_rtx_MEM (tmode, op1);
12460 else
12462 op0 = copy_to_mode_reg (mode0, op0);
12463 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12466 pat = GEN_FCN (icode) (target, addr);
12468 if (! pat)
12469 return 0;
12470 emit_insn (pat);
12472 return target;
12475 /* Return a constant vector for use as a little-endian permute control vector
12476 to reverse the order of elements of the given vector mode. */
12477 static rtx
12478 swap_selector_for_mode (machine_mode mode)
12480 /* These are little endian vectors, so their elements are reversed
12481 from what you would normally expect for a permute control vector. */
12482 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12483 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12484 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12485 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12486 unsigned int *swaparray, i;
12487 rtx perm[16];
12489 switch (mode)
12491 case V2DFmode:
12492 case V2DImode:
12493 swaparray = swap2;
12494 break;
12495 case V4SFmode:
12496 case V4SImode:
12497 swaparray = swap4;
12498 break;
12499 case V8HImode:
12500 swaparray = swap8;
12501 break;
12502 case V16QImode:
12503 swaparray = swap16;
12504 break;
12505 default:
12506 gcc_unreachable ();
12509 for (i = 0; i < 16; ++i)
12510 perm[i] = GEN_INT (swaparray[i]);
12512 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
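/* Editorial example: for V4SImode the selector is {3,2,1,0, 7,6,5,4,
   11,10,9,8, 15,14,13,12}; used as the control of a vperm whose two
   inputs are the same vector, its net effect is to reverse the order of
   the four 32-bit elements, turning (e0,e1,e2,e3) into (e3,e2,e1,e0).  */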
12515 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12516 with -maltivec=be specified. Issue the load followed by an element-reversing
12517 permute. */
12518 void
12519 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12521 rtx tmp = gen_reg_rtx (mode);
12522 rtx load = gen_rtx_SET (tmp, op1);
12523 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12524 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12525 rtx sel = swap_selector_for_mode (mode);
12526 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12528 gcc_assert (REG_P (op0));
12529 emit_insn (par);
12530 emit_insn (gen_rtx_SET (op0, vperm));
12533 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12534 with -maltivec=be specified. Issue the store preceded by an element-reversing
12535 permute. */
12536 void
12537 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12539 rtx tmp = gen_reg_rtx (mode);
12540 rtx store = gen_rtx_SET (op0, tmp);
12541 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12542 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12543 rtx sel = swap_selector_for_mode (mode);
12544 rtx vperm;
12546 gcc_assert (REG_P (op1));
12547 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12548 emit_insn (gen_rtx_SET (tmp, vperm));
12549 emit_insn (par);
12552 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12553 specified. Issue the store preceded by an element-reversing permute. */
12554 void
12555 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12557 machine_mode inner_mode = GET_MODE_INNER (mode);
12558 rtx tmp = gen_reg_rtx (mode);
12559 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12560 rtx sel = swap_selector_for_mode (mode);
12561 rtx vperm;
12563 gcc_assert (REG_P (op1));
12564 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12565 emit_insn (gen_rtx_SET (tmp, vperm));
12566 emit_insn (gen_rtx_SET (op0, stvx));
12569 static rtx
12570 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
12572 rtx pat, addr;
12573 tree arg0 = CALL_EXPR_ARG (exp, 0);
12574 tree arg1 = CALL_EXPR_ARG (exp, 1);
12575 machine_mode tmode = insn_data[icode].operand[0].mode;
12576 machine_mode mode0 = Pmode;
12577 machine_mode mode1 = Pmode;
12578 rtx op0 = expand_normal (arg0);
12579 rtx op1 = expand_normal (arg1);
12581 if (icode == CODE_FOR_nothing)
12582 /* Builtin not supported on this processor. */
12583 return 0;
12585 /* If we got invalid arguments bail out before generating bad rtl. */
12586 if (arg0 == error_mark_node || arg1 == error_mark_node)
12587 return const0_rtx;
12589 if (target == 0
12590 || GET_MODE (target) != tmode
12591 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12592 target = gen_reg_rtx (tmode);
12594 op1 = copy_to_mode_reg (mode1, op1);
12596 if (op0 == const0_rtx)
12598 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
12600 else
12602 op0 = copy_to_mode_reg (mode0, op0);
12603 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
12606 pat = GEN_FCN (icode) (target, addr);
12608 if (! pat)
12609 return 0;
12610 emit_insn (pat);
12612 return target;
12615 static rtx
12616 spe_expand_stv_builtin (enum insn_code icode, tree exp)
12618 tree arg0 = CALL_EXPR_ARG (exp, 0);
12619 tree arg1 = CALL_EXPR_ARG (exp, 1);
12620 tree arg2 = CALL_EXPR_ARG (exp, 2);
12621 rtx op0 = expand_normal (arg0);
12622 rtx op1 = expand_normal (arg1);
12623 rtx op2 = expand_normal (arg2);
12624 rtx pat;
12625 machine_mode mode0 = insn_data[icode].operand[0].mode;
12626 machine_mode mode1 = insn_data[icode].operand[1].mode;
12627 machine_mode mode2 = insn_data[icode].operand[2].mode;
12629 /* Invalid arguments. Bail before doing anything stoopid! */
12630 if (arg0 == error_mark_node
12631 || arg1 == error_mark_node
12632 || arg2 == error_mark_node)
12633 return const0_rtx;
12635 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
12636 op0 = copy_to_mode_reg (mode2, op0);
12637 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
12638 op1 = copy_to_mode_reg (mode0, op1);
12639 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
12640 op2 = copy_to_mode_reg (mode1, op2);
12642 pat = GEN_FCN (icode) (op1, op2, op0);
12643 if (pat)
12644 emit_insn (pat);
12645 return NULL_RTX;
12648 static rtx
12649 paired_expand_stv_builtin (enum insn_code icode, tree exp)
12651 tree arg0 = CALL_EXPR_ARG (exp, 0);
12652 tree arg1 = CALL_EXPR_ARG (exp, 1);
12653 tree arg2 = CALL_EXPR_ARG (exp, 2);
12654 rtx op0 = expand_normal (arg0);
12655 rtx op1 = expand_normal (arg1);
12656 rtx op2 = expand_normal (arg2);
12657 rtx pat, addr;
12658 machine_mode tmode = insn_data[icode].operand[0].mode;
12659 machine_mode mode1 = Pmode;
12660 machine_mode mode2 = Pmode;
12662 /* Invalid arguments; bail out before generating bad rtl. */
12663 if (arg0 == error_mark_node
12664 || arg1 == error_mark_node
12665 || arg2 == error_mark_node)
12666 return const0_rtx;
12668 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
12669 op0 = copy_to_mode_reg (tmode, op0);
12671 op2 = copy_to_mode_reg (mode2, op2);
12673 if (op1 == const0_rtx)
12675 addr = gen_rtx_MEM (tmode, op2);
12677 else
12679 op1 = copy_to_mode_reg (mode1, op1);
12680 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12683 pat = GEN_FCN (icode) (addr, op0);
12684 if (pat)
12685 emit_insn (pat);
12686 return NULL_RTX;
12689 static rtx
12690 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
12692 tree arg0 = CALL_EXPR_ARG (exp, 0);
12693 tree arg1 = CALL_EXPR_ARG (exp, 1);
12694 tree arg2 = CALL_EXPR_ARG (exp, 2);
12695 rtx op0 = expand_normal (arg0);
12696 rtx op1 = expand_normal (arg1);
12697 rtx op2 = expand_normal (arg2);
12698 rtx pat, addr;
12699 machine_mode tmode = insn_data[icode].operand[0].mode;
12700 machine_mode smode = insn_data[icode].operand[1].mode;
12701 machine_mode mode1 = Pmode;
12702 machine_mode mode2 = Pmode;
12704 /* Invalid arguments; bail out before generating bad rtl. */
12705 if (arg0 == error_mark_node
12706 || arg1 == error_mark_node
12707 || arg2 == error_mark_node)
12708 return const0_rtx;
12710 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
12711 op0 = copy_to_mode_reg (smode, op0);
12713 op2 = copy_to_mode_reg (mode2, op2);
12715 if (op1 == const0_rtx)
12717 addr = gen_rtx_MEM (tmode, op2);
12719 else
12721 op1 = copy_to_mode_reg (mode1, op1);
12722 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12725 pat = GEN_FCN (icode) (addr, op0);
12726 if (pat)
12727 emit_insn (pat);
12728 return NULL_RTX;
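/* Illustrative note (editorial sketch): for a store builtin call such
   as __builtin_altivec_stvx (v, offset, p), the expander above forms
   the address as (plus offset p) unless OFFSET is a literal zero, in
   which case P is used directly; the value operand is first copied
   into a register satisfying the insn's predicate.  */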
12731 /* Return the appropriate SPR number associated with the given builtin. */
12732 static inline HOST_WIDE_INT
12733 htm_spr_num (enum rs6000_builtins code)
12735 if (code == HTM_BUILTIN_GET_TFHAR
12736 || code == HTM_BUILTIN_SET_TFHAR)
12737 return TFHAR_SPR;
12738 else if (code == HTM_BUILTIN_GET_TFIAR
12739 || code == HTM_BUILTIN_SET_TFIAR)
12740 return TFIAR_SPR;
12741 else if (code == HTM_BUILTIN_GET_TEXASR
12742 || code == HTM_BUILTIN_SET_TEXASR)
12743 return TEXASR_SPR;
12744 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
12745 || code == HTM_BUILTIN_SET_TEXASRU);
12746 return TEXASRU_SPR;
12749 /* Return the appropriate SPR regno associated with the given builtin. */
12750 static inline HOST_WIDE_INT
12751 htm_spr_regno (enum rs6000_builtins code)
12753 if (code == HTM_BUILTIN_GET_TFHAR
12754 || code == HTM_BUILTIN_SET_TFHAR)
12755 return TFHAR_REGNO;
12756 else if (code == HTM_BUILTIN_GET_TFIAR
12757 || code == HTM_BUILTIN_SET_TFIAR)
12758 return TFIAR_REGNO;
12759 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
12760 || code == HTM_BUILTIN_SET_TEXASR
12761 || code == HTM_BUILTIN_GET_TEXASRU
12762 || code == HTM_BUILTIN_SET_TEXASRU);
12763 return TEXASR_REGNO;
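/* Editorial note: TEXASRU names the upper half of the 64-bit TEXASR,
   so both the TEXASR and TEXASRU builtins share TEXASR_REGNO here even
   though htm_spr_num distinguishes their SPR numbers.  */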
12766 /* Return the correct ICODE value depending on whether we are
12767 setting or reading the HTM SPRs. */
12768 static inline enum insn_code
12769 rs6000_htm_spr_icode (bool nonvoid)
12771 if (nonvoid)
12772 return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
12773 else
12774 return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
12777 /* Expand the HTM builtin in EXP and store the result in TARGET.
12778 Store true in *EXPANDEDP if we found a builtin to expand. */
12779 static rtx
12780 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
12782 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12783 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
12784 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12785 const struct builtin_description *d;
12786 size_t i;
12788 *expandedp = true;
12790 if (!TARGET_POWERPC64
12791 && (fcode == HTM_BUILTIN_TABORTDC
12792 || fcode == HTM_BUILTIN_TABORTDCI))
12794 size_t uns_fcode = (size_t)fcode;
12795 const char *name = rs6000_builtin_info[uns_fcode].name;
12796 error ("builtin %s is only valid in 64-bit mode", name);
12797 return const0_rtx;
12800 /* Expand the HTM builtins. */
12801 d = bdesc_htm;
12802 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
12803 if (d->code == fcode)
12805 rtx op[MAX_HTM_OPERANDS], pat;
12806 int nopnds = 0;
12807 tree arg;
12808 call_expr_arg_iterator iter;
12809 unsigned attr = rs6000_builtin_info[fcode].attr;
12810 enum insn_code icode = d->icode;
12811 const struct insn_operand_data *insn_op;
12812 bool uses_spr = (attr & RS6000_BTC_SPR);
12813 rtx cr = NULL_RTX;
12815 if (uses_spr)
12816 icode = rs6000_htm_spr_icode (nonvoid);
12817 insn_op = &insn_data[icode].operand[0];
12819 if (nonvoid)
12821 machine_mode tmode = (uses_spr) ? insn_op->mode : SImode;
12822 if (!target
12823 || GET_MODE (target) != tmode
12824 || (uses_spr && !(*insn_op->predicate) (target, tmode)))
12825 target = gen_reg_rtx (tmode);
12826 if (uses_spr)
12827 op[nopnds++] = target;
12830 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
12832 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
12833 return const0_rtx;
12835 insn_op = &insn_data[icode].operand[nopnds];
12837 op[nopnds] = expand_normal (arg);
12839 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
12841 if (!strcmp (insn_op->constraint, "n"))
12843 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
12844 if (!CONST_INT_P (op[nopnds]))
12845 error ("argument %d must be an unsigned literal", arg_num);
12846 else
12847 error ("argument %d is an unsigned literal that is "
12848 "out of range", arg_num);
12849 return const0_rtx;
12851 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
12854 nopnds++;
12857 /* Handle the builtins for extended mnemonics. These accept
12858 no arguments, but map to builtins that take arguments. */
12859 switch (fcode)
12861 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
12862 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
12863 op[nopnds++] = GEN_INT (1);
12864 #ifdef ENABLE_CHECKING
12865 attr |= RS6000_BTC_UNARY;
12866 #endif
12867 break;
12868 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
12869 op[nopnds++] = GEN_INT (0);
12870 #ifdef ENABLE_CHECKING
12871 attr |= RS6000_BTC_UNARY;
12872 #endif
12873 break;
12874 default:
12875 break;
12878 /* If this builtin accesses SPRs, then pass in the appropriate
12879 SPR number and SPR regno as the last two operands. */
12880 if (uses_spr)
12882 machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode;
12883 op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode));
12884 op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode));
12886 /* If this builtin accesses a CR, then pass in a scratch
12887 CR as the last operand. */
12888 else if (attr & RS6000_BTC_CR)
12889 cr = gen_reg_rtx (CCmode);
12890 op[nopnds++] = cr;
12893 #ifdef ENABLE_CHECKING
12894 int expected_nopnds = 0;
12895 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
12896 expected_nopnds = 1;
12897 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
12898 expected_nopnds = 2;
12899 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
12900 expected_nopnds = 3;
12901 if (!(attr & RS6000_BTC_VOID))
12902 expected_nopnds += 1;
12903 if (uses_spr)
12904 expected_nopnds += 2;
12906 gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
12907 #endif
12909 switch (nopnds)
12911 case 1:
12912 pat = GEN_FCN (icode) (op[0]);
12913 break;
12914 case 2:
12915 pat = GEN_FCN (icode) (op[0], op[1]);
12916 break;
12917 case 3:
12918 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
12919 break;
12920 case 4:
12921 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
12922 break;
12923 default:
12924 gcc_unreachable ();
12926 if (!pat)
12927 return NULL_RTX;
12928 emit_insn (pat);
12930 if (attr & RS6000_BTC_CR)
12932 if (fcode == HTM_BUILTIN_TBEGIN)
12934 /* Emit code to set TARGET to true or false depending on
12935 whether the tbegin. instruction succeeded or failed
12936 to start a transaction. We do this by placing the 1's
12937 complement of CR's EQ bit into TARGET. */
12938 rtx scratch = gen_reg_rtx (SImode);
12939 emit_insn (gen_rtx_SET (scratch,
12940 gen_rtx_EQ (SImode, cr,
12941 const0_rtx)));
12942 emit_insn (gen_rtx_SET (target,
12943 gen_rtx_XOR (SImode, scratch,
12944 GEN_INT (1))));
12946 else
12948 /* Emit code to copy the 4-bit condition register field
12949 CR into the least significant end of register TARGET. */
12950 rtx scratch1 = gen_reg_rtx (SImode);
12951 rtx scratch2 = gen_reg_rtx (SImode);
12952 rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0);
12953 emit_insn (gen_movcc (subreg, cr));
12954 emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28)));
12955 emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf)));
12959 if (nonvoid)
12960 return target;
12961 return const0_rtx;
12964 *expandedp = false;
12965 return NULL_RTX;
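/* Illustrative usage (editorial sketch, not from the original source):
   the CR handling above supports the canonical HTM idiom

       if (__builtin_tbegin (0))
         ... transactional code, ended with __builtin_tend (0) ...
       else
         ... fallback path ...

   For tbegin, TARGET receives the complement of the CR field's EQ bit,
   so a successfully started transaction yields a nonzero result.  */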
12968 static rtx
12969 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
12971 rtx pat;
12972 tree arg0 = CALL_EXPR_ARG (exp, 0);
12973 tree arg1 = CALL_EXPR_ARG (exp, 1);
12974 tree arg2 = CALL_EXPR_ARG (exp, 2);
12975 rtx op0 = expand_normal (arg0);
12976 rtx op1 = expand_normal (arg1);
12977 rtx op2 = expand_normal (arg2);
12978 machine_mode tmode = insn_data[icode].operand[0].mode;
12979 machine_mode mode0 = insn_data[icode].operand[1].mode;
12980 machine_mode mode1 = insn_data[icode].operand[2].mode;
12981 machine_mode mode2 = insn_data[icode].operand[3].mode;
12983 if (icode == CODE_FOR_nothing)
12984 /* Builtin not supported on this processor. */
12985 return 0;
12987 /* If we got invalid arguments, bail out before generating bad rtl. */
12988 if (arg0 == error_mark_node
12989 || arg1 == error_mark_node
12990 || arg2 == error_mark_node)
12991 return const0_rtx;
12993 /* Check and prepare argument depending on the instruction code.
12995 Note that a switch statement instead of the sequence of tests
12996 would be incorrect as many of the CODE_FOR values could be
12997 CODE_FOR_nothing and that would yield multiple alternatives
12998 with identical values. We'd never reach here at runtime in
12999 this case. */
13000 if (icode == CODE_FOR_altivec_vsldoi_v4sf
13001 || icode == CODE_FOR_altivec_vsldoi_v4si
13002 || icode == CODE_FOR_altivec_vsldoi_v8hi
13003 || icode == CODE_FOR_altivec_vsldoi_v16qi)
13005 /* Only allow 4-bit unsigned literals. */
13006 STRIP_NOPS (arg2);
13007 if (TREE_CODE (arg2) != INTEGER_CST
13008 || TREE_INT_CST_LOW (arg2) & ~0xf)
13010 error ("argument 3 must be a 4-bit unsigned literal");
13011 return const0_rtx;
13014 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
13015 || icode == CODE_FOR_vsx_xxpermdi_v2di
13016 || icode == CODE_FOR_vsx_xxsldwi_v16qi
13017 || icode == CODE_FOR_vsx_xxsldwi_v8hi
13018 || icode == CODE_FOR_vsx_xxsldwi_v4si
13019 || icode == CODE_FOR_vsx_xxsldwi_v4sf
13020 || icode == CODE_FOR_vsx_xxsldwi_v2di
13021 || icode == CODE_FOR_vsx_xxsldwi_v2df)
13023 /* Only allow 2-bit unsigned literals. */
13024 STRIP_NOPS (arg2);
13025 if (TREE_CODE (arg2) != INTEGER_CST
13026 || TREE_INT_CST_LOW (arg2) & ~0x3)
13028 error ("argument 3 must be a 2-bit unsigned literal");
13029 return const0_rtx;
13032 else if (icode == CODE_FOR_vsx_set_v2df
13033 || icode == CODE_FOR_vsx_set_v2di
13034 || icode == CODE_FOR_bcdadd
13035 || icode == CODE_FOR_bcdadd_lt
13036 || icode == CODE_FOR_bcdadd_eq
13037 || icode == CODE_FOR_bcdadd_gt
13038 || icode == CODE_FOR_bcdsub
13039 || icode == CODE_FOR_bcdsub_lt
13040 || icode == CODE_FOR_bcdsub_eq
13041 || icode == CODE_FOR_bcdsub_gt)
13043 /* Only allow 1-bit unsigned literals. */
13044 STRIP_NOPS (arg2);
13045 if (TREE_CODE (arg2) != INTEGER_CST
13046 || TREE_INT_CST_LOW (arg2) & ~0x1)
13048 error ("argument 3 must be a 1-bit unsigned literal");
13049 return const0_rtx;
13052 else if (icode == CODE_FOR_dfp_ddedpd_dd
13053 || icode == CODE_FOR_dfp_ddedpd_td)
13055 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
13056 STRIP_NOPS (arg0);
13057 if (TREE_CODE (arg0) != INTEGER_CST
13058 || TREE_INT_CST_LOW (arg0) & ~0x3)
13060 error ("argument 1 must be 0 or 2");
13061 return const0_rtx;
13064 else if (icode == CODE_FOR_dfp_denbcd_dd
13065 || icode == CODE_FOR_dfp_denbcd_td)
13067 /* Only allow 1-bit unsigned literals. */
13068 STRIP_NOPS (arg0);
13069 if (TREE_CODE (arg0) != INTEGER_CST
13070 || TREE_INT_CST_LOW (arg0) & ~0x1)
13072 error ("argument 1 must be a 1-bit unsigned literal");
13073 return const0_rtx;
13076 else if (icode == CODE_FOR_dfp_dscli_dd
13077 || icode == CODE_FOR_dfp_dscli_td
13078 || icode == CODE_FOR_dfp_dscri_dd
13079 || icode == CODE_FOR_dfp_dscri_td)
13081 /* Only allow 6-bit unsigned literals. */
13082 STRIP_NOPS (arg1);
13083 if (TREE_CODE (arg1) != INTEGER_CST
13084 || TREE_INT_CST_LOW (arg1) & ~0x3f)
13086 error ("argument 2 must be a 6-bit unsigned literal");
13087 return const0_rtx;
13090 else if (icode == CODE_FOR_crypto_vshasigmaw
13091 || icode == CODE_FOR_crypto_vshasigmad)
13093 /* Check whether the 2nd and 3rd arguments are integer constants and in
13094 range and prepare arguments. */
13095 STRIP_NOPS (arg1);
13096 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
13098 error ("argument 2 must be 0 or 1");
13099 return const0_rtx;
13102 STRIP_NOPS (arg2);
13103 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
13105 error ("argument 3 must be in the range 0..15");
13106 return const0_rtx;
13110 if (target == 0
13111 || GET_MODE (target) != tmode
13112 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13113 target = gen_reg_rtx (tmode);
13115 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13116 op0 = copy_to_mode_reg (mode0, op0);
13117 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13118 op1 = copy_to_mode_reg (mode1, op1);
13119 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
13120 op2 = copy_to_mode_reg (mode2, op2);
13122 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
13123 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
13124 else
13125 pat = GEN_FCN (icode) (target, op0, op1, op2);
13126 if (! pat)
13127 return 0;
13128 emit_insn (pat);
13130 return target;
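/* Illustrative example (editorial sketch): the literal-range checks
   above fire at expansion time.  Assuming vec_sld resolves to one of
   the altivec_vsldoi patterns,

       vector int r = vec_sld (a, b, 3);

   expands normally, while a shift count of 17 is rejected with
   "argument 3 must be a 4-bit unsigned literal" and the call expands
   to a constant zero.  */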
13133 /* Expand the lvx builtins. */
13134 static rtx
13135 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
13137 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13138 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13139 tree arg0;
13140 machine_mode tmode, mode0;
13141 rtx pat, op0;
13142 enum insn_code icode;
13144 switch (fcode)
13146 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
13147 icode = CODE_FOR_vector_altivec_load_v16qi;
13148 break;
13149 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
13150 icode = CODE_FOR_vector_altivec_load_v8hi;
13151 break;
13152 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
13153 icode = CODE_FOR_vector_altivec_load_v4si;
13154 break;
13155 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
13156 icode = CODE_FOR_vector_altivec_load_v4sf;
13157 break;
13158 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
13159 icode = CODE_FOR_vector_altivec_load_v2df;
13160 break;
13161 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
13162 icode = CODE_FOR_vector_altivec_load_v2di; break;
13163 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
13164 icode = CODE_FOR_vector_altivec_load_v1ti;
13165 break;
13166 default:
13167 *expandedp = false;
13168 return NULL_RTX;
13171 *expandedp = true;
13173 arg0 = CALL_EXPR_ARG (exp, 0);
13174 op0 = expand_normal (arg0);
13175 tmode = insn_data[icode].operand[0].mode;
13176 mode0 = insn_data[icode].operand[1].mode;
13178 if (target == 0
13179 || GET_MODE (target) != tmode
13180 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13181 target = gen_reg_rtx (tmode);
13183 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13184 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13186 pat = GEN_FCN (icode) (target, op0);
13187 if (! pat)
13188 return 0;
13189 emit_insn (pat);
13190 return target;
13193 /* Expand the stvx builtins. */
13194 static rtx
13195 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13196 bool *expandedp)
13198 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13199 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
13200 tree arg0, arg1;
13201 machine_mode mode0, mode1;
13202 rtx pat, op0, op1;
13203 enum insn_code icode;
13205 switch (fcode)
13207 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
13208 icode = CODE_FOR_vector_altivec_store_v16qi;
13209 break;
13210 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
13211 icode = CODE_FOR_vector_altivec_store_v8hi;
13212 break;
13213 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
13214 icode = CODE_FOR_vector_altivec_store_v4si;
13215 break;
13216 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
13217 icode = CODE_FOR_vector_altivec_store_v4sf;
13218 break;
13219 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
13220 icode = CODE_FOR_vector_altivec_store_v2df;
13221 break;
13222 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
13223 icode = CODE_FOR_vector_altivec_store_v2di; break;
13224 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
13225 icode = CODE_FOR_vector_altivec_store_v1ti;
13226 break;
13227 default:
13228 *expandedp = false;
13229 return NULL_RTX;
13232 arg0 = CALL_EXPR_ARG (exp, 0);
13233 arg1 = CALL_EXPR_ARG (exp, 1);
13234 op0 = expand_normal (arg0);
13235 op1 = expand_normal (arg1);
13236 mode0 = insn_data[icode].operand[0].mode;
13237 mode1 = insn_data[icode].operand[1].mode;
13239 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13240 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
13241 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13242 op1 = copy_to_mode_reg (mode1, op1);
13244 pat = GEN_FCN (icode) (op0, op1);
13245 if (pat)
13246 emit_insn (pat);
13248 *expandedp = true;
13249 return NULL_RTX;
13252 /* Expand the dst builtins. */
13253 static rtx
13254 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
13255 bool *expandedp)
13257 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13258 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13259 tree arg0, arg1, arg2;
13260 machine_mode mode0, mode1;
13261 rtx pat, op0, op1, op2;
13262 const struct builtin_description *d;
13263 size_t i;
13265 *expandedp = false;
13267 /* Handle DST variants. */
13268 d = bdesc_dst;
13269 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
13270 if (d->code == fcode)
13272 arg0 = CALL_EXPR_ARG (exp, 0);
13273 arg1 = CALL_EXPR_ARG (exp, 1);
13274 arg2 = CALL_EXPR_ARG (exp, 2);
13275 op0 = expand_normal (arg0);
13276 op1 = expand_normal (arg1);
13277 op2 = expand_normal (arg2);
13278 mode0 = insn_data[d->icode].operand[0].mode;
13279 mode1 = insn_data[d->icode].operand[1].mode;
13281 /* Invalid arguments; bail out before generating bad rtl. */
13282 if (arg0 == error_mark_node
13283 || arg1 == error_mark_node
13284 || arg2 == error_mark_node)
13285 return const0_rtx;
13287 *expandedp = true;
13288 STRIP_NOPS (arg2);
13289 if (TREE_CODE (arg2) != INTEGER_CST
13290 || TREE_INT_CST_LOW (arg2) & ~0x3)
13292 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
13293 return const0_rtx;
13296 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13297 op0 = copy_to_mode_reg (Pmode, op0);
13298 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13299 op1 = copy_to_mode_reg (mode1, op1);
13301 pat = GEN_FCN (d->icode) (op0, op1, op2);
13302 if (pat != 0)
13303 emit_insn (pat);
13305 return NULL_RTX;
13308 return NULL_RTX;
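/* Illustrative note (editorial sketch): for the data-stream builtins,
   e.g. vec_dst (p, control, 0), the final argument selects one of four
   stream tags and must be a 2-bit literal; anything else is rejected
   with the error above before any rtl is generated.  */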
13311 /* Expand vec_init builtin. */
13312 static rtx
13313 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
13315 machine_mode tmode = TYPE_MODE (type);
13316 machine_mode inner_mode = GET_MODE_INNER (tmode);
13317 int i, n_elt = GET_MODE_NUNITS (tmode);
13319 gcc_assert (VECTOR_MODE_P (tmode));
13320 gcc_assert (n_elt == call_expr_nargs (exp));
13322 if (!target || !register_operand (target, tmode))
13323 target = gen_reg_rtx (tmode);
13325 /* If we have a vector comprised of a single element, such as V1TImode, do
13326 the initialization directly. */
13327 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
13329 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
13330 emit_move_insn (target, gen_lowpart (tmode, x));
13332 else
13334 rtvec v = rtvec_alloc (n_elt);
13336 for (i = 0; i < n_elt; ++i)
13338 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
13339 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13342 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
13345 return target;
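/* Illustrative note (editorial sketch): when a VEC_INIT builtin is
   called with, say, four scalar arguments, the loop above gathers the
   lowpart of each element into an rtvec and hands the resulting
   PARALLEL to rs6000_expand_vector_init; the single-element V1TImode
   case instead reduces to a plain move through gen_lowpart.  */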
13348 /* Return the integer constant in ARG. Constrain it to be in the range
13349 of the subparts of VEC_TYPE; issue an error if not. */
13351 static int
13352 get_element_number (tree vec_type, tree arg)
13354 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13356 if (!tree_fits_uhwi_p (arg)
13357 || (elt = tree_to_uhwi (arg), elt > max))
13359 error ("selector must be an integer constant in the range 0..%wi", max);
13360 return 0;
13363 return elt;
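/* Illustrative note (editorial sketch): for a vector int argument,
   TYPE_VECTOR_SUBPARTS is 4, so selectors 0..3 are accepted; a
   selector of 5 draws the range error above, after which element 0 is
   used so that expansion can continue.  */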
13366 /* Expand vec_set builtin. */
13367 static rtx
13368 altivec_expand_vec_set_builtin (tree exp)
13370 machine_mode tmode, mode1;
13371 tree arg0, arg1, arg2;
13372 int elt;
13373 rtx op0, op1;
13375 arg0 = CALL_EXPR_ARG (exp, 0);
13376 arg1 = CALL_EXPR_ARG (exp, 1);
13377 arg2 = CALL_EXPR_ARG (exp, 2);
13379 tmode = TYPE_MODE (TREE_TYPE (arg0));
13380 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13381 gcc_assert (VECTOR_MODE_P (tmode));
13383 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
13384 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
13385 elt = get_element_number (TREE_TYPE (arg0), arg2);
13387 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
13388 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
13390 op0 = force_reg (tmode, op0);
13391 op1 = force_reg (mode1, op1);
13393 rs6000_expand_vector_set (op0, op1, elt);
13395 return op0;
13398 /* Expand vec_ext builtin. */
13399 static rtx
13400 altivec_expand_vec_ext_builtin (tree exp, rtx target)
13402 machine_mode tmode, mode0;
13403 tree arg0, arg1;
13404 int elt;
13405 rtx op0;
13407 arg0 = CALL_EXPR_ARG (exp, 0);
13408 arg1 = CALL_EXPR_ARG (exp, 1);
13410 op0 = expand_normal (arg0);
13411 elt = get_element_number (TREE_TYPE (arg0), arg1);
13413 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13414 mode0 = TYPE_MODE (TREE_TYPE (arg0));
13415 gcc_assert (VECTOR_MODE_P (mode0));
13417 op0 = force_reg (mode0, op0);
13419 if (optimize || !target || !register_operand (target, tmode))
13420 target = gen_reg_rtx (tmode);
13422 rs6000_expand_vector_extract (target, op0, elt);
13424 return target;
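/* Illustrative example (editorial sketch, not from the original
   source): a source-level extract such as

       int x = vec_extract (v, 2);

   (assuming it resolves to a VEC_EXT builtin) lands here; the vector
   is forced into a register and rs6000_expand_vector_extract emits the
   element move into a fresh TMODE register.  */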
13427 /* Expand the builtin in EXP and store the result in TARGET. Store
13428 true in *EXPANDEDP if we found a builtin to expand. */
13429 static rtx
13430 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
13432 const struct builtin_description *d;
13433 size_t i;
13434 enum insn_code icode;
13435 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13436 tree arg0;
13437 rtx op0, pat;
13438 machine_mode tmode, mode0;
13439 enum rs6000_builtins fcode
13440 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13442 if (rs6000_overloaded_builtin_p (fcode))
13444 *expandedp = true;
13445 error ("unresolved overload for Altivec builtin %qF", fndecl);
13447 /* Given it is invalid, just generate a normal call. */
13448 return expand_call (exp, target, false);
13451 target = altivec_expand_ld_builtin (exp, target, expandedp);
13452 if (*expandedp)
13453 return target;
13455 target = altivec_expand_st_builtin (exp, target, expandedp);
13456 if (*expandedp)
13457 return target;
13459 target = altivec_expand_dst_builtin (exp, target, expandedp);
13460 if (*expandedp)
13461 return target;
13463 *expandedp = true;
13465 switch (fcode)
13467 case ALTIVEC_BUILTIN_STVX_V2DF:
13468 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13469 case ALTIVEC_BUILTIN_STVX_V2DI:
13470 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13471 case ALTIVEC_BUILTIN_STVX_V4SF:
13472 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13473 case ALTIVEC_BUILTIN_STVX:
13474 case ALTIVEC_BUILTIN_STVX_V4SI:
13475 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13476 case ALTIVEC_BUILTIN_STVX_V8HI:
13477 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13478 case ALTIVEC_BUILTIN_STVX_V16QI:
13479 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13480 case ALTIVEC_BUILTIN_STVEBX:
13481 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13482 case ALTIVEC_BUILTIN_STVEHX:
13483 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13484 case ALTIVEC_BUILTIN_STVEWX:
13485 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13486 case ALTIVEC_BUILTIN_STVXL_V2DF:
13487 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13488 case ALTIVEC_BUILTIN_STVXL_V2DI:
13489 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13490 case ALTIVEC_BUILTIN_STVXL_V4SF:
13491 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13492 case ALTIVEC_BUILTIN_STVXL:
13493 case ALTIVEC_BUILTIN_STVXL_V4SI:
13494 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13495 case ALTIVEC_BUILTIN_STVXL_V8HI:
13496 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13497 case ALTIVEC_BUILTIN_STVXL_V16QI:
13498 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13500 case ALTIVEC_BUILTIN_STVLX:
13501 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13502 case ALTIVEC_BUILTIN_STVLXL:
13503 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13504 case ALTIVEC_BUILTIN_STVRX:
13505 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13506 case ALTIVEC_BUILTIN_STVRXL:
13507 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13509 case VSX_BUILTIN_STXVD2X_V1TI:
13510 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13511 case VSX_BUILTIN_STXVD2X_V2DF:
13512 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13513 case VSX_BUILTIN_STXVD2X_V2DI:
13514 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13515 case VSX_BUILTIN_STXVW4X_V4SF:
13516 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13517 case VSX_BUILTIN_STXVW4X_V4SI:
13518 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13519 case VSX_BUILTIN_STXVW4X_V8HI:
13520 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13521 case VSX_BUILTIN_STXVW4X_V16QI:
13522 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13524 case ALTIVEC_BUILTIN_MFVSCR:
13525 icode = CODE_FOR_altivec_mfvscr;
13526 tmode = insn_data[icode].operand[0].mode;
13528 if (target == 0
13529 || GET_MODE (target) != tmode
13530 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13531 target = gen_reg_rtx (tmode);
13533 pat = GEN_FCN (icode) (target);
13534 if (! pat)
13535 return 0;
13536 emit_insn (pat);
13537 return target;
13539 case ALTIVEC_BUILTIN_MTVSCR:
13540 icode = CODE_FOR_altivec_mtvscr;
13541 arg0 = CALL_EXPR_ARG (exp, 0);
13542 op0 = expand_normal (arg0);
13543 mode0 = insn_data[icode].operand[0].mode;
13545 /* If we got invalid arguments, bail out before generating bad rtl. */
13546 if (arg0 == error_mark_node)
13547 return const0_rtx;
13549 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13550 op0 = copy_to_mode_reg (mode0, op0);
13552 pat = GEN_FCN (icode) (op0);
13553 if (pat)
13554 emit_insn (pat);
13555 return NULL_RTX;
13557 case ALTIVEC_BUILTIN_DSSALL:
13558 emit_insn (gen_altivec_dssall ());
13559 return NULL_RTX;
13561 case ALTIVEC_BUILTIN_DSS:
13562 icode = CODE_FOR_altivec_dss;
13563 arg0 = CALL_EXPR_ARG (exp, 0);
13564 STRIP_NOPS (arg0);
13565 op0 = expand_normal (arg0);
13566 mode0 = insn_data[icode].operand[0].mode;
13568 /* If we got invalid arguments, bail out before generating bad rtl. */
13569 if (arg0 == error_mark_node)
13570 return const0_rtx;
13572 if (TREE_CODE (arg0) != INTEGER_CST
13573 || TREE_INT_CST_LOW (arg0) & ~0x3)
13575 error ("argument to dss must be a 2-bit unsigned literal");
13576 return const0_rtx;
13579 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13580 op0 = copy_to_mode_reg (mode0, op0);
13582 emit_insn (gen_altivec_dss (op0));
13583 return NULL_RTX;
13585 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
13586 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
13587 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
13588 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
13589 case VSX_BUILTIN_VEC_INIT_V2DF:
13590 case VSX_BUILTIN_VEC_INIT_V2DI:
13591 case VSX_BUILTIN_VEC_INIT_V1TI:
13592 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
13594 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
13595 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
13596 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
13597 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
13598 case VSX_BUILTIN_VEC_SET_V2DF:
13599 case VSX_BUILTIN_VEC_SET_V2DI:
13600 case VSX_BUILTIN_VEC_SET_V1TI:
13601 return altivec_expand_vec_set_builtin (exp);
13603 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
13604 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
13605 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
13606 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
13607 case VSX_BUILTIN_VEC_EXT_V2DF:
13608 case VSX_BUILTIN_VEC_EXT_V2DI:
13609 case VSX_BUILTIN_VEC_EXT_V1TI:
13610 return altivec_expand_vec_ext_builtin (exp, target);
13612 default:
13613 break;
13614 /* Not handled above; fall through to the table-driven expanders below. */
13617 /* Expand abs* operations. */
13618 d = bdesc_abs;
13619 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
13620 if (d->code == fcode)
13621 return altivec_expand_abs_builtin (d->icode, exp, target);
13623 /* Expand the AltiVec predicates. */
13624 d = bdesc_altivec_preds;
13625 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
13626 if (d->code == fcode)
13627 return altivec_expand_predicate_builtin (d->icode, exp, target);
13629 /* LV* are funky: they are initialized differently from the other builtins. */
13630 switch (fcode)
13632 case ALTIVEC_BUILTIN_LVSL:
13633 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
13634 exp, target, false);
13635 case ALTIVEC_BUILTIN_LVSR:
13636 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
13637 exp, target, false);
13638 case ALTIVEC_BUILTIN_LVEBX:
13639 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
13640 exp, target, false);
13641 case ALTIVEC_BUILTIN_LVEHX:
13642 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
13643 exp, target, false);
13644 case ALTIVEC_BUILTIN_LVEWX:
13645 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
13646 exp, target, false);
13647 case ALTIVEC_BUILTIN_LVXL_V2DF:
13648 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
13649 exp, target, false);
13650 case ALTIVEC_BUILTIN_LVXL_V2DI:
13651 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
13652 exp, target, false);
13653 case ALTIVEC_BUILTIN_LVXL_V4SF:
13654 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
13655 exp, target, false);
13656 case ALTIVEC_BUILTIN_LVXL:
13657 case ALTIVEC_BUILTIN_LVXL_V4SI:
13658 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
13659 exp, target, false);
13660 case ALTIVEC_BUILTIN_LVXL_V8HI:
13661 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
13662 exp, target, false);
13663 case ALTIVEC_BUILTIN_LVXL_V16QI:
13664 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
13665 exp, target, false);
13666 case ALTIVEC_BUILTIN_LVX_V2DF:
13667 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
13668 exp, target, false);
13669 case ALTIVEC_BUILTIN_LVX_V2DI:
13670 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
13671 exp, target, false);
13672 case ALTIVEC_BUILTIN_LVX_V4SF:
13673 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
13674 exp, target, false);
13675 case ALTIVEC_BUILTIN_LVX:
13676 case ALTIVEC_BUILTIN_LVX_V4SI:
13677 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
13678 exp, target, false);
13679 case ALTIVEC_BUILTIN_LVX_V8HI:
13680 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
13681 exp, target, false);
13682 case ALTIVEC_BUILTIN_LVX_V16QI:
13683 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
13684 exp, target, false);
13685 case ALTIVEC_BUILTIN_LVLX:
13686 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
13687 exp, target, true);
13688 case ALTIVEC_BUILTIN_LVLXL:
13689 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
13690 exp, target, true);
13691 case ALTIVEC_BUILTIN_LVRX:
13692 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
13693 exp, target, true);
13694 case ALTIVEC_BUILTIN_LVRXL:
13695 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
13696 exp, target, true);
13697 case VSX_BUILTIN_LXVD2X_V1TI:
13698 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
13699 exp, target, false);
13700 case VSX_BUILTIN_LXVD2X_V2DF:
13701 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
13702 exp, target, false);
13703 case VSX_BUILTIN_LXVD2X_V2DI:
13704 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
13705 exp, target, false);
13706 case VSX_BUILTIN_LXVW4X_V4SF:
13707 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
13708 exp, target, false);
13709 case VSX_BUILTIN_LXVW4X_V4SI:
13710 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
13711 exp, target, false);
13712 case VSX_BUILTIN_LXVW4X_V8HI:
13713 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
13714 exp, target, false);
13715 case VSX_BUILTIN_LXVW4X_V16QI:
13716 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
13717 exp, target, false);
13719 default:
13720 break;
13721 /* Not handled here; fall through and report that nothing was expanded. */
13724 *expandedp = false;
13725 return NULL_RTX;
13728 /* Expand the builtin in EXP and store the result in TARGET. Store
13729 true in *EXPANDEDP if we found a builtin to expand. */
13730 static rtx
13731 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
13733 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13734 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13735 const struct builtin_description *d;
13736 size_t i;
13738 *expandedp = true;
13740 switch (fcode)
13742 case PAIRED_BUILTIN_STX:
13743 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
13744 case PAIRED_BUILTIN_LX:
13745 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
13746 default:
13747 break;
13748 /* Not handled above; fall through to the predicate expanders below. */
13751 /* Expand the paired predicates. */
13752 d = bdesc_paired_preds;
13753 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
13754 if (d->code == fcode)
13755 return paired_expand_predicate_builtin (d->icode, exp, target);
13757 *expandedp = false;
13758 return NULL_RTX;
13761 /* Binops that need to be initialized manually, but can be expanded
13762 automagically by rs6000_expand_binop_builtin. */
13763 static const struct builtin_description bdesc_2arg_spe[] =
13765 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
13766 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
13767 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
13768 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
13769 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
13770 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
13771 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
13772 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
13773 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
13774 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
13775 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
13776 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
13777 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
13778 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
13779 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
13780 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
13781 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
13782 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
13783 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
13784 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
13785 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
13786 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
13789 /* Expand the builtin in EXP and store the result in TARGET. Store
13790 true in *EXPANDEDP if we found a builtin to expand.
13792 This expands the SPE builtins that are not simple unary and binary
13793 operations. */
13794 static rtx
13795 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
13797 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13798 tree arg1, arg0;
13799 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13800 enum insn_code icode;
13801 machine_mode tmode, mode0;
13802 rtx pat, op0;
13803 const struct builtin_description *d;
13804 size_t i;
13806 *expandedp = true;
13808 /* Syntax check for a 5-bit unsigned immediate. */
13809 switch (fcode)
13811 case SPE_BUILTIN_EVSTDD:
13812 case SPE_BUILTIN_EVSTDH:
13813 case SPE_BUILTIN_EVSTDW:
13814 case SPE_BUILTIN_EVSTWHE:
13815 case SPE_BUILTIN_EVSTWHO:
13816 case SPE_BUILTIN_EVSTWWE:
13817 case SPE_BUILTIN_EVSTWWO:
13818 arg1 = CALL_EXPR_ARG (exp, 2);
13819 if (TREE_CODE (arg1) != INTEGER_CST
13820 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13822 error ("argument 2 must be a 5-bit unsigned literal");
13823 return const0_rtx;
13825 break;
13826 default:
13827 break;
13830 /* The evsplat*i instructions are not quite generic. */
13831 switch (fcode)
13833 case SPE_BUILTIN_EVSPLATFI:
13834 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
13835 exp, target);
13836 case SPE_BUILTIN_EVSPLATI:
13837 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
13838 exp, target);
13839 default:
13840 break;
13843 d = bdesc_2arg_spe;
13844 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
13845 if (d->code == fcode)
13846 return rs6000_expand_binop_builtin (d->icode, exp, target);
13848 d = bdesc_spe_predicates;
13849 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
13850 if (d->code == fcode)
13851 return spe_expand_predicate_builtin (d->icode, exp, target);
13853 d = bdesc_spe_evsel;
13854 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
13855 if (d->code == fcode)
13856 return spe_expand_evsel_builtin (d->icode, exp, target);
13858 switch (fcode)
13860 case SPE_BUILTIN_EVSTDDX:
13861 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
13862 case SPE_BUILTIN_EVSTDHX:
13863 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
13864 case SPE_BUILTIN_EVSTDWX:
13865 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
13866 case SPE_BUILTIN_EVSTWHEX:
13867 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
13868 case SPE_BUILTIN_EVSTWHOX:
13869 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
13870 case SPE_BUILTIN_EVSTWWEX:
13871 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
13872 case SPE_BUILTIN_EVSTWWOX:
13873 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
13874 case SPE_BUILTIN_EVSTDD:
13875 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
13876 case SPE_BUILTIN_EVSTDH:
13877 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
13878 case SPE_BUILTIN_EVSTDW:
13879 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
13880 case SPE_BUILTIN_EVSTWHE:
13881 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
13882 case SPE_BUILTIN_EVSTWHO:
13883 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
13884 case SPE_BUILTIN_EVSTWWE:
13885 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
13886 case SPE_BUILTIN_EVSTWWO:
13887 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
13888 case SPE_BUILTIN_MFSPEFSCR:
13889 icode = CODE_FOR_spe_mfspefscr;
13890 tmode = insn_data[icode].operand[0].mode;
13892 if (target == 0
13893 || GET_MODE (target) != tmode
13894 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13895 target = gen_reg_rtx (tmode);
13897 pat = GEN_FCN (icode) (target);
13898 if (! pat)
13899 return 0;
13900 emit_insn (pat);
13901 return target;
13902 case SPE_BUILTIN_MTSPEFSCR:
13903 icode = CODE_FOR_spe_mtspefscr;
13904 arg0 = CALL_EXPR_ARG (exp, 0);
13905 op0 = expand_normal (arg0);
13906 mode0 = insn_data[icode].operand[0].mode;
13908 if (arg0 == error_mark_node)
13909 return const0_rtx;
13911 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13912 op0 = copy_to_mode_reg (mode0, op0);
13914 pat = GEN_FCN (icode) (op0);
13915 if (pat)
13916 emit_insn (pat);
13917 return NULL_RTX;
13918 default:
13919 break;
13922 *expandedp = false;
13923 return NULL_RTX;
13926 static rtx
13927 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13929 rtx pat, scratch, tmp;
13930 tree form = CALL_EXPR_ARG (exp, 0);
13931 tree arg0 = CALL_EXPR_ARG (exp, 1);
13932 tree arg1 = CALL_EXPR_ARG (exp, 2);
13933 rtx op0 = expand_normal (arg0);
13934 rtx op1 = expand_normal (arg1);
13935 machine_mode mode0 = insn_data[icode].operand[1].mode;
13936 machine_mode mode1 = insn_data[icode].operand[2].mode;
13937 int form_int;
13938 enum rtx_code code;
13940 if (TREE_CODE (form) != INTEGER_CST)
13942 error ("argument 1 of __builtin_paired_predicate must be a constant");
13943 return const0_rtx;
13945 else
13946 form_int = TREE_INT_CST_LOW (form);
13948 gcc_assert (mode0 == mode1);
13950 if (arg0 == error_mark_node || arg1 == error_mark_node)
13951 return const0_rtx;
13953 if (target == 0
13954 || GET_MODE (target) != SImode
13955 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
13956 target = gen_reg_rtx (SImode);
13957 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13958 op0 = copy_to_mode_reg (mode0, op0);
13959 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13960 op1 = copy_to_mode_reg (mode1, op1);
13962 scratch = gen_reg_rtx (CCFPmode);
13964 pat = GEN_FCN (icode) (scratch, op0, op1);
13965 if (!pat)
13966 return const0_rtx;
13968 emit_insn (pat);
13970 switch (form_int)
13972 /* LT bit. */
13973 case 0:
13974 code = LT;
13975 break;
13976 /* GT bit. */
13977 case 1:
13978 code = GT;
13979 break;
13980 /* EQ bit. */
13981 case 2:
13982 code = EQ;
13983 break;
13984 /* UN bit. */
13985 case 3:
13986 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13987 return target;
13988 default:
13989 error ("argument 1 of __builtin_paired_predicate is out of range");
13990 return const0_rtx;
13993 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13994 emit_move_insn (target, tmp);
13995 return target;
13998 static rtx
13999 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
14001 rtx pat, scratch, tmp;
14002 tree form = CALL_EXPR_ARG (exp, 0);
14003 tree arg0 = CALL_EXPR_ARG (exp, 1);
14004 tree arg1 = CALL_EXPR_ARG (exp, 2);
14005 rtx op0 = expand_normal (arg0);
14006 rtx op1 = expand_normal (arg1);
14007 machine_mode mode0 = insn_data[icode].operand[1].mode;
14008 machine_mode mode1 = insn_data[icode].operand[2].mode;
14009 int form_int;
14010 enum rtx_code code;
14012 if (TREE_CODE (form) != INTEGER_CST)
14014 error ("argument 1 of __builtin_spe_predicate must be a constant");
14015 return const0_rtx;
14017 else
14018 form_int = TREE_INT_CST_LOW (form);
14020 gcc_assert (mode0 == mode1);
14022 if (arg0 == error_mark_node || arg1 == error_mark_node)
14023 return const0_rtx;
14025 if (target == 0
14026 || GET_MODE (target) != SImode
14027 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
14028 target = gen_reg_rtx (SImode);
14030 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14031 op0 = copy_to_mode_reg (mode0, op0);
14032 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
14033 op1 = copy_to_mode_reg (mode1, op1);
14035 scratch = gen_reg_rtx (CCmode);
14037 pat = GEN_FCN (icode) (scratch, op0, op1);
14038 if (! pat)
14039 return const0_rtx;
14040 emit_insn (pat);
14042 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
14043 _lower_. We use one compare, but look in different bits of the
14044 CR for each variant.
14046 There are 2 elements in each SPE simd type (upper/lower). The CR
14047 bits are set as follows:
14049 BIT 0 | BIT 1 | BIT 2 | BIT 3
14050 U | L | (U | L) | (U & L)
14052 So, for an "all" relationship, BIT 3 would be set.
14053 For an "any" relationship, BIT 2 would be set. Etc.
14055 Following traditional nomenclature, these bits map to:
14057 BIT 0 | BIT 1 | BIT 2 | BIT 3
14058 LT | GT | EQ | OV
14060 Later, we will generate rtl to look in the LT/GT/EQ/OV bits.
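   Worked example (editorial addition): for the "all" form (form_int 0)
   of a comparison, both elements must satisfy the relation, so the
   expander reads BIT 3 (U & L, surfaced as the OV bit); the "any" form
   reads BIT 2 (U | L, surfaced as the EQ bit).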
14063 switch (form_int)
14065 /* All variant. OV bit. */
14066 case 0:
14067 /* We need to get to the OV bit, which is the ORDERED bit. We
14068 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
14069 that's ugly and will make validate_condition_mode die.
14070 So let's just use another pattern. */
14071 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
14072 return target;
14073 /* Any variant. EQ bit. */
14074 case 1:
14075 code = EQ;
14076 break;
14077 /* Upper variant. LT bit. */
14078 case 2:
14079 code = LT;
14080 break;
14081 /* Lower variant. GT bit. */
14082 case 3:
14083 code = GT;
14084 break;
14085 default:
14086 error ("argument 1 of __builtin_spe_predicate is out of range");
14087 return const0_rtx;
14090 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
14091 emit_move_insn (target, tmp);
14093 return target;
14096 /* The evsel builtins look like this:
14098 e = __builtin_spe_evsel_OP (a, b, c, d);
14100 and work like this:
14102 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
14103 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
14106 static rtx
14107 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
14109 rtx pat, scratch;
14110 tree arg0 = CALL_EXPR_ARG (exp, 0);
14111 tree arg1 = CALL_EXPR_ARG (exp, 1);
14112 tree arg2 = CALL_EXPR_ARG (exp, 2);
14113 tree arg3 = CALL_EXPR_ARG (exp, 3);
14114 rtx op0 = expand_normal (arg0);
14115 rtx op1 = expand_normal (arg1);
14116 rtx op2 = expand_normal (arg2);
14117 rtx op3 = expand_normal (arg3);
14118 machine_mode mode0 = insn_data[icode].operand[1].mode;
14119 machine_mode mode1 = insn_data[icode].operand[2].mode;
14121 gcc_assert (mode0 == mode1);
14123 if (arg0 == error_mark_node || arg1 == error_mark_node
14124 || arg2 == error_mark_node || arg3 == error_mark_node)
14125 return const0_rtx;
14127 if (target == 0
14128 || GET_MODE (target) != mode0
14129 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
14130 target = gen_reg_rtx (mode0);
14132 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
14133 op0 = copy_to_mode_reg (mode0, op0);
14134 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
14135 op1 = copy_to_mode_reg (mode0, op1);
14136 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
14137 op2 = copy_to_mode_reg (mode0, op2);
14138 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
14139 op3 = copy_to_mode_reg (mode0, op3);
14141 /* Generate the compare. */
14142 scratch = gen_reg_rtx (CCmode);
14143 pat = GEN_FCN (icode) (scratch, op0, op1);
14144 if (! pat)
14145 return const0_rtx;
14146 emit_insn (pat);
14148 if (mode0 == V2SImode)
14149 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
14150 else
14151 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
14153 return target;
14156 /* Issue an error for a builtin function that is called without the
14157 appropriate target options being set. */
14159 static void
14160 rs6000_invalid_builtin (enum rs6000_builtins fncode)
14162 size_t uns_fncode = (size_t)fncode;
14163 const char *name = rs6000_builtin_info[uns_fncode].name;
14164 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
14166 gcc_assert (name != NULL);
14167 if ((fnmask & RS6000_BTM_CELL) != 0)
14168 error ("Builtin function %s is only valid for the cell processor", name);
14169 else if ((fnmask & RS6000_BTM_VSX) != 0)
14170 error ("Builtin function %s requires the -mvsx option", name);
14171 else if ((fnmask & RS6000_BTM_HTM) != 0)
14172 error ("Builtin function %s requires the -mhtm option", name);
14173 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
14174 error ("Builtin function %s requires the -maltivec option", name);
14175 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
14176 error ("Builtin function %s requires the -mpaired option", name);
14177 else if ((fnmask & RS6000_BTM_SPE) != 0)
14178 error ("Builtin function %s requires the -mspe option", name);
14179 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14180 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
14181 error ("Builtin function %s requires the -mhard-dfp and"
14182 " -mpower8-vector options", name);
14183 else if ((fnmask & RS6000_BTM_DFP) != 0)
14184 error ("Builtin function %s requires the -mhard-dfp option", name);
14185 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
14186 error ("Builtin function %s requires the -mpower8-vector option", name);
14187 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14188 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
14189 error ("Builtin function %s requires the -mhard-float and"
14190 " -mlong-double-128 options", name);
14191 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
14192 error ("Builtin function %s requires the -mhard-float option", name);
14193 else
14194 error ("Builtin function %s is not supported with the current options",
14195 name);
14198 /* Expand an expression EXP that calls a built-in function,
14199 with result going to TARGET if that's convenient
14200 (and in mode MODE if that's convenient).
14201 SUBTARGET may be used as the target for computing one of EXP's operands.
14202 IGNORE is nonzero if the value is to be ignored. */
14204 static rtx
14205 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
14206 machine_mode mode ATTRIBUTE_UNUSED,
14207 int ignore ATTRIBUTE_UNUSED)
14209 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
14210 enum rs6000_builtins fcode
14211 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
14212 size_t uns_fcode = (size_t)fcode;
14213 const struct builtin_description *d;
14214 size_t i;
14215 rtx ret;
14216 bool success;
14217 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
14218 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
14220 if (TARGET_DEBUG_BUILTIN)
14222 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
14223 const char *name1 = rs6000_builtin_info[uns_fcode].name;
14224 const char *name2 = ((icode != CODE_FOR_nothing)
14225 ? get_insn_name ((int)icode)
14226 : "nothing");
14227 const char *name3;
14229 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
14231 default: name3 = "unknown"; break;
14232 case RS6000_BTC_SPECIAL: name3 = "special"; break;
14233 case RS6000_BTC_UNARY: name3 = "unary"; break;
14234 case RS6000_BTC_BINARY: name3 = "binary"; break;
14235 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
14236 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
14237 case RS6000_BTC_ABS: name3 = "abs"; break;
14238 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
14239 case RS6000_BTC_DST: name3 = "dst"; break;
14243 fprintf (stderr,
14244 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
14245 (name1) ? name1 : "---", fcode,
14246 (name2) ? name2 : "---", (int)icode,
14247 name3,
14248 func_valid_p ? "" : ", not valid");
14251 if (!func_valid_p)
14253 rs6000_invalid_builtin (fcode);
14255 /* Given it is invalid, just generate a normal call. */
14256 return expand_call (exp, target, ignore);
14259 switch (fcode)
14261 case RS6000_BUILTIN_RECIP:
14262 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
14264 case RS6000_BUILTIN_RECIPF:
14265 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
14267 case RS6000_BUILTIN_RSQRTF:
14268 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
14270 case RS6000_BUILTIN_RSQRT:
14271 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
14273 case POWER7_BUILTIN_BPERMD:
14274 return rs6000_expand_binop_builtin (((TARGET_64BIT)
14275 ? CODE_FOR_bpermd_di
14276 : CODE_FOR_bpermd_si), exp, target);
14278 case RS6000_BUILTIN_GET_TB:
14279 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
14280 target);
14282 case RS6000_BUILTIN_MFTB:
14283 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
14284 ? CODE_FOR_rs6000_mftb_di
14285 : CODE_FOR_rs6000_mftb_si),
14286 target);
14288 case RS6000_BUILTIN_MFFS:
14289 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
14291 case RS6000_BUILTIN_MTFSF:
14292 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
14294 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
14295 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
14297 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
14298 : (int) CODE_FOR_altivec_lvsl_direct);
14299 machine_mode tmode = insn_data[icode].operand[0].mode;
14300 machine_mode mode = insn_data[icode].operand[1].mode;
14301 tree arg;
14302 rtx op, addr, pat;
14304 gcc_assert (TARGET_ALTIVEC);
14306 arg = CALL_EXPR_ARG (exp, 0);
14307 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
14308 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
14309 addr = memory_address (mode, op);
14310 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
14311 op = addr;
14312 else
14314 /* For the load case we need to negate the address. */
14315 op = gen_reg_rtx (GET_MODE (addr));
14316 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
14318 op = gen_rtx_MEM (mode, op);
14320 if (target == 0
14321 || GET_MODE (target) != tmode
14322 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14323 target = gen_reg_rtx (tmode);
14325 pat = GEN_FCN (icode) (target, op);
14326 if (!pat)
14327 return 0;
14328 emit_insn (pat);
14330 return target;
14333 case ALTIVEC_BUILTIN_VCFUX:
14334 case ALTIVEC_BUILTIN_VCFSX:
14335 case ALTIVEC_BUILTIN_VCTUXS:
14336 case ALTIVEC_BUILTIN_VCTSXS:
14337 /* FIXME: There's got to be a nicer way to handle this case than
14338 constructing a new CALL_EXPR. */
14339 if (call_expr_nargs (exp) == 1)
14341 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
14342 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
14344 break;
14346 default:
14347 break;
14350 if (TARGET_ALTIVEC)
14352 ret = altivec_expand_builtin (exp, target, &success);
14354 if (success)
14355 return ret;
14357 if (TARGET_SPE)
14359 ret = spe_expand_builtin (exp, target, &success);
14361 if (success)
14362 return ret;
14364 if (TARGET_PAIRED_FLOAT)
14366 ret = paired_expand_builtin (exp, target, &success);
14368 if (success)
14369 return ret;
14371 if (TARGET_HTM)
14373 ret = htm_expand_builtin (exp, target, &success);
14375 if (success)
14376 return ret;
14379 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
14380 gcc_assert (attr == RS6000_BTC_UNARY
14381 || attr == RS6000_BTC_BINARY
14382 || attr == RS6000_BTC_TERNARY);
14384 /* Handle simple unary operations. */
14385 d = bdesc_1arg;
14386 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14387 if (d->code == fcode)
14388 return rs6000_expand_unop_builtin (d->icode, exp, target);
14390 /* Handle simple binary operations. */
14391 d = bdesc_2arg;
14392 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14393 if (d->code == fcode)
14394 return rs6000_expand_binop_builtin (d->icode, exp, target);
14396 /* Handle simple ternary operations. */
14397 d = bdesc_3arg;
14398 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14399 if (d->code == fcode)
14400 return rs6000_expand_ternop_builtin (d->icode, exp, target);
14402 gcc_unreachable ();
14405 static void
14406 rs6000_init_builtins (void)
14408 tree tdecl;
14409 tree ftype;
14410 machine_mode mode;
14411 machine_mode ieee128_mode;
14412 machine_mode ibm128_mode;
14414 if (TARGET_DEBUG_BUILTIN)
14415 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
14416 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
14417 (TARGET_SPE) ? ", spe" : "",
14418 (TARGET_ALTIVEC) ? ", altivec" : "",
14419 (TARGET_VSX) ? ", vsx" : "");
14421 V2SI_type_node = build_vector_type (intSI_type_node, 2);
14422 V2SF_type_node = build_vector_type (float_type_node, 2);
14423 V2DI_type_node = build_vector_type (intDI_type_node, 2);
14424 V2DF_type_node = build_vector_type (double_type_node, 2);
14425 V4HI_type_node = build_vector_type (intHI_type_node, 4);
14426 V4SI_type_node = build_vector_type (intSI_type_node, 4);
14427 V4SF_type_node = build_vector_type (float_type_node, 4);
14428 V8HI_type_node = build_vector_type (intHI_type_node, 8);
14429 V16QI_type_node = build_vector_type (intQI_type_node, 16);
14431 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
14432 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
14433 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
14434 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
14436 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
14437 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
14438 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
14439 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
14441 /* We use V1TI mode as a special container to hold __int128_t items that
14442 must live in VSX registers. */
14443 if (intTI_type_node)
14445 V1TI_type_node = build_vector_type (intTI_type_node, 1);
14446 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
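/* A sketch of what the V1TI container buys (illustrative; assumes -mvsx
   on a target where intTI_type_node exists): a vector of one __int128
   can be kept in a single VSX register rather than a GPR pair.  */
#if 0
typedef __int128 v1ti __attribute__ ((vector_size (16)));
v1ti
pass_through (v1ti x)
{
  return x;	/* Handled in V1TImode, per the comment above.  */
}
#endif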
14449 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
14450 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
14451 'vector unsigned short'. */
14453 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14454 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14455 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14456 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14457 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
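/* An illustrative C++ consequence of the distinct type copies above:
   the two declarations below are separate overloads, not redeclarations
   (sketch only; assumes -maltivec).  */
#if 0
void f (__vector __bool int);
void f (__vector unsigned int);	/* OK: __bool int is its own type.  */
#endif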
14459 long_integer_type_internal_node = long_integer_type_node;
14460 long_unsigned_type_internal_node = long_unsigned_type_node;
14461 long_long_integer_type_internal_node = long_long_integer_type_node;
14462 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14463 intQI_type_internal_node = intQI_type_node;
14464 uintQI_type_internal_node = unsigned_intQI_type_node;
14465 intHI_type_internal_node = intHI_type_node;
14466 uintHI_type_internal_node = unsigned_intHI_type_node;
14467 intSI_type_internal_node = intSI_type_node;
14468 uintSI_type_internal_node = unsigned_intSI_type_node;
14469 intDI_type_internal_node = intDI_type_node;
14470 uintDI_type_internal_node = unsigned_intDI_type_node;
14471 intTI_type_internal_node = intTI_type_node;
14472 uintTI_type_internal_node = unsigned_intTI_type_node;
14473 float_type_internal_node = float_type_node;
14474 double_type_internal_node = double_type_node;
14475 long_double_type_internal_node = long_double_type_node;
14476 dfloat64_type_internal_node = dfloat64_type_node;
14477 dfloat128_type_internal_node = dfloat128_type_node;
14478 void_type_internal_node = void_type_node;
14480 /* 128-bit floating point support. KFmode is IEEE 128-bit floating point.
14481 IFmode is the IBM extended 128-bit format that is a pair of doubles.
14482 TFmode will be either IEEE 128-bit floating point or the IBM double-double
14483 format that uses a pair of doubles, depending on the switches and
14484 defaults. */
14485 if (TARGET_IEEEQUAD)
14487 ieee128_mode = TFmode;
14488 ibm128_mode = IFmode;
14490 else
14492 ieee128_mode = KFmode;
14493 ibm128_mode = TFmode;
14496 ieee128_float_type_node = make_node (REAL_TYPE);
14497 TYPE_PRECISION (ieee128_float_type_node) = 128;
14498 layout_type (ieee128_float_type_node);
14499 SET_TYPE_MODE (ieee128_float_type_node, ieee128_mode);
14501 ibm128_float_type_node = make_node (REAL_TYPE);
14502 TYPE_PRECISION (ibm128_float_type_node) = 128;
14503 layout_type (ibm128_float_type_node);
14504 SET_TYPE_MODE (ibm128_float_type_node, ibm128_mode);
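/* Spelled out, the selection above yields (a sketch of the two
   configurations, assuming the usual wiring in which the IEEE long
   double ABI option sets TARGET_IEEEQUAD):
     TARGET_IEEEQUAD set:    ieee128_float_type_node -> TFmode,
                             ibm128_float_type_node  -> IFmode
     TARGET_IEEEQUAD clear:  ieee128_float_type_node -> KFmode,
                             ibm128_float_type_node  -> TFmode
   so TFmode always names whichever 128-bit format "long double" uses,
   and the other format keeps its fixed-identity mode (KFmode or
   IFmode).  */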
14506 /* Initialize the modes for builtin_function_type, mapping a machine mode to
14507 tree type node. */
14508 builtin_mode_to_type[QImode][0] = integer_type_node;
14509 builtin_mode_to_type[HImode][0] = integer_type_node;
14510 builtin_mode_to_type[SImode][0] = intSI_type_node;
14511 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14512 builtin_mode_to_type[DImode][0] = intDI_type_node;
14513 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14514 builtin_mode_to_type[TImode][0] = intTI_type_node;
14515 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14516 builtin_mode_to_type[SFmode][0] = float_type_node;
14517 builtin_mode_to_type[DFmode][0] = double_type_node;
14518 builtin_mode_to_type[IFmode][0] = ibm128_float_type_node;
14519 builtin_mode_to_type[KFmode][0] = ieee128_float_type_node;
14520 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14521 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14522 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14523 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14524 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14525 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14526 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14527 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14528 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14529 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14530 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14531 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14532 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14533 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14534 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14535 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14536 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14537 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
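/* An illustrative lookup (not part of GCC): builtin_function_type
   resolves each (mode, unsignedness) pair of a builtin's insn pattern
   through this table.  */
#if 0
static tree
example_lookup (void)
{
  /* Second index: 0 = signed variant, 1 = unsigned variant.  */
  return builtin_mode_to_type[V4SImode][1];	/* unsigned_V4SI_type_node */
}
#endif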
14539 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
14540 TYPE_NAME (bool_char_type_node) = tdecl;
14542 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
14543 TYPE_NAME (bool_short_type_node) = tdecl;
14545 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
14546 TYPE_NAME (bool_int_type_node) = tdecl;
14548 tdecl = add_builtin_type ("__pixel", pixel_type_node);
14549 TYPE_NAME (pixel_type_node) = tdecl;
14551 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
14552 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
14553 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
14554 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
14555 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
14557 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
14558 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
14560 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
14561 TYPE_NAME (V16QI_type_node) = tdecl;
14563 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
14564 TYPE_NAME (bool_V16QI_type_node) = tdecl;
14566 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
14567 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
14569 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
14570 TYPE_NAME (V8HI_type_node) = tdecl;
14572 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
14573 TYPE_NAME (bool_V8HI_type_node) = tdecl;
14575 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
14576 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
14578 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
14579 TYPE_NAME (V4SI_type_node) = tdecl;
14581 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
14582 TYPE_NAME (bool_V4SI_type_node) = tdecl;
14584 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
14585 TYPE_NAME (V4SF_type_node) = tdecl;
14587 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
14588 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
14590 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
14591 TYPE_NAME (V2DF_type_node) = tdecl;
14593 if (TARGET_POWERPC64)
14595 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
14596 TYPE_NAME (V2DI_type_node) = tdecl;
14598 tdecl = add_builtin_type ("__vector unsigned long",
14599 unsigned_V2DI_type_node);
14600 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14602 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
14603 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14605 else
14607 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
14608 TYPE_NAME (V2DI_type_node) = tdecl;
14610 tdecl = add_builtin_type ("__vector unsigned long long",
14611 unsigned_V2DI_type_node);
14612 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14614 tdecl = add_builtin_type ("__vector __bool long long",
14615 bool_V2DI_type_node);
14616 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14619 if (V1TI_type_node)
14621 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
14622 TYPE_NAME (V1TI_type_node) = tdecl;
14624 tdecl = add_builtin_type ("__vector unsigned __int128",
14625 unsigned_V1TI_type_node);
14626 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
14629 /* Paired and SPE builtins are only available if you build a compiler with
14630 the appropriate options, so only create those builtins with the
14631 appropriate compiler option. Create Altivec and VSX builtins on machines
14632 with at least the general purpose extensions (970 and newer) to allow the
14633 use of the target attribute. */
14634 if (TARGET_PAIRED_FLOAT)
14635 paired_init_builtins ();
14636 if (TARGET_SPE)
14637 spe_init_builtins ();
14638 if (TARGET_EXTRA_BUILTINS)
14639 altivec_init_builtins ();
14640 if (TARGET_HTM)
14641 htm_init_builtins ();
14643 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
14644 rs6000_common_init_builtins ();
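/* A user-level sketch of why the builtins are registered eagerly
   (illustrative; assumes a 970-or-newer default CPU, per the comment
   above): a single function can enable AltiVec through the target
   attribute even when the file is not compiled with -maltivec.  */
#if 0
__attribute__ ((target ("altivec")))
__vector signed int
vadd (__vector signed int a, __vector signed int b)
{
  return __builtin_altivec_vadduwm (a, b);
}
#endif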
14646 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
14647 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
14648 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
14650 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
14651 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
14652 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
14654 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
14655 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
14656 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
14658 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
14659 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
14660 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
14662 mode = (TARGET_64BIT) ? DImode : SImode;
14663 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
14664 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
14665 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
14667 ftype = build_function_type_list (unsigned_intDI_type_node,
14668 NULL_TREE);
14669 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
14671 if (TARGET_64BIT)
14672 ftype = build_function_type_list (unsigned_intDI_type_node,
14673 NULL_TREE);
14674 else
14675 ftype = build_function_type_list (unsigned_intSI_type_node,
14676 NULL_TREE);
14677 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
14679 ftype = build_function_type_list (double_type_node, NULL_TREE);
14680 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
14682 ftype = build_function_type_list (void_type_node,
14683 intSI_type_node, double_type_node,
14684 NULL_TREE);
14685 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
14687 #if TARGET_XCOFF
14688 /* AIX libm provides clog as __clog. */
14689 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
14690 set_user_assembler_name (tdecl, "__clog");
14691 #endif
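/* The visible effect of the remapping above, as a sketch (AIX only):  */
#if 0
#include <complex.h>
double _Complex
user_clog (double _Complex x)
{
  return clog (x);	/* The call is assembled as "__clog".  */
}
#endif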
14693 #ifdef SUBTARGET_INIT_BUILTINS
14694 SUBTARGET_INIT_BUILTINS;
14695 #endif
14698 /* Returns the rs6000 builtin decl for CODE. */
14700 static tree
14701 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
14703 HOST_WIDE_INT fnmask;
14705 if (code >= RS6000_BUILTIN_COUNT)
14706 return error_mark_node;
14708 fnmask = rs6000_builtin_info[code].mask;
14709 if ((fnmask & rs6000_builtin_mask) != fnmask)
14711 rs6000_invalid_builtin ((enum rs6000_builtins)code);
14712 return error_mark_node;
14715 return rs6000_builtin_decls[code];
14718 static void
14719 spe_init_builtins (void)
14721 tree puint_type_node = build_pointer_type (unsigned_type_node);
14722 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
14723 const struct builtin_description *d;
14724 size_t i;
14726 tree v2si_ftype_4_v2si
14727 = build_function_type_list (opaque_V2SI_type_node,
14728 opaque_V2SI_type_node,
14729 opaque_V2SI_type_node,
14730 opaque_V2SI_type_node,
14731 opaque_V2SI_type_node,
14732 NULL_TREE);
14734 tree v2sf_ftype_4_v2sf
14735 = build_function_type_list (opaque_V2SF_type_node,
14736 opaque_V2SF_type_node,
14737 opaque_V2SF_type_node,
14738 opaque_V2SF_type_node,
14739 opaque_V2SF_type_node,
14740 NULL_TREE);
14742 tree int_ftype_int_v2si_v2si
14743 = build_function_type_list (integer_type_node,
14744 integer_type_node,
14745 opaque_V2SI_type_node,
14746 opaque_V2SI_type_node,
14747 NULL_TREE);
14749 tree int_ftype_int_v2sf_v2sf
14750 = build_function_type_list (integer_type_node,
14751 integer_type_node,
14752 opaque_V2SF_type_node,
14753 opaque_V2SF_type_node,
14754 NULL_TREE);
14756 tree void_ftype_v2si_puint_int
14757 = build_function_type_list (void_type_node,
14758 opaque_V2SI_type_node,
14759 puint_type_node,
14760 integer_type_node,
14761 NULL_TREE);
14763 tree void_ftype_v2si_puint_char
14764 = build_function_type_list (void_type_node,
14765 opaque_V2SI_type_node,
14766 puint_type_node,
14767 char_type_node,
14768 NULL_TREE);
14770 tree void_ftype_v2si_pv2si_int
14771 = build_function_type_list (void_type_node,
14772 opaque_V2SI_type_node,
14773 opaque_p_V2SI_type_node,
14774 integer_type_node,
14775 NULL_TREE);
14777 tree void_ftype_v2si_pv2si_char
14778 = build_function_type_list (void_type_node,
14779 opaque_V2SI_type_node,
14780 opaque_p_V2SI_type_node,
14781 char_type_node,
14782 NULL_TREE);
14784 tree void_ftype_int
14785 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14787 tree int_ftype_void
14788 = build_function_type_list (integer_type_node, NULL_TREE);
14790 tree v2si_ftype_pv2si_int
14791 = build_function_type_list (opaque_V2SI_type_node,
14792 opaque_p_V2SI_type_node,
14793 integer_type_node,
14794 NULL_TREE);
14796 tree v2si_ftype_puint_int
14797 = build_function_type_list (opaque_V2SI_type_node,
14798 puint_type_node,
14799 integer_type_node,
14800 NULL_TREE);
14802 tree v2si_ftype_pushort_int
14803 = build_function_type_list (opaque_V2SI_type_node,
14804 pushort_type_node,
14805 integer_type_node,
14806 NULL_TREE);
14808 tree v2si_ftype_signed_char
14809 = build_function_type_list (opaque_V2SI_type_node,
14810 signed_char_type_node,
14811 NULL_TREE);
14813 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
14815 /* Initialize irregular SPE builtins. */
14817 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
14818 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
14819 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
14820 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
14821 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
14822 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
14823 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
14824 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
14825 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
14826 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
14827 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
14828 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
14829 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
14830 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
14831 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
14832 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
14833 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
14834 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
14836 /* Loads. */
14837 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
14838 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
14839 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
14840 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
14841 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
14842 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
14843 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
14844 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
14845 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
14846 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
14847 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
14848 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
14849 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
14850 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
14851 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
14852 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
14853 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
14854 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
14855 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
14856 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
14857 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
14858 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
14860 /* Predicates. */
14861 d = bdesc_spe_predicates;
14862 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
14864 tree type;
14866 switch (insn_data[d->icode].operand[1].mode)
14868 case V2SImode:
14869 type = int_ftype_int_v2si_v2si;
14870 break;
14871 case V2SFmode:
14872 type = int_ftype_int_v2sf_v2sf;
14873 break;
14874 default:
14875 gcc_unreachable ();
14878 def_builtin (d->name, type, d->code);
14881 /* Evsel predicates. */
14882 d = bdesc_spe_evsel;
14883 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
14885 tree type;
14887 switch (insn_data[d->icode].operand[1].mode)
14889 case V2SImode:
14890 type = v2si_ftype_4_v2si;
14891 break;
14892 case V2SFmode:
14893 type = v2sf_ftype_4_v2sf;
14894 break;
14895 default:
14896 gcc_unreachable ();
14899 def_builtin (d->name, type, d->code);
14903 static void
14904 paired_init_builtins (void)
14906 const struct builtin_description *d;
14907 size_t i;
14909 tree int_ftype_int_v2sf_v2sf
14910 = build_function_type_list (integer_type_node,
14911 integer_type_node,
14912 V2SF_type_node,
14913 V2SF_type_node,
14914 NULL_TREE);
14915 tree pcfloat_type_node =
14916 build_pointer_type (build_qualified_type
14917 (float_type_node, TYPE_QUAL_CONST));
14919 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
14920 long_integer_type_node,
14921 pcfloat_type_node,
14922 NULL_TREE);
14923 tree void_ftype_v2sf_long_pcfloat =
14924 build_function_type_list (void_type_node,
14925 V2SF_type_node,
14926 long_integer_type_node,
14927 pcfloat_type_node,
14928 NULL_TREE);
14931 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
14932 PAIRED_BUILTIN_LX);
14935 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
14936 PAIRED_BUILTIN_STX);
14938 /* Predicates. */
14939 d = bdesc_paired_preds;
14940 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
14942 tree type;
14944 if (TARGET_DEBUG_BUILTIN)
14945 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
14946 (int)i, get_insn_name (d->icode), (int)d->icode,
14947 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
14949 switch (insn_data[d->icode].operand[1].mode)
14951 case V2SFmode:
14952 type = int_ftype_int_v2sf_v2sf;
14953 break;
14954 default:
14955 gcc_unreachable ();
14958 def_builtin (d->name, type, d->code);
14962 static void
14963 altivec_init_builtins (void)
14965 const struct builtin_description *d;
14966 size_t i;
14967 tree ftype;
14968 tree decl;
14970 tree pvoid_type_node = build_pointer_type (void_type_node);
14972 tree pcvoid_type_node
14973 = build_pointer_type (build_qualified_type (void_type_node,
14974 TYPE_QUAL_CONST));
14976 tree int_ftype_opaque
14977 = build_function_type_list (integer_type_node,
14978 opaque_V4SI_type_node, NULL_TREE);
14979 tree opaque_ftype_opaque
14980 = build_function_type_list (integer_type_node, NULL_TREE);
14981 tree opaque_ftype_opaque_int
14982 = build_function_type_list (opaque_V4SI_type_node,
14983 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
14984 tree opaque_ftype_opaque_opaque_int
14985 = build_function_type_list (opaque_V4SI_type_node,
14986 opaque_V4SI_type_node, opaque_V4SI_type_node,
14987 integer_type_node, NULL_TREE);
14988 tree int_ftype_int_opaque_opaque
14989 = build_function_type_list (integer_type_node,
14990 integer_type_node, opaque_V4SI_type_node,
14991 opaque_V4SI_type_node, NULL_TREE);
14992 tree int_ftype_int_v4si_v4si
14993 = build_function_type_list (integer_type_node,
14994 integer_type_node, V4SI_type_node,
14995 V4SI_type_node, NULL_TREE);
14996 tree int_ftype_int_v2di_v2di
14997 = build_function_type_list (integer_type_node,
14998 integer_type_node, V2DI_type_node,
14999 V2DI_type_node, NULL_TREE);
15000 tree void_ftype_v4si
15001 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
15002 tree v8hi_ftype_void
15003 = build_function_type_list (V8HI_type_node, NULL_TREE);
15004 tree void_ftype_void
15005 = build_function_type_list (void_type_node, NULL_TREE);
15006 tree void_ftype_int
15007 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
15009 tree opaque_ftype_long_pcvoid
15010 = build_function_type_list (opaque_V4SI_type_node,
15011 long_integer_type_node, pcvoid_type_node,
15012 NULL_TREE);
15013 tree v16qi_ftype_long_pcvoid
15014 = build_function_type_list (V16QI_type_node,
15015 long_integer_type_node, pcvoid_type_node,
15016 NULL_TREE);
15017 tree v8hi_ftype_long_pcvoid
15018 = build_function_type_list (V8HI_type_node,
15019 long_integer_type_node, pcvoid_type_node,
15020 NULL_TREE);
15021 tree v4si_ftype_long_pcvoid
15022 = build_function_type_list (V4SI_type_node,
15023 long_integer_type_node, pcvoid_type_node,
15024 NULL_TREE);
15025 tree v4sf_ftype_long_pcvoid
15026 = build_function_type_list (V4SF_type_node,
15027 long_integer_type_node, pcvoid_type_node,
15028 NULL_TREE);
15029 tree v2df_ftype_long_pcvoid
15030 = build_function_type_list (V2DF_type_node,
15031 long_integer_type_node, pcvoid_type_node,
15032 NULL_TREE);
15033 tree v2di_ftype_long_pcvoid
15034 = build_function_type_list (V2DI_type_node,
15035 long_integer_type_node, pcvoid_type_node,
15036 NULL_TREE);
15038 tree void_ftype_opaque_long_pvoid
15039 = build_function_type_list (void_type_node,
15040 opaque_V4SI_type_node, long_integer_type_node,
15041 pvoid_type_node, NULL_TREE);
15042 tree void_ftype_v4si_long_pvoid
15043 = build_function_type_list (void_type_node,
15044 V4SI_type_node, long_integer_type_node,
15045 pvoid_type_node, NULL_TREE);
15046 tree void_ftype_v16qi_long_pvoid
15047 = build_function_type_list (void_type_node,
15048 V16QI_type_node, long_integer_type_node,
15049 pvoid_type_node, NULL_TREE);
15050 tree void_ftype_v8hi_long_pvoid
15051 = build_function_type_list (void_type_node,
15052 V8HI_type_node, long_integer_type_node,
15053 pvoid_type_node, NULL_TREE);
15054 tree void_ftype_v4sf_long_pvoid
15055 = build_function_type_list (void_type_node,
15056 V4SF_type_node, long_integer_type_node,
15057 pvoid_type_node, NULL_TREE);
15058 tree void_ftype_v2df_long_pvoid
15059 = build_function_type_list (void_type_node,
15060 V2DF_type_node, long_integer_type_node,
15061 pvoid_type_node, NULL_TREE);
15062 tree void_ftype_v2di_long_pvoid
15063 = build_function_type_list (void_type_node,
15064 V2DI_type_node, long_integer_type_node,
15065 pvoid_type_node, NULL_TREE);
15066 tree int_ftype_int_v8hi_v8hi
15067 = build_function_type_list (integer_type_node,
15068 integer_type_node, V8HI_type_node,
15069 V8HI_type_node, NULL_TREE);
15070 tree int_ftype_int_v16qi_v16qi
15071 = build_function_type_list (integer_type_node,
15072 integer_type_node, V16QI_type_node,
15073 V16QI_type_node, NULL_TREE);
15074 tree int_ftype_int_v4sf_v4sf
15075 = build_function_type_list (integer_type_node,
15076 integer_type_node, V4SF_type_node,
15077 V4SF_type_node, NULL_TREE);
15078 tree int_ftype_int_v2df_v2df
15079 = build_function_type_list (integer_type_node,
15080 integer_type_node, V2DF_type_node,
15081 V2DF_type_node, NULL_TREE);
15082 tree v2di_ftype_v2di
15083 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
15084 tree v4si_ftype_v4si
15085 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
15086 tree v8hi_ftype_v8hi
15087 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
15088 tree v16qi_ftype_v16qi
15089 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
15090 tree v4sf_ftype_v4sf
15091 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
15092 tree v2df_ftype_v2df
15093 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
15094 tree void_ftype_pcvoid_int_int
15095 = build_function_type_list (void_type_node,
15096 pcvoid_type_node, integer_type_node,
15097 integer_type_node, NULL_TREE);
15099 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
15100 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
15101 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
15102 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
15103 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
15104 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
15105 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
15106 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
15107 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
15108 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
15109 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
15110 ALTIVEC_BUILTIN_LVXL_V2DF);
15111 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
15112 ALTIVEC_BUILTIN_LVXL_V2DI);
15113 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
15114 ALTIVEC_BUILTIN_LVXL_V4SF);
15115 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
15116 ALTIVEC_BUILTIN_LVXL_V4SI);
15117 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
15118 ALTIVEC_BUILTIN_LVXL_V8HI);
15119 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
15120 ALTIVEC_BUILTIN_LVXL_V16QI);
15121 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
15122 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
15123 ALTIVEC_BUILTIN_LVX_V2DF);
15124 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
15125 ALTIVEC_BUILTIN_LVX_V2DI);
15126 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
15127 ALTIVEC_BUILTIN_LVX_V4SF);
15128 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
15129 ALTIVEC_BUILTIN_LVX_V4SI);
15130 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
15131 ALTIVEC_BUILTIN_LVX_V8HI);
15132 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
15133 ALTIVEC_BUILTIN_LVX_V16QI);
15134 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
15135 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
15136 ALTIVEC_BUILTIN_STVX_V2DF);
15137 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
15138 ALTIVEC_BUILTIN_STVX_V2DI);
15139 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
15140 ALTIVEC_BUILTIN_STVX_V4SF);
15141 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
15142 ALTIVEC_BUILTIN_STVX_V4SI);
15143 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
15144 ALTIVEC_BUILTIN_STVX_V8HI);
15145 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
15146 ALTIVEC_BUILTIN_STVX_V16QI);
15147 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
15148 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
15149 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
15150 ALTIVEC_BUILTIN_STVXL_V2DF);
15151 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
15152 ALTIVEC_BUILTIN_STVXL_V2DI);
15153 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
15154 ALTIVEC_BUILTIN_STVXL_V4SF);
15155 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
15156 ALTIVEC_BUILTIN_STVXL_V4SI);
15157 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
15158 ALTIVEC_BUILTIN_STVXL_V8HI);
15159 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
15160 ALTIVEC_BUILTIN_STVXL_V16QI);
15161 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
15162 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
15163 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
15164 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
15165 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
15166 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
15167 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
15168 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
15169 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
15170 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
15171 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
15172 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
15173 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
15174 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
15175 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
15176 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
15178 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
15179 VSX_BUILTIN_LXVD2X_V2DF);
15180 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
15181 VSX_BUILTIN_LXVD2X_V2DI);
15182 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
15183 VSX_BUILTIN_LXVW4X_V4SF);
15184 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
15185 VSX_BUILTIN_LXVW4X_V4SI);
15186 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
15187 VSX_BUILTIN_LXVW4X_V8HI);
15188 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
15189 VSX_BUILTIN_LXVW4X_V16QI);
15190 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
15191 VSX_BUILTIN_STXVD2X_V2DF);
15192 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
15193 VSX_BUILTIN_STXVD2X_V2DI);
15194 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
15195 VSX_BUILTIN_STXVW4X_V4SF);
15196 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
15197 VSX_BUILTIN_STXVW4X_V4SI);
15198 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
15199 VSX_BUILTIN_STXVW4X_V8HI);
15200 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
15201 VSX_BUILTIN_STXVW4X_V16QI);
15202 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
15203 VSX_BUILTIN_VEC_LD);
15204 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
15205 VSX_BUILTIN_VEC_ST);
15207 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
15208 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
15209 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
15211 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
15212 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
15213 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
15214 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
15215 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
15216 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
15217 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
15218 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
15219 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
15220 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
15221 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
15222 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
15224 /* Cell builtins. */
15225 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
15226 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
15227 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
15228 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
15230 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
15231 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
15232 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
15233 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
15235 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
15236 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
15237 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
15238 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
15240 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
15241 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
15242 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
15243 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
15245 /* Add the DST variants. */
15246 d = bdesc_dst;
15247 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
15248 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
15250 /* Initialize the predicates. */
15251 d = bdesc_altivec_preds;
15252 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
15254 machine_mode mode1;
15255 tree type;
15257 if (rs6000_overloaded_builtin_p (d->code))
15258 mode1 = VOIDmode;
15259 else
15260 mode1 = insn_data[d->icode].operand[1].mode;
15262 switch (mode1)
15264 case VOIDmode:
15265 type = int_ftype_int_opaque_opaque;
15266 break;
15267 case V2DImode:
15268 type = int_ftype_int_v2di_v2di;
15269 break;
15270 case V4SImode:
15271 type = int_ftype_int_v4si_v4si;
15272 break;
15273 case V8HImode:
15274 type = int_ftype_int_v8hi_v8hi;
15275 break;
15276 case V16QImode:
15277 type = int_ftype_int_v16qi_v16qi;
15278 break;
15279 case V4SFmode:
15280 type = int_ftype_int_v4sf_v4sf;
15281 break;
15282 case V2DFmode:
15283 type = int_ftype_int_v2df_v2df;
15284 break;
15285 default:
15286 gcc_unreachable ();
15289 def_builtin (d->name, type, d->code);
15292 /* Initialize the abs* operators. */
15293 d = bdesc_abs;
15294 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
15296 machine_mode mode0;
15297 tree type;
15299 mode0 = insn_data[d->icode].operand[0].mode;
15301 switch (mode0)
15303 case V2DImode:
15304 type = v2di_ftype_v2di;
15305 break;
15306 case V4SImode:
15307 type = v4si_ftype_v4si;
15308 break;
15309 case V8HImode:
15310 type = v8hi_ftype_v8hi;
15311 break;
15312 case V16QImode:
15313 type = v16qi_ftype_v16qi;
15314 break;
15315 case V4SFmode:
15316 type = v4sf_ftype_v4sf;
15317 break;
15318 case V2DFmode:
15319 type = v2df_ftype_v2df;
15320 break;
15321 default:
15322 gcc_unreachable ();
15325 def_builtin (d->name, type, d->code);
15328 /* Initialize target builtin that implements
15329 targetm.vectorize.builtin_mask_for_load. */
15331 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
15332 v16qi_ftype_long_pcvoid,
15333 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
15334 BUILT_IN_MD, NULL, NULL_TREE);
15335 TREE_READONLY (decl) = 1;
15336 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
15337 altivec_builtin_mask_for_load = decl;
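/* A sketch of the realignment scheme this hook supports (the classic
   big-endian AltiVec idiom; the vectorizer emits the equivalent in
   tree form rather than as user code):  */
#if 0
#include <altivec.h>
static vector unsigned char
realign_load (const unsigned char *addr)
{
  vector unsigned char msk = vec_lvsl (0, addr);	/* permute mask */
  vector unsigned char lo  = vec_ld (0, addr);		/* aligned low part */
  vector unsigned char hi  = vec_ld (15, addr);		/* aligned high part */
  return vec_perm (lo, hi, msk);			/* realigned data */
}
#endif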
15339 /* Access to the vec_init patterns. */
15340 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
15341 integer_type_node, integer_type_node,
15342 integer_type_node, NULL_TREE);
15343 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
15345 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
15346 short_integer_type_node,
15347 short_integer_type_node,
15348 short_integer_type_node,
15349 short_integer_type_node,
15350 short_integer_type_node,
15351 short_integer_type_node,
15352 short_integer_type_node, NULL_TREE);
15353 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
15355 ftype = build_function_type_list (V16QI_type_node, char_type_node,
15356 char_type_node, char_type_node,
15357 char_type_node, char_type_node,
15358 char_type_node, char_type_node,
15359 char_type_node, char_type_node,
15360 char_type_node, char_type_node,
15361 char_type_node, char_type_node,
15362 char_type_node, char_type_node,
15363 char_type_node, NULL_TREE);
15364 def_builtin ("__builtin_vec_init_v16qi", ftype,
15365 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
15367 ftype = build_function_type_list (V4SF_type_node, float_type_node,
15368 float_type_node, float_type_node,
15369 float_type_node, NULL_TREE);
15370 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
15372 /* VSX builtins. */
15373 ftype = build_function_type_list (V2DF_type_node, double_type_node,
15374 double_type_node, NULL_TREE);
15375 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
15377 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
15378 intDI_type_node, NULL_TREE);
15379 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
15381 /* Access to the vec_set patterns. */
15382 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
15383 intSI_type_node,
15384 integer_type_node, NULL_TREE);
15385 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
15387 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15388 intHI_type_node,
15389 integer_type_node, NULL_TREE);
15390 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
15392 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
15393 intQI_type_node,
15394 integer_type_node, NULL_TREE);
15395 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
15397 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
15398 float_type_node,
15399 integer_type_node, NULL_TREE);
15400 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
15402 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
15403 double_type_node,
15404 integer_type_node, NULL_TREE);
15405 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
15407 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
15408 intDI_type_node,
15409 integer_type_node, NULL_TREE);
15410 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
15412 /* Access to the vec_extract patterns. */
15413 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15414 integer_type_node, NULL_TREE);
15415 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
15417 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15418 integer_type_node, NULL_TREE);
15419 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
15421 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15422 integer_type_node, NULL_TREE);
15423 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
15425 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15426 integer_type_node, NULL_TREE);
15427 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
15429 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15430 integer_type_node, NULL_TREE);
15431 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
15433 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
15434 integer_type_node, NULL_TREE);
15435 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
15438 if (V1TI_type_node)
15440 tree v1ti_ftype_long_pcvoid
15441 = build_function_type_list (V1TI_type_node,
15442 long_integer_type_node, pcvoid_type_node,
15443 NULL_TREE);
15444 tree void_ftype_v1ti_long_pvoid
15445 = build_function_type_list (void_type_node,
15446 V1TI_type_node, long_integer_type_node,
15447 pvoid_type_node, NULL_TREE);
15448 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
15449 VSX_BUILTIN_LXVD2X_V1TI);
15450 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
15451 VSX_BUILTIN_STXVD2X_V1TI);
15452 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
15453 NULL_TREE);
15454 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
15455 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
15456 intTI_type_node,
15457 integer_type_node, NULL_TREE);
15458 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
15459 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
15460 integer_type_node, NULL_TREE);
15461 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
15466 static void
15467 htm_init_builtins (void)
15469 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15470 const struct builtin_description *d;
15471 size_t i;
15473 d = bdesc_htm;
15474 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15476 tree op[MAX_HTM_OPERANDS], type;
15477 HOST_WIDE_INT mask = d->mask;
15478 unsigned attr = rs6000_builtin_info[d->code].attr;
15479 bool void_func = (attr & RS6000_BTC_VOID);
15480 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
15481 int nopnds = 0;
15482 tree gpr_type_node;
15483 tree rettype;
15484 tree argtype;
15486 if (TARGET_32BIT && TARGET_POWERPC64)
15487 gpr_type_node = long_long_unsigned_type_node;
15488 else
15489 gpr_type_node = long_unsigned_type_node;
15491 if (attr & RS6000_BTC_SPR)
15493 rettype = gpr_type_node;
15494 argtype = gpr_type_node;
15496 else if (d->code == HTM_BUILTIN_TABORTDC
15497 || d->code == HTM_BUILTIN_TABORTDCI)
15499 rettype = unsigned_type_node;
15500 argtype = gpr_type_node;
15502 else
15504 rettype = unsigned_type_node;
15505 argtype = unsigned_type_node;
15508 if ((mask & builtin_mask) != mask)
15510 if (TARGET_DEBUG_BUILTIN)
15511 fprintf (stderr, "htm_builtin, skip binary %s\n", d->name);
15512 continue;
15515 if (d->name == 0)
15517 if (TARGET_DEBUG_BUILTIN)
15518 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
15519 (long unsigned) i);
15520 continue;
15523 op[nopnds++] = (void_func) ? void_type_node : rettype;
15525 if (attr_args == RS6000_BTC_UNARY)
15526 op[nopnds++] = argtype;
15527 else if (attr_args == RS6000_BTC_BINARY)
15529 op[nopnds++] = argtype;
15530 op[nopnds++] = argtype;
15532 else if (attr_args == RS6000_BTC_TERNARY)
15534 op[nopnds++] = argtype;
15535 op[nopnds++] = argtype;
15536 op[nopnds++] = argtype;
15539 switch (nopnds)
15541 case 1:
15542 type = build_function_type_list (op[0], NULL_TREE);
15543 break;
15544 case 2:
15545 type = build_function_type_list (op[0], op[1], NULL_TREE);
15546 break;
15547 case 3:
15548 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
15549 break;
15550 case 4:
15551 type = build_function_type_list (op[0], op[1], op[2], op[3],
15552 NULL_TREE);
15553 break;
15554 default:
15555 gcc_unreachable ();
15558 def_builtin (d->name, type, d->code);
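/* For example (a sketch; assumes __builtin_get_texasr is one of the
   RS6000_BTC_SPR builtins handled above): an SPR accessor reads a
   64-bit register, so under -m32 -mpowerpc64 it is typed with
   long_long_unsigned_type_node to keep the full register width.  */
#if 0
unsigned long long
read_texasr (void)
{
  return __builtin_get_texasr ();	/* full 64-bit TEXASR */
}
#endif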
15562 /* Hash function for builtin functions with up to 3 arguments and a return
15563 type. */
15564 hashval_t
15565 builtin_hasher::hash (builtin_hash_struct *bh)
15567 unsigned ret = 0;
15568 int i;
15570 for (i = 0; i < 4; i++)
15572 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
15573 ret = (ret * 2) + bh->uns_p[i];
15576 return ret;
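/* Worked through (a sketch, modulo 32-bit wraparound): with
   M = MAX_MACHINE_MODE, the loop computes
     ret = (((mode[0] * 2 + uns_p[0]) * M + mode[1]) * 2 + uns_p[1]) ...
   i.e. a positional encoding of the four (mode, uns_p) pairs, so two
   distinct signatures collide only through wraparound -- and equal ()
   below still compares all eight fields exactly.  */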
15579 /* Compare builtin hash entries H1 and H2 for equivalence. */
15580 bool
15581 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
15583 return ((p1->mode[0] == p2->mode[0])
15584 && (p1->mode[1] == p2->mode[1])
15585 && (p1->mode[2] == p2->mode[2])
15586 && (p1->mode[3] == p2->mode[3])
15587 && (p1->uns_p[0] == p2->uns_p[0])
15588 && (p1->uns_p[1] == p2->uns_p[1])
15589 && (p1->uns_p[2] == p2->uns_p[2])
15590 && (p1->uns_p[3] == p2->uns_p[3]));
15593 /* Map types for builtin functions with an explicit return type and up to 3
15594 arguments.  Functions with fewer than 3 arguments pass VOIDmode for the
15595 unused argument slots. */
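/* For example, the call made in rs6000_init_builtins,
     builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
			    RS6000_BUILTIN_RECIP, "__builtin_recipdiv"),
   resolves to the C type "double (double, double)": the trailing
   VOIDmode slot drops out of the argument count below.  */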
15596 static tree
15597 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
15598 machine_mode mode_arg1, machine_mode mode_arg2,
15599 enum rs6000_builtins builtin, const char *name)
15601 struct builtin_hash_struct h;
15602 struct builtin_hash_struct *h2;
15603 int num_args = 3;
15604 int i;
15605 tree ret_type = NULL_TREE;
15606 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
15608 /* Create builtin_hash_table. */
15609 if (builtin_hash_table == NULL)
15610 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
15612 h.type = NULL_TREE;
15613 h.mode[0] = mode_ret;
15614 h.mode[1] = mode_arg0;
15615 h.mode[2] = mode_arg1;
15616 h.mode[3] = mode_arg2;
15617 h.uns_p[0] = 0;
15618 h.uns_p[1] = 0;
15619 h.uns_p[2] = 0;
15620 h.uns_p[3] = 0;
15622 /* If the builtin is one that produces unsigned results or takes unsigned
15623 arguments, and it is returned as a decl to the vectorizer (such as
15624 widening multiplies or permutes), make sure the argument and return
15625 types are correct. */
15626 switch (builtin)
15628 /* unsigned 1 argument functions. */
15629 case CRYPTO_BUILTIN_VSBOX:
15630 case P8V_BUILTIN_VGBBD:
15631 case MISC_BUILTIN_CDTBCD:
15632 case MISC_BUILTIN_CBCDTD:
15633 h.uns_p[0] = 1;
15634 h.uns_p[1] = 1;
15635 break;
15637 /* unsigned 2 argument functions. */
15638 case ALTIVEC_BUILTIN_VMULEUB_UNS:
15639 case ALTIVEC_BUILTIN_VMULEUH_UNS:
15640 case ALTIVEC_BUILTIN_VMULOUB_UNS:
15641 case ALTIVEC_BUILTIN_VMULOUH_UNS:
15642 case CRYPTO_BUILTIN_VCIPHER:
15643 case CRYPTO_BUILTIN_VCIPHERLAST:
15644 case CRYPTO_BUILTIN_VNCIPHER:
15645 case CRYPTO_BUILTIN_VNCIPHERLAST:
15646 case CRYPTO_BUILTIN_VPMSUMB:
15647 case CRYPTO_BUILTIN_VPMSUMH:
15648 case CRYPTO_BUILTIN_VPMSUMW:
15649 case CRYPTO_BUILTIN_VPMSUMD:
15650 case CRYPTO_BUILTIN_VPMSUM:
15651 case MISC_BUILTIN_ADDG6S:
15652 case MISC_BUILTIN_DIVWEU:
15653 case MISC_BUILTIN_DIVWEUO:
15654 case MISC_BUILTIN_DIVDEU:
15655 case MISC_BUILTIN_DIVDEUO:
15656 h.uns_p[0] = 1;
15657 h.uns_p[1] = 1;
15658 h.uns_p[2] = 1;
15659 break;
15661 /* unsigned 3 argument functions. */
15662 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
15663 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
15664 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
15665 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
15666 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
15667 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
15668 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
15669 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
15670 case VSX_BUILTIN_VPERM_16QI_UNS:
15671 case VSX_BUILTIN_VPERM_8HI_UNS:
15672 case VSX_BUILTIN_VPERM_4SI_UNS:
15673 case VSX_BUILTIN_VPERM_2DI_UNS:
15674 case VSX_BUILTIN_XXSEL_16QI_UNS:
15675 case VSX_BUILTIN_XXSEL_8HI_UNS:
15676 case VSX_BUILTIN_XXSEL_4SI_UNS:
15677 case VSX_BUILTIN_XXSEL_2DI_UNS:
15678 case CRYPTO_BUILTIN_VPERMXOR:
15679 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
15680 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
15681 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
15682 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
15683 case CRYPTO_BUILTIN_VSHASIGMAW:
15684 case CRYPTO_BUILTIN_VSHASIGMAD:
15685 case CRYPTO_BUILTIN_VSHASIGMA:
15686 h.uns_p[0] = 1;
15687 h.uns_p[1] = 1;
15688 h.uns_p[2] = 1;
15689 h.uns_p[3] = 1;
15690 break;
15692 /* signed permute functions with unsigned char mask. */
15693 case ALTIVEC_BUILTIN_VPERM_16QI:
15694 case ALTIVEC_BUILTIN_VPERM_8HI:
15695 case ALTIVEC_BUILTIN_VPERM_4SI:
15696 case ALTIVEC_BUILTIN_VPERM_4SF:
15697 case ALTIVEC_BUILTIN_VPERM_2DI:
15698 case ALTIVEC_BUILTIN_VPERM_2DF:
15699 case VSX_BUILTIN_VPERM_16QI:
15700 case VSX_BUILTIN_VPERM_8HI:
15701 case VSX_BUILTIN_VPERM_4SI:
15702 case VSX_BUILTIN_VPERM_4SF:
15703 case VSX_BUILTIN_VPERM_2DI:
15704 case VSX_BUILTIN_VPERM_2DF:
15705 h.uns_p[3] = 1;
15706 break;
15708 /* unsigned args, signed return. */
15709 case VSX_BUILTIN_XVCVUXDDP_UNS:
15710 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
15711 h.uns_p[1] = 1;
15712 break;
15714 /* signed args, unsigned return. */
15715 case VSX_BUILTIN_XVCVDPUXDS_UNS:
15716 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
15717 case MISC_BUILTIN_UNPACK_TD:
15718 case MISC_BUILTIN_UNPACK_V1TI:
15719 h.uns_p[0] = 1;
15720 break;
15722 /* unsigned arguments for 128-bit pack instructions. */
15723 case MISC_BUILTIN_PACK_TD:
15724 case MISC_BUILTIN_PACK_V1TI:
15725 h.uns_p[1] = 1;
15726 h.uns_p[2] = 1;
15727 break;
15729 default:
15730 break;
15733 /* Figure out how many args are present. */
15734 while (num_args > 0 && h.mode[num_args] == VOIDmode)
15735 num_args--;
15737 if (num_args == 0)
15738 fatal_error (input_location,
15739 "internal error: builtin function %s had no type", name);
15741 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
15742 if (!ret_type && h.uns_p[0])
15743 ret_type = builtin_mode_to_type[h.mode[0]][0];
15745 if (!ret_type)
15746 fatal_error (input_location,
15747 "internal error: builtin function %s had an unexpected "
15748 "return type %s", name, GET_MODE_NAME (h.mode[0]));
15750 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
15751 arg_type[i] = NULL_TREE;
15753 for (i = 0; i < num_args; i++)
15755 int m = (int) h.mode[i+1];
15756 int uns_p = h.uns_p[i+1];
15758 arg_type[i] = builtin_mode_to_type[m][uns_p];
15759 if (!arg_type[i] && uns_p)
15760 arg_type[i] = builtin_mode_to_type[m][0];
15762 if (!arg_type[i])
15763 fatal_error (input_location,
15764 "internal error: builtin function %s, argument %d "
15765 "had unexpected argument type %s", name, i,
15766 GET_MODE_NAME (m));
15769 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
15770 if (*found == NULL)
15772 h2 = ggc_alloc<builtin_hash_struct> ();
15773 *h2 = h;
15774 *found = h2;
15776 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
15777 arg_type[2], NULL_TREE);
15780 return (*found)->type;
15783 static void
15784 rs6000_common_init_builtins (void)
15786 const struct builtin_description *d;
15787 size_t i;
15789 tree opaque_ftype_opaque = NULL_TREE;
15790 tree opaque_ftype_opaque_opaque = NULL_TREE;
15791 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
15792 tree v2si_ftype_qi = NULL_TREE;
15793 tree v2si_ftype_v2si_qi = NULL_TREE;
15794 tree v2si_ftype_int_qi = NULL_TREE;
15795 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15797 if (!TARGET_PAIRED_FLOAT)
15799 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
15800 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
15803 /* Paired and SPE builtins are only available if you build a compiler with
15804 the appropriate options, so only create those builtins with the
15805 appropriate compiler option. Create Altivec and VSX builtins on machines
15806 with at least the general purpose extensions (970 and newer) to allow the
15807 use of the target attribute. */
15809 if (TARGET_EXTRA_BUILTINS)
15810 builtin_mask |= RS6000_BTM_COMMON;
15812 /* Add the ternary operators. */
15813 d = bdesc_3arg;
15814 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15816 tree type;
15817 HOST_WIDE_INT mask = d->mask;
15819 if ((mask & builtin_mask) != mask)
15821 if (TARGET_DEBUG_BUILTIN)
15822 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
15823 continue;
15826 if (rs6000_overloaded_builtin_p (d->code))
15828 if (! (type = opaque_ftype_opaque_opaque_opaque))
15829 type = opaque_ftype_opaque_opaque_opaque
15830 = build_function_type_list (opaque_V4SI_type_node,
15831 opaque_V4SI_type_node,
15832 opaque_V4SI_type_node,
15833 opaque_V4SI_type_node,
15834 NULL_TREE);
15836 else
15838 enum insn_code icode = d->icode;
15839 if (d->name == 0)
15841 if (TARGET_DEBUG_BUILTIN)
15842 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
15843 (long unsigned)i);
15845 continue;
15848 if (icode == CODE_FOR_nothing)
15850 if (TARGET_DEBUG_BUILTIN)
15851 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
15852 d->name);
15854 continue;
15857 type = builtin_function_type (insn_data[icode].operand[0].mode,
15858 insn_data[icode].operand[1].mode,
15859 insn_data[icode].operand[2].mode,
15860 insn_data[icode].operand[3].mode,
15861 d->code, d->name);
15864 def_builtin (d->name, type, d->code);
15867 /* Add the binary operators. */
15868 d = bdesc_2arg;
15869 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15871 machine_mode mode0, mode1, mode2;
15872 tree type;
15873 HOST_WIDE_INT mask = d->mask;
15875 if ((mask & builtin_mask) != mask)
15877 if (TARGET_DEBUG_BUILTIN)
15878 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
15879 continue;
15882 if (rs6000_overloaded_builtin_p (d->code))
15884 if (! (type = opaque_ftype_opaque_opaque))
15885 type = opaque_ftype_opaque_opaque
15886 = build_function_type_list (opaque_V4SI_type_node,
15887 opaque_V4SI_type_node,
15888 opaque_V4SI_type_node,
15889 NULL_TREE);
15891 else
15893 enum insn_code icode = d->icode;
15894 if (d->name == 0)
15896 if (TARGET_DEBUG_BUILTIN)
15897 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
15898 (long unsigned)i);
15900 continue;
15903 if (icode == CODE_FOR_nothing)
15905 if (TARGET_DEBUG_BUILTIN)
15906 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
15907 d->name);
15909 continue;
15912 mode0 = insn_data[icode].operand[0].mode;
15913 mode1 = insn_data[icode].operand[1].mode;
15914 mode2 = insn_data[icode].operand[2].mode;
15916 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
15918 if (! (type = v2si_ftype_v2si_qi))
15919 type = v2si_ftype_v2si_qi
15920 = build_function_type_list (opaque_V2SI_type_node,
15921 opaque_V2SI_type_node,
15922 char_type_node,
15923 NULL_TREE);
15926 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
15927 && mode2 == QImode)
15929 if (! (type = v2si_ftype_int_qi))
15930 type = v2si_ftype_int_qi
15931 = build_function_type_list (opaque_V2SI_type_node,
15932 integer_type_node,
15933 char_type_node,
15934 NULL_TREE);
15937 else
15938 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
15939 d->code, d->name);
15942 def_builtin (d->name, type, d->code);
15945 /* Add the simple unary operators. */
15946 d = bdesc_1arg;
15947 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15949 machine_mode mode0, mode1;
15950 tree type;
15951 HOST_WIDE_INT mask = d->mask;
15953 if ((mask & builtin_mask) != mask)
15955 if (TARGET_DEBUG_BUILTIN)
15956 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
15957 continue;
15960 if (rs6000_overloaded_builtin_p (d->code))
15962 if (! (type = opaque_ftype_opaque))
15963 type = opaque_ftype_opaque
15964 = build_function_type_list (opaque_V4SI_type_node,
15965 opaque_V4SI_type_node,
15966 NULL_TREE);
15968 else
15970 enum insn_code icode = d->icode;
15971 if (d->name == 0)
15973 if (TARGET_DEBUG_BUILTIN)
15974 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
15975 (long unsigned)i);
15977 continue;
15980 if (icode == CODE_FOR_nothing)
15982 if (TARGET_DEBUG_BUILTIN)
15983 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
15984 d->name);
15986 continue;
15989 mode0 = insn_data[icode].operand[0].mode;
15990 mode1 = insn_data[icode].operand[1].mode;
15992 if (mode0 == V2SImode && mode1 == QImode)
15994 if (! (type = v2si_ftype_qi))
15995 type = v2si_ftype_qi
15996 = build_function_type_list (opaque_V2SI_type_node,
15997 char_type_node,
15998 NULL_TREE);
16001 else
16002 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
16003 d->code, d->name);
16006 def_builtin (d->name, type, d->code);
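/* Each table walk above follows the same pattern: skip an entry whose
   mask bits are not all set in builtin_mask (e.g. an Altivec-only
   builtin when the target does not enable Altivec), give overloaded
   builtins the opaque V4SI signature, and otherwise derive the
   signature from the insn's operand modes. */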
16010 static void
16011 rs6000_init_libfuncs (void)
16013 if (!TARGET_IEEEQUAD)
16014 /* AIX/Darwin/64-bit Linux quad floating point routines. */
16015 if (!TARGET_XL_COMPAT)
16017 set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
16018 set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
16019 set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
16020 set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
16022 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
16024 set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
16025 set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
16026 set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
16027 set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
16028 set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
16029 set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
16030 set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
16032 set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
16033 set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
16034 set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
16035 set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
16036 set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
16037 set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
16038 set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
16039 set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
16042 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
16043 set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
16045 else
16047 set_optab_libfunc (add_optab, TFmode, "_xlqadd");
16048 set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
16049 set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
16050 set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
16052 else
16054 /* 32-bit SVR4 quad floating point routines. */
16056 set_optab_libfunc (add_optab, TFmode, "_q_add");
16057 set_optab_libfunc (sub_optab, TFmode, "_q_sub");
16058 set_optab_libfunc (neg_optab, TFmode, "_q_neg");
16059 set_optab_libfunc (smul_optab, TFmode, "_q_mul");
16060 set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
16061 if (TARGET_PPC_GPOPT)
16062 set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");
16064 set_optab_libfunc (eq_optab, TFmode, "_q_feq");
16065 set_optab_libfunc (ne_optab, TFmode, "_q_fne");
16066 set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
16067 set_optab_libfunc (ge_optab, TFmode, "_q_fge");
16068 set_optab_libfunc (lt_optab, TFmode, "_q_flt");
16069 set_optab_libfunc (le_optab, TFmode, "_q_fle");
16071 set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
16072 set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
16073 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
16074 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
16075 set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
16076 set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
16077 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
16078 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
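/* So, for example, a TFmode addition on AIX/Darwin/64-bit Linux lowers
   to a call to __gcc_qadd (or _xlqadd under -mxl-compat), while 32-bit
   SVR4 targets call _q_add instead. */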
16083 /* Expand a block clear operation, and return 1 if successful. Return 0
16084 if we should let the compiler generate normal code.
16086 operands[0] is the destination
16087 operands[1] is the length
16088 operands[3] is the alignment */
16090 int
16091 expand_block_clear (rtx operands[])
16093 rtx orig_dest = operands[0];
16094 rtx bytes_rtx = operands[1];
16095 rtx align_rtx = operands[3];
16096 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
16097 HOST_WIDE_INT align;
16098 HOST_WIDE_INT bytes;
16099 int offset;
16100 int clear_bytes;
16101 int clear_step;
16103 /* If this is not a fixed size clear, just call memset. */
16104 if (! constp)
16105 return 0;
16107 /* This must be a fixed size alignment */
16108 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
16109 align = INTVAL (align_rtx) * BITS_PER_UNIT;
16111 /* Anything to clear? */
16112 bytes = INTVAL (bytes_rtx);
16113 if (bytes <= 0)
16114 return 1;
16116 /* Use the builtin memset after a point, to avoid huge code bloat.
16117 When optimize_size, avoid any significant code bloat; calling
16118 memset is about 4 instructions, so allow for one instruction to
16119 load zero and three to do clearing. */
16120 if (TARGET_ALTIVEC && align >= 128)
16121 clear_step = 16;
16122 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
16123 clear_step = 8;
16124 else if (TARGET_SPE && align >= 64)
16125 clear_step = 8;
16126 else
16127 clear_step = 4;
16129 if (optimize_size && bytes > 3 * clear_step)
16130 return 0;
16131 if (! optimize_size && bytes > 8 * clear_step)
16132 return 0;
16134 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
16136 machine_mode mode = BLKmode;
16137 rtx dest;
16139 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
16141 clear_bytes = 16;
16142 mode = V4SImode;
16144 else if (bytes >= 8 && TARGET_SPE && align >= 64)
16146 clear_bytes = 8;
16147 mode = V2SImode;
16149 else if (bytes >= 8 && TARGET_POWERPC64
16150 && (align >= 64 || !STRICT_ALIGNMENT))
16152 clear_bytes = 8;
16153 mode = DImode;
16154 if (offset == 0 && align < 64)
16156 rtx addr;
16158 /* If the address form is reg+offset with offset not a
16159 multiple of four, reload into reg indirect form here
16160 rather than waiting for reload. This way we get one
16161 reload, not one per store. */
16162 addr = XEXP (orig_dest, 0);
16163 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16164 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16165 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16167 addr = copy_addr_to_reg (addr);
16168 orig_dest = replace_equiv_address (orig_dest, addr);
16172 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16173 { /* clear 4 bytes */
16174 clear_bytes = 4;
16175 mode = SImode;
16177 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16178 { /* clear 2 bytes */
16179 clear_bytes = 2;
16180 mode = HImode;
16182 else /* clear 1 byte at a time */
16184 clear_bytes = 1;
16185 mode = QImode;
16188 dest = adjust_address (orig_dest, mode, offset);
16190 emit_move_insn (dest, CONST0_RTX (mode));
16193 return 1;
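/* A worked example of the limits above: with TARGET_POWERPC64 and an
   8-byte-aligned destination, clear_step is 8, so a 24-byte clear stays
   inline even under optimize_size (24 <= 3 * 8) and emits three DImode
   stores of zero, while a 72-byte clear (72 > 8 * 8) returns 0 and
   falls back to normal code (typically a memset call). */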
16197 /* Expand a block move operation, and return 1 if successful. Return 0
16198 if we should let the compiler generate normal code.
16200 operands[0] is the destination
16201 operands[1] is the source
16202 operands[2] is the length
16203 operands[3] is the alignment */
16205 #define MAX_MOVE_REG 4
16207 int
16208 expand_block_move (rtx operands[])
16210 rtx orig_dest = operands[0];
16211 rtx orig_src = operands[1];
16212 rtx bytes_rtx = operands[2];
16213 rtx align_rtx = operands[3];
16214 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
16215 int align;
16216 int bytes;
16217 int offset;
16218 int move_bytes;
16219 rtx stores[MAX_MOVE_REG];
16220 int num_reg = 0;
16222 /* If this is not a fixed size move, just call memcpy */
16223 if (! constp)
16224 return 0;
16226 /* This must be a fixed size alignment */
16227 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
16228 align = INTVAL (align_rtx) * BITS_PER_UNIT;
16230 /* Anything to move? */
16231 bytes = INTVAL (bytes_rtx);
16232 if (bytes <= 0)
16233 return 1;
16235 if (bytes > rs6000_block_move_inline_limit)
16236 return 0;
16238 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
16240 union {
16241 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
16242 rtx (*mov) (rtx, rtx);
16243 } gen_func;
16244 machine_mode mode = BLKmode;
16245 rtx src, dest;
16247 /* Altivec first, since it will be faster than a string move
16248 when it applies, and usually not significantly larger. */
16249 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
16251 move_bytes = 16;
16252 mode = V4SImode;
16253 gen_func.mov = gen_movv4si;
16255 else if (TARGET_SPE && bytes >= 8 && align >= 64)
16257 move_bytes = 8;
16258 mode = V2SImode;
16259 gen_func.mov = gen_movv2si;
16261 else if (TARGET_STRING
16262 && bytes > 24 /* move up to 32 bytes at a time */
16263 && ! fixed_regs[5]
16264 && ! fixed_regs[6]
16265 && ! fixed_regs[7]
16266 && ! fixed_regs[8]
16267 && ! fixed_regs[9]
16268 && ! fixed_regs[10]
16269 && ! fixed_regs[11]
16270 && ! fixed_regs[12])
16272 move_bytes = (bytes > 32) ? 32 : bytes;
16273 gen_func.movmemsi = gen_movmemsi_8reg;
16275 else if (TARGET_STRING
16276 && bytes > 16 /* move up to 24 bytes at a time */
16277 && ! fixed_regs[5]
16278 && ! fixed_regs[6]
16279 && ! fixed_regs[7]
16280 && ! fixed_regs[8]
16281 && ! fixed_regs[9]
16282 && ! fixed_regs[10])
16284 move_bytes = (bytes > 24) ? 24 : bytes;
16285 gen_func.movmemsi = gen_movmemsi_6reg;
16287 else if (TARGET_STRING
16288 && bytes > 8 /* move up to 16 bytes at a time */
16289 && ! fixed_regs[5]
16290 && ! fixed_regs[6]
16291 && ! fixed_regs[7]
16292 && ! fixed_regs[8])
16294 move_bytes = (bytes > 16) ? 16 : bytes;
16295 gen_func.movmemsi = gen_movmemsi_4reg;
16297 else if (bytes >= 8 && TARGET_POWERPC64
16298 && (align >= 64 || !STRICT_ALIGNMENT))
16300 move_bytes = 8;
16301 mode = DImode;
16302 gen_func.mov = gen_movdi;
16303 if (offset == 0 && align < 64)
16305 rtx addr;
16307 /* If the address form is reg+offset with offset not a
16308 multiple of four, reload into reg indirect form here
16309 rather than waiting for reload. This way we get one
16310 reload, not one per load and/or store. */
16311 addr = XEXP (orig_dest, 0);
16312 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16313 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16314 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16316 addr = copy_addr_to_reg (addr);
16317 orig_dest = replace_equiv_address (orig_dest, addr);
16319 addr = XEXP (orig_src, 0);
16320 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
16321 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16322 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
16324 addr = copy_addr_to_reg (addr);
16325 orig_src = replace_equiv_address (orig_src, addr);
16329 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
16330 { /* move up to 8 bytes at a time */
16331 move_bytes = (bytes > 8) ? 8 : bytes;
16332 gen_func.movmemsi = gen_movmemsi_2reg;
16334 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
16335 { /* move 4 bytes */
16336 move_bytes = 4;
16337 mode = SImode;
16338 gen_func.mov = gen_movsi;
16340 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
16341 { /* move 2 bytes */
16342 move_bytes = 2;
16343 mode = HImode;
16344 gen_func.mov = gen_movhi;
16346 else if (TARGET_STRING && bytes > 1)
16347 { /* move up to 4 bytes at a time */
16348 move_bytes = (bytes > 4) ? 4 : bytes;
16349 gen_func.movmemsi = gen_movmemsi_1reg;
16351 else /* move 1 byte at a time */
16353 move_bytes = 1;
16354 mode = QImode;
16355 gen_func.mov = gen_movqi;
16358 src = adjust_address (orig_src, mode, offset);
16359 dest = adjust_address (orig_dest, mode, offset);
16361 if (mode != BLKmode)
16363 rtx tmp_reg = gen_reg_rtx (mode);
16365 emit_insn ((*gen_func.mov) (tmp_reg, src));
16366 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
16369 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
16371 int i;
16372 for (i = 0; i < num_reg; i++)
16373 emit_insn (stores[i]);
16374 num_reg = 0;
16377 if (mode == BLKmode)
16379 /* Move the address into scratch registers. The movmemsi
16380 patterns require zero offset. */
16381 if (!REG_P (XEXP (src, 0)))
16383 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
16384 src = replace_equiv_address (src, src_reg);
16386 set_mem_size (src, move_bytes);
16388 if (!REG_P (XEXP (dest, 0)))
16390 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
16391 dest = replace_equiv_address (dest, dest_reg);
16393 set_mem_size (dest, move_bytes);
16395 emit_insn ((*gen_func.movmemsi) (dest, src,
16396 GEN_INT (move_bytes & 31),
16397 align_rtx));
16401 return 1;
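/* For illustration: with TARGET_STRING and r5..r12 available, a fixed
   32-byte copy is handled by a single movmemsi_8reg string insn
   (load/store string through eight scratch registers), whereas without
   string insns a doubleword-aligned 32-byte copy on a 64-bit target
   becomes four DImode load/store pairs, with the stores buffered and
   flushed in groups of MAX_MOVE_REG. */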
16405 /* Return a string to perform a load_multiple operation.
16406 operands[0] is the vector.
16407 operands[1] is the source address.
16408 operands[2] is the first destination register. */
16410 const char *
16411 rs6000_output_load_multiple (rtx operands[3])
16413 /* We have to handle the case where the pseudo used to contain the address
16414 is assigned to one of the output registers. */
16415 int i, j;
16416 int words = XVECLEN (operands[0], 0);
16417 rtx xop[10];
16419 if (XVECLEN (operands[0], 0) == 1)
16420 return "lwz %2,0(%1)";
16422 for (i = 0; i < words; i++)
16423 if (refers_to_regno_p (REGNO (operands[2]) + i, operands[1]))
16425 if (i == words-1)
16427 xop[0] = GEN_INT (4 * (words-1));
16428 xop[1] = operands[1];
16429 xop[2] = operands[2];
16430 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
16431 return "";
16433 else if (i == 0)
16435 xop[0] = GEN_INT (4 * (words-1));
16436 xop[1] = operands[1];
16437 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16438 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
16439 return "";
16441 else
16443 for (j = 0; j < words; j++)
16444 if (j != i)
16446 xop[0] = GEN_INT (j * 4);
16447 xop[1] = operands[1];
16448 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
16449 output_asm_insn ("lwz %2,%0(%1)", xop);
16451 xop[0] = GEN_INT (i * 4);
16452 xop[1] = operands[1];
16453 output_asm_insn ("lwz %1,%0(%1)", xop);
16454 return "";
16458 return "lswi %2,%1,%N0";
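/* For illustration: with a 4-word vector, first destination r5 and the
   address in r11 (which is not among the destinations), the fallthrough
   case emits a single "lswi 5,11,16" (%N0 printing the element count
   times 4).  If the address instead lives in r8, the last destination,
   the i == words-1 case emits "lswi 5,8,12" then "lwz 8,12(8)", so the
   base register is clobbered only by the final load. */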
16462 /* A validation routine: say whether CODE, a condition code, and MODE
16463 match. The other alternatives either don't make sense or should
16464 never be generated. */
16466 void
16467 validate_condition_mode (enum rtx_code code, machine_mode mode)
16469 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
16470 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
16471 && GET_MODE_CLASS (mode) == MODE_CC);
16473 /* These don't make sense. */
16474 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
16475 || mode != CCUNSmode);
16477 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
16478 || mode == CCUNSmode);
16480 gcc_assert (mode == CCFPmode
16481 || (code != ORDERED && code != UNORDERED
16482 && code != UNEQ && code != LTGT
16483 && code != UNGT && code != UNLT
16484 && code != UNGE && code != UNLE));
16486 /* These should never be generated except for
16487 flag_finite_math_only. */
16488 gcc_assert (mode != CCFPmode
16489 || flag_finite_math_only
16490 || (code != LE && code != GE
16491 && code != UNEQ && code != LTGT
16492 && code != UNGT && code != UNLT));
16494 /* These are invalid; the information is not there. */
16495 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
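/* For example: an unsigned comparison such as GTU is only valid in
   CCUNSmode, an unordered test such as UNLT only in CCFPmode, and
   CCEQmode records nothing beyond EQ/NE. */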
16499 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm,
16500 rldicl, rldicr, or rldic instruction in mode MODE. If so, and if E is
16501 non-null, store there the bit offset (counted from the right) where
16502 the single stretch of 1 bits begins; and similarly for B, the bit
16503 offset where it ends. */
16505 bool
16506 rs6000_is_valid_mask (rtx mask, int *b, int *e, machine_mode mode)
16508 unsigned HOST_WIDE_INT val = INTVAL (mask);
16509 unsigned HOST_WIDE_INT bit;
16510 int nb, ne;
16511 int n = GET_MODE_PRECISION (mode);
16513 if (mode != DImode && mode != SImode)
16514 return false;
16516 if (INTVAL (mask) >= 0)
16518 bit = val & -val;
16519 ne = exact_log2 (bit);
16520 nb = exact_log2 (val + bit);
16522 else if (val + 1 == 0)
16524 nb = n;
16525 ne = 0;
16527 else if (val & 1)
16529 val = ~val;
16530 bit = val & -val;
16531 nb = exact_log2 (bit);
16532 ne = exact_log2 (val + bit);
16534 else
16536 bit = val & -val;
16537 ne = exact_log2 (bit);
16538 if (val + bit == 0)
16539 nb = n;
16540 else
16541 nb = 0;
16544 nb--;
16546 if (nb < 0 || ne < 0 || nb >= n || ne >= n)
16547 return false;
16549 if (b)
16550 *b = nb;
16551 if (e)
16552 *e = ne;
16554 return true;
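/* Worked example: for the SImode mask 0x0ff0, val & -val is 0x10, so
   ne = 4; val + bit is 0x1000, so nb becomes 12 and, after nb--, 11:
   the single stretch of ones spans bits 4..11 (*e = 4, *b = 11).  A
   mask with two stretches, such as 0x0f0f, makes exact_log2 return -1
   and the function returns false; a wrap-around mask such as
   0x8000000f is accepted and reported with ne > nb. */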
16557 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwinm, rldicl,
16558 or rldicr instruction, to implement an AND with it in mode MODE. */
16560 bool
16561 rs6000_is_valid_and_mask (rtx mask, machine_mode mode)
16563 int nb, ne;
16565 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
16566 return false;
16568 /* For DImode, we need a rldicl, rldicr, or a rlwinm with a mask that
16569 does not wrap. */
16570 if (mode == DImode)
16571 return (ne == 0 || nb == 63 || (nb < 32 && ne <= nb));
16573 /* For SImode, rlwinm can do everything. */
16574 if (mode == SImode)
16575 return (nb < 32 && ne < 32);
16577 return false;
16580 /* Return the instruction template for an AND with mask in mode MODE, with
16581 operands OPERANDS. If DOT is true, make it a record-form instruction. */
16583 const char *
16584 rs6000_insn_for_and_mask (machine_mode mode, rtx *operands, bool dot)
16586 int nb, ne;
16588 if (!rs6000_is_valid_mask (operands[2], &nb, &ne, mode))
16589 gcc_unreachable ();
16591 if (mode == DImode && ne == 0)
16593 operands[3] = GEN_INT (63 - nb);
16594 if (dot)
16595 return "rldicl. %0,%1,0,%3";
16596 return "rldicl %0,%1,0,%3";
16599 if (mode == DImode && nb == 63)
16601 operands[3] = GEN_INT (63 - ne);
16602 if (dot)
16603 return "rldicr. %0,%1,0,%3";
16604 return "rldicr %0,%1,0,%3";
16607 if (nb < 32 && ne < 32)
16609 operands[3] = GEN_INT (31 - nb);
16610 operands[4] = GEN_INT (31 - ne);
16611 if (dot)
16612 return "rlwinm. %0,%1,0,%3,%4";
16613 return "rlwinm %0,%1,0,%3,%4";
16616 gcc_unreachable ();
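/* For example: an AND with 0xff0 (nb = 11, ne = 4) uses the last form,
   "rlwinm %0,%1,0,20,27", while a DImode AND with 0xffffffff (nb = 31,
   ne = 0) uses the first form, "rldicl %0,%1,0,32", which clears the
   upper 32 bits. */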
16619 /* Return whether MASK (a CONST_INT) is a valid mask for any rlw[i]nm,
16620 rld[i]cl, rld[i]cr, or rld[i]c instruction, to implement an AND with
16621 shift SHIFT (a ROTATE, ASHIFT, or LSHIFTRT) in mode MODE. */
16623 bool
16624 rs6000_is_valid_shift_mask (rtx mask, rtx shift, machine_mode mode)
16626 int nb, ne;
16628 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
16629 return false;
16631 int n = GET_MODE_PRECISION (mode);
16632 int sh = -1;
16634 if (CONST_INT_P (XEXP (shift, 1)))
16636 sh = INTVAL (XEXP (shift, 1));
16637 if (sh < 0 || sh >= n)
16638 return false;
16641 rtx_code code = GET_CODE (shift);
16643 /* Convert any shift by 0 to a rotate, to simplify the code below. */
16644 if (sh == 0)
16645 code = ROTATE;
16647 /* Convert rotate to simple shift if we can, to make analysis simpler. */
16648 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
16649 code = ASHIFT;
16650 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
16652 code = LSHIFTRT;
16653 sh = n - sh;
16656 /* DImode rotates need rld*. */
16657 if (mode == DImode && code == ROTATE)
16658 return (nb == 63 || ne == 0 || ne == sh);
16660 /* SImode rotates need rlw*. */
16661 if (mode == SImode && code == ROTATE)
16662 return (nb < 32 && ne < 32 && sh < 32);
16664 /* Wrap-around masks are only okay for rotates. */
16665 if (ne > nb)
16666 return false;
16668 /* Variable shifts are only okay for rotates. */
16669 if (sh < 0)
16670 return false;
16672 /* Don't allow ASHIFT if the mask is wrong for that. */
16673 if (code == ASHIFT && ne < sh)
16674 return false;
16676 /* If we can do it with an rlw*, we can do it. Don't allow LSHIFTRT
16677 if the mask is wrong for that. */
16678 if (nb < 32 && ne < 32 && sh < 32
16679 && !(code == LSHIFTRT && nb >= 32 - sh))
16680 return true;
16682 /* If we can do it with an rld*, we can do it. Don't allow LSHIFTRT
16683 if the mask is wrong for that. */
16684 if (code == LSHIFTRT)
16685 sh = 64 - sh;
16686 if (nb == 63 || ne == 0 || ne == sh)
16687 return !(code == LSHIFTRT && nb >= sh);
16689 return false;
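/* For example: in SImode, (ashift x 8) combined with the mask
   0xffffff00 gives nb = 31, ne = 8, sh = 8; the ASHIFT check passes
   (ne >= sh) and the rlw* test succeeds, so the pair can be done as a
   single "rlwinm %0,%1,8,0,23". */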
16692 /* Return the instruction template for a shift with mask in mode MODE, with
16693 operands OPERANDS. If DOT is true, make it a record-form instruction. */
16695 const char *
16696 rs6000_insn_for_shift_mask (machine_mode mode, rtx *operands, bool dot)
16698 int nb, ne;
16700 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
16701 gcc_unreachable ();
16703 if (mode == DImode && ne == 0)
16705 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
16706 operands[2] = GEN_INT (64 - INTVAL (operands[2]));
16707 operands[3] = GEN_INT (63 - nb);
16708 if (dot)
16709 return "rld%I2cl. %0,%1,%2,%3";
16710 return "rld%I2cl %0,%1,%2,%3";
16713 if (mode == DImode && nb == 63)
16715 operands[3] = GEN_INT (63 - ne);
16716 if (dot)
16717 return "rld%I2cr. %0,%1,%2,%3";
16718 return "rld%I2cr %0,%1,%2,%3";
16721 if (mode == DImode
16722 && GET_CODE (operands[4]) != LSHIFTRT
16723 && CONST_INT_P (operands[2])
16724 && ne == INTVAL (operands[2]))
16726 operands[3] = GEN_INT (63 - nb);
16727 if (dot)
16728 return "rld%I2c. %0,%1,%2,%3";
16729 return "rld%I2c %0,%1,%2,%3";
16732 if (nb < 32 && ne < 32)
16734 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
16735 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
16736 operands[3] = GEN_INT (31 - nb);
16737 operands[4] = GEN_INT (31 - ne);
16738 if (dot)
16739 return "rlw%I2nm. %0,%1,%2,%3,%4";
16740 return "rlw%I2nm %0,%1,%2,%3,%4";
16743 gcc_unreachable ();
16746 /* Return whether MASK (a CONST_INT) is a valid mask for any rlwimi or
16747 rldimi instruction, to implement an insert with shift SHIFT (a ROTATE,
16748 ASHIFT, or LSHIFTRT) in mode MODE. */
16750 bool
16751 rs6000_is_valid_insert_mask (rtx mask, rtx shift, machine_mode mode)
16753 int nb, ne;
16755 if (!rs6000_is_valid_mask (mask, &nb, &ne, mode))
16756 return false;
16758 int n = GET_MODE_PRECISION (mode);
16760 int sh = INTVAL (XEXP (shift, 1));
16761 if (sh < 0 || sh >= n)
16762 return false;
16764 rtx_code code = GET_CODE (shift);
16766 /* Convert any shift by 0 to a rotate, to simplify the code below. */
16767 if (sh == 0)
16768 code = ROTATE;
16770 /* Convert rotate to simple shift if we can, to make analysis simpler. */
16771 if (code == ROTATE && sh >= 0 && nb >= ne && ne >= sh)
16772 code = ASHIFT;
16773 if (code == ROTATE && sh >= 0 && nb >= ne && nb < sh)
16775 code = LSHIFTRT;
16776 sh = n - sh;
16779 /* DImode rotates need rldimi. */
16780 if (mode == DImode && code == ROTATE)
16781 return (ne == sh);
16783 /* SImode rotates need rlwimi. */
16784 if (mode == SImode && code == ROTATE)
16785 return (nb < 32 && ne < 32 && sh < 32);
16787 /* Wrap-around masks are only okay for rotates. */
16788 if (ne > nb)
16789 return false;
16791 /* Don't allow ASHIFT if the mask is wrong for that. */
16792 if (code == ASHIFT && ne < sh)
16793 return false;
16795 /* If we can do it with an rlwimi, we can do it. Don't allow LSHIFTRT
16796 if the mask is wrong for that. */
16797 if (nb < 32 && ne < 32 && sh < 32
16798 && !(code == LSHIFTRT && nb >= 32 - sh))
16799 return true;
16801 /* If we can do it with an rldimi, we can do it. Don't allow LSHIFTRT
16802 if the mask is wrong for that. */
16803 if (code == LSHIFTRT)
16804 sh = 64 - sh;
16805 if (ne == sh)
16806 return !(code == LSHIFTRT && nb >= sh);
16808 return false;
16811 /* Return the instruction template for an insert with mask in mode MODE, with
16812 operands OPERANDS. If DOT is true, make it a record-form instruction. */
16814 const char *
16815 rs6000_insn_for_insert_mask (machine_mode mode, rtx *operands, bool dot)
16817 int nb, ne;
16819 if (!rs6000_is_valid_mask (operands[3], &nb, &ne, mode))
16820 gcc_unreachable ();
16822 /* Prefer rldimi because rlwimi is cracked. */
16823 if (TARGET_POWERPC64
16824 && (!dot || mode == DImode)
16825 && GET_CODE (operands[4]) != LSHIFTRT
16826 && ne == INTVAL (operands[2]))
16828 operands[3] = GEN_INT (63 - nb);
16829 if (dot)
16830 return "rldimi. %0,%1,%2,%3";
16831 return "rldimi %0,%1,%2,%3";
16834 if (nb < 32 && ne < 32)
16836 if (GET_CODE (operands[4]) == LSHIFTRT && INTVAL (operands[2]))
16837 operands[2] = GEN_INT (32 - INTVAL (operands[2]));
16838 operands[3] = GEN_INT (31 - nb);
16839 operands[4] = GEN_INT (31 - ne);
16840 if (dot)
16841 return "rlwimi. %0,%1,%2,%3,%4";
16842 return "rlwimi %0,%1,%2,%3,%4";
16845 gcc_unreachable ();
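/* For example: inserting the low byte of %1 into bits 8..15 of %0
   (mask 0xff00, shift amount 8, ASHIFT) has nb = 15 and ne = 8, and
   when the rldimi branch is not taken (e.g. on a 32-bit target) this
   emits "rlwimi %0,%1,8,16,23". */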
16848 /* Return whether an AND with C (a CONST_INT) in mode MODE can be done
16849 using two machine instructions. */
16851 bool
16852 rs6000_is_valid_2insn_and (rtx c, machine_mode mode)
16854 /* There are two kinds of AND we can handle with two insns:
16855 1) those we can do with two rl* insns;
16856 2) ori[s];xori[s].
16858 We do not handle that last case yet. */
16860 /* If there is just one stretch of ones, we can do it. */
16861 if (rs6000_is_valid_mask (c, NULL, NULL, mode))
16862 return true;
16864 /* Otherwise, fill in the lowest "hole"; if we can do the result with
16865 one insn, we can do the whole thing with two. */
16866 unsigned HOST_WIDE_INT val = INTVAL (c);
16867 unsigned HOST_WIDE_INT bit1 = val & -val;
16868 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
16869 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
16870 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
16871 return rs6000_is_valid_and_mask (GEN_INT (val + bit3 - bit2), mode);
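/* Worked example: val = 0x00ff00ff has two stretches of ones.  Then
   bit1 = 1, bit2 = 0x100 (bottom of the lowest hole), val1 = 0x00ff0000
   and bit3 = 0x10000 (first bit above the hole), so val + bit3 - bit2
   is 0x00ffffff, one contiguous stretch, and the AND can be done with
   two insns. */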
16874 /* Emit a potentially record-form instruction, setting DST from SRC.
16875 If DOT is 0, that is all; otherwise, set CCREG to the result of the
16876 signed comparison of DST with zero. If DOT is 1, the generated RTL
16877 doesn't care about the DST result; if DOT is 2, it does. If CCREG
16878 is CR0 do a single dot insn (as a PARALLEL); otherwise, do a SET and
16879 a separate COMPARE. */
16881 static void
16882 rs6000_emit_dot_insn (rtx dst, rtx src, int dot, rtx ccreg)
16884 if (dot == 0)
16886 emit_move_insn (dst, src);
16887 return;
16890 if (cc_reg_not_cr0_operand (ccreg, CCmode))
16892 emit_move_insn (dst, src);
16893 emit_move_insn (ccreg, gen_rtx_COMPARE (CCmode, dst, const0_rtx));
16894 return;
16897 rtx ccset = gen_rtx_SET (ccreg, gen_rtx_COMPARE (CCmode, src, const0_rtx));
16898 if (dot == 1)
16900 rtx clobber = gen_rtx_CLOBBER (VOIDmode, dst);
16901 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, clobber)));
16903 else
16905 rtx set = gen_rtx_SET (dst, src);
16906 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, ccset, set)));
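/* Illustrative RTL shapes: for DOT == 1 with CR0 this emits
   (parallel [(set ccreg (compare src 0)) (clobber dst)]), a record-form
   insn whose GPR result is unused; DOT == 2 replaces the clobber with
   (set dst src) so that both results are live. */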
16910 /* Emit the two insns to do an AND in mode MODE, with operands OPERANDS.
16911 If EXPAND is true, split rotate-and-mask instructions we generate to
16912 their constituent parts as well (this is used during expand); if DOT
16913 is 1, make the last insn a record-form instruction clobbering the
16914 destination GPR and setting the CC reg (from operands[3]); if 2, set
16915 that GPR as well as the CC reg. */
16917 void
16918 rs6000_emit_2insn_and (machine_mode mode, rtx *operands, bool expand, int dot)
16920 gcc_assert (!(expand && dot));
16922 unsigned HOST_WIDE_INT val = INTVAL (operands[2]);
16924 /* If it is one stretch of ones, it is DImode; shift left, mask, then
16925 shift right. This generates better code than doing the masks without
16926 shifts, or shifting first right and then left. */
16927 int nb, ne;
16928 if (rs6000_is_valid_mask (operands[2], &nb, &ne, mode) && nb >= ne)
16930 gcc_assert (mode == DImode);
16932 int shift = 63 - nb;
16933 if (expand)
16935 rtx tmp1 = gen_reg_rtx (DImode);
16936 rtx tmp2 = gen_reg_rtx (DImode);
16937 emit_insn (gen_ashldi3 (tmp1, operands[1], GEN_INT (shift)));
16938 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (val << shift)));
16939 emit_insn (gen_lshrdi3 (operands[0], tmp2, GEN_INT (shift)));
16941 else
16943 rtx tmp = gen_rtx_ASHIFT (mode, operands[1], GEN_INT (shift));
16944 tmp = gen_rtx_AND (mode, tmp, GEN_INT (val << shift));
16945 emit_move_insn (operands[0], tmp);
16946 tmp = gen_rtx_LSHIFTRT (mode, operands[0], GEN_INT (shift));
16947 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
16949 return;
16952 /* Otherwise, make a mask2 that cuts out the lowest "hole", and a mask1
16953 that does the rest. */
16954 unsigned HOST_WIDE_INT bit1 = val & -val;
16955 unsigned HOST_WIDE_INT bit2 = (val + bit1) & ~val;
16956 unsigned HOST_WIDE_INT val1 = (val + bit1) & val;
16957 unsigned HOST_WIDE_INT bit3 = val1 & -val1;
16959 unsigned HOST_WIDE_INT mask1 = -bit3 + bit2 - 1;
16960 unsigned HOST_WIDE_INT mask2 = val + bit3 - bit2;
16962 gcc_assert (rs6000_is_valid_and_mask (GEN_INT (mask2), mode));
16964 /* Two "no-rotate"-and-mask instructions, for SImode. */
16965 if (rs6000_is_valid_and_mask (GEN_INT (mask1), mode))
16967 gcc_assert (mode == SImode);
16969 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
16970 rtx tmp = gen_rtx_AND (mode, operands[1], GEN_INT (mask1));
16971 emit_move_insn (reg, tmp);
16972 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
16973 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
16974 return;
16977 gcc_assert (mode == DImode);
16979 /* Two "no-rotate"-and-mask instructions, for DImode: both are rlwinm
16980 insns; we have to do the first in SImode, because it wraps. */
16981 if (mask2 <= 0xffffffff
16982 && rs6000_is_valid_and_mask (GEN_INT (mask1), SImode))
16984 rtx reg = expand ? gen_reg_rtx (mode) : operands[0];
16985 rtx tmp = gen_rtx_AND (SImode, gen_lowpart (SImode, operands[1]),
16986 GEN_INT (mask1));
16987 rtx reg_low = gen_lowpart (SImode, reg);
16988 emit_move_insn (reg_low, tmp);
16989 tmp = gen_rtx_AND (mode, reg, GEN_INT (mask2));
16990 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
16991 return;
16994 /* Two rld* insns: rotate, clear the hole in the middle (which now is
16995 at the top end), rotate back and clear the other hole. */
16996 int right = exact_log2 (bit3);
16997 int left = 64 - right;
16999 /* Rotate the mask too. */
17000 mask1 = (mask1 >> right) | ((bit2 - 1) << left);
17002 if (expand)
17004 rtx tmp1 = gen_reg_rtx (DImode);
17005 rtx tmp2 = gen_reg_rtx (DImode);
17006 rtx tmp3 = gen_reg_rtx (DImode);
17007 emit_insn (gen_rotldi3 (tmp1, operands[1], GEN_INT (left)));
17008 emit_insn (gen_anddi3 (tmp2, tmp1, GEN_INT (mask1)));
17009 emit_insn (gen_rotldi3 (tmp3, tmp2, GEN_INT (right)));
17010 emit_insn (gen_anddi3 (operands[0], tmp3, GEN_INT (mask2)));
17012 else
17014 rtx tmp = gen_rtx_ROTATE (mode, operands[1], GEN_INT (left));
17015 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask1));
17016 emit_move_insn (operands[0], tmp);
17017 tmp = gen_rtx_ROTATE (mode, operands[0], GEN_INT (right));
17018 tmp = gen_rtx_AND (mode, tmp, GEN_INT (mask2));
17019 rs6000_emit_dot_insn (operands[0], tmp, dot, dot ? operands[3] : 0);
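/* Continuing the 0x00ff00ff example: mask1 = 0xffff00ff (everything
   except the hole, a wrap-around mask) and mask2 = 0x00ffffff (the
   hole filled in), so in SImode this emits two rlwinm insns, e.g.
   "rlwinm %0,%1,0,24,15" then "rlwinm %0,%0,0,8,31", whose combined
   effect is the original AND. */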
17023 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
17024 for lfq and stfq insns, iff the registers are hard registers. */
17026 int
17027 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
17029 /* We might have been passed a SUBREG. */
17030 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
17031 return 0;
17033 /* We might have been passed non-floating-point registers. */
17034 if (!FP_REGNO_P (REGNO (reg1))
17035 || !FP_REGNO_P (REGNO (reg2)))
17036 return 0;
17038 return (REGNO (reg1) == REGNO (reg2) - 1);
17041 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
17042 addr1 and addr2 must be in consecutive memory locations
17043 (addr2 == addr1 + 8). */
17045 int
17046 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
17048 rtx addr1, addr2;
17049 unsigned int reg1, reg2;
17050 int offset1, offset2;
17052 /* The mems cannot be volatile. */
17053 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
17054 return 0;
17056 addr1 = XEXP (mem1, 0);
17057 addr2 = XEXP (mem2, 0);
17059 /* Extract an offset (if used) from the first addr. */
17060 if (GET_CODE (addr1) == PLUS)
17062 /* If not a REG, return zero. */
17063 if (GET_CODE (XEXP (addr1, 0)) != REG)
17064 return 0;
17065 else
17067 reg1 = REGNO (XEXP (addr1, 0));
17068 /* The offset must be constant! */
17069 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
17070 return 0;
17071 offset1 = INTVAL (XEXP (addr1, 1));
17074 else if (GET_CODE (addr1) != REG)
17075 return 0;
17076 else
17078 reg1 = REGNO (addr1);
17079 /* This was a simple (mem (reg)) expression. Offset is 0. */
17080 offset1 = 0;
17083 /* And now for the second addr. */
17084 if (GET_CODE (addr2) == PLUS)
17086 /* If not a REG, return zero. */
17087 if (GET_CODE (XEXP (addr2, 0)) != REG)
17088 return 0;
17089 else
17091 reg2 = REGNO (XEXP (addr2, 0));
17092 /* The offset must be constant. */
17093 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
17094 return 0;
17095 offset2 = INTVAL (XEXP (addr2, 1));
17098 else if (GET_CODE (addr2) != REG)
17099 return 0;
17100 else
17102 reg2 = REGNO (addr2);
17103 /* This was a simple (mem (reg)) expression. Offset is 0. */
17104 offset2 = 0;
17107 /* Both of these must have the same base register. */
17108 if (reg1 != reg2)
17109 return 0;
17111 /* The offset for the second addr must be 8 more than the first addr. */
17112 if (offset2 != offset1 + 8)
17113 return 0;
17115 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
17116 instructions. */
17117 return 1;
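/* For example: (mem:DF (plus (reg 3) (const_int 8))) followed by
   (mem:DF (plus (reg 3) (const_int 16))) passes all of the checks
   (same base register, offset2 == offset1 + 8) and so may be fused
   into a single quad-word access. */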
/* Return a memory location (a stack slot) to be used when a value must
   be moved between register classes through memory.  For SDmode (unless
   TARGET_NO_SDMODE_STACK) reuse cfun->machine->sdmode_stack_slot;
   otherwise allocate a fresh local stack slot. */
17121 rtx
17122 rs6000_secondary_memory_needed_rtx (machine_mode mode)
17124 static bool eliminated = false;
17125 rtx ret;
17127 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
17128 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
17129 else
17131 rtx mem = cfun->machine->sdmode_stack_slot;
17132 gcc_assert (mem != NULL_RTX);
17134 if (!eliminated)
17136 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
17137 cfun->machine->sdmode_stack_slot = mem;
17138 eliminated = true;
17140 ret = mem;
17143 if (TARGET_DEBUG_ADDR)
17145 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
17146 GET_MODE_NAME (mode));
17147 if (!ret)
17148 fprintf (stderr, "\tNULL_RTX\n");
17149 else
17150 debug_rtx (ret);
17153 return ret;
17156 /* Return the mode to be used for memory when a secondary memory
17157 location is needed. For SDmode values we need to use DDmode, in
17158 all other cases we can use the same mode. */
17159 machine_mode
17160 rs6000_secondary_memory_needed_mode (machine_mode mode)
17162 if (lra_in_progress && mode == SDmode)
17163 return DDmode;
17164 return mode;
17167 static tree
17168 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
17170 /* Don't walk into types. */
17171 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
17173 *walk_subtrees = 0;
17174 return NULL_TREE;
17177 switch (TREE_CODE (*tp))
17179 case VAR_DECL:
17180 case PARM_DECL:
17181 case FIELD_DECL:
17182 case RESULT_DECL:
17183 case SSA_NAME:
17184 case REAL_CST:
17185 case MEM_REF:
17186 case VIEW_CONVERT_EXPR:
17187 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
17188 return *tp;
17189 break;
17190 default:
17191 break;
17194 return NULL_TREE;
17197 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
17198 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
17199 only work on the traditional Altivec registers, note if an Altivec register
17200 was chosen. */
17202 static enum rs6000_reg_type
17203 register_to_reg_type (rtx reg, bool *is_altivec)
17205 HOST_WIDE_INT regno;
17206 enum reg_class rclass;
17208 if (GET_CODE (reg) == SUBREG)
17209 reg = SUBREG_REG (reg);
17211 if (!REG_P (reg))
17212 return NO_REG_TYPE;
17214 regno = REGNO (reg);
17215 if (regno >= FIRST_PSEUDO_REGISTER)
17217 if (!lra_in_progress && !reload_in_progress && !reload_completed)
17218 return PSEUDO_REG_TYPE;
17220 regno = true_regnum (reg);
17221 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
17222 return PSEUDO_REG_TYPE;
17225 gcc_assert (regno >= 0);
17227 if (is_altivec && ALTIVEC_REGNO_P (regno))
17228 *is_altivec = true;
17230 rclass = rs6000_regno_regclass[regno];
17231 return reg_class_to_reg_type[(int)rclass];
17234 /* Helper function to return the cost of adding a TOC entry address. */
17236 static inline int
17237 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
17239 int ret;
17241 if (TARGET_CMODEL != CMODEL_SMALL)
17242 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
17244 else
17245 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
17247 return ret;
17250 /* Helper function for rs6000_secondary_reload to determine whether the memory
17251 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
17252 needs reloading. Return negative if the memory is not handled by the memory
17253 helper functions and a different reload method should be tried, 0 if no
17254 additional instructions are needed, and positive to give the extra cost of
17255 the memory. */
17257 static int
17258 rs6000_secondary_reload_memory (rtx addr,
17259 enum reg_class rclass,
17260 machine_mode mode)
17262 int extra_cost = 0;
17263 rtx reg, and_arg, plus_arg0, plus_arg1;
17264 addr_mask_type addr_mask;
17265 const char *type = NULL;
17266 const char *fail_msg = NULL;
17268 if (GPR_REG_CLASS_P (rclass))
17269 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
17271 else if (rclass == FLOAT_REGS)
17272 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
17274 else if (rclass == ALTIVEC_REGS)
17275 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
17277 /* For the combined VSX_REGS, turn off Altivec AND -16. */
17278 else if (rclass == VSX_REGS)
17279 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
17280 & ~RELOAD_REG_AND_M16);
17282 else
17284 if (TARGET_DEBUG_ADDR)
17285 fprintf (stderr,
17286 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
17287 "class is not GPR, FPR, VMX\n",
17288 GET_MODE_NAME (mode), reg_class_names[rclass]);
17290 return -1;
17293 /* If the register isn't valid in this register class, just return now. */
17294 if ((addr_mask & RELOAD_REG_VALID) == 0)
17296 if (TARGET_DEBUG_ADDR)
17297 fprintf (stderr,
17298 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
17299 "not valid in class\n",
17300 GET_MODE_NAME (mode), reg_class_names[rclass]);
17302 return -1;
17305 switch (GET_CODE (addr))
17307 /* Does the register class support auto-update forms for this mode? We
17308 don't need a scratch register, since the PowerPC only supports
17309 PRE_INC, PRE_DEC, and PRE_MODIFY. */
17310 case PRE_INC:
17311 case PRE_DEC:
17312 reg = XEXP (addr, 0);
17313 if (!base_reg_operand (addr, GET_MODE (reg)))
17315 fail_msg = "no base register #1";
17316 extra_cost = -1;
17319 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
17321 extra_cost = 1;
17322 type = "update";
17324 break;
17326 case PRE_MODIFY:
17327 reg = XEXP (addr, 0);
17328 plus_arg1 = XEXP (addr, 1);
17329 if (!base_reg_operand (reg, GET_MODE (reg))
17330 || GET_CODE (plus_arg1) != PLUS
17331 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
17333 fail_msg = "bad PRE_MODIFY";
17334 extra_cost = -1;
17337 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
17339 extra_cost = 1;
17340 type = "update";
17342 break;
17344 /* Do we need to simulate AND -16 to clear the bottom address bits used
17345 in VMX load/stores? Only allow the AND for vector sizes. */
17346 case AND:
17347 and_arg = XEXP (addr, 0);
17348 if (GET_MODE_SIZE (mode) != 16
17349 || GET_CODE (XEXP (addr, 1)) != CONST_INT
17350 || INTVAL (XEXP (addr, 1)) != -16)
17352 fail_msg = "bad Altivec AND #1";
17353 extra_cost = -1;
17356 if (rclass != ALTIVEC_REGS)
17358 if (legitimate_indirect_address_p (and_arg, false))
17359 extra_cost = 1;
17361 else if (legitimate_indexed_address_p (and_arg, false))
17362 extra_cost = 2;
17364 else
17366 fail_msg = "bad Altivec AND #2";
17367 extra_cost = -1;
17370 type = "and";
17372 break;
17374 /* If this is an indirect address, make sure it is a base register. */
17375 case REG:
17376 case SUBREG:
17377 if (!legitimate_indirect_address_p (addr, false))
17379 extra_cost = 1;
17380 type = "move";
17382 break;
17384 /* If this is an indexed address, make sure the register class can handle
17385 indexed addresses for this mode. */
17386 case PLUS:
17387 plus_arg0 = XEXP (addr, 0);
17388 plus_arg1 = XEXP (addr, 1);
17390 /* (plus (plus (reg) (constant)) (constant)) is generated during
17391 push_reload processing, so handle it now. */
17392 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
17394 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17396 extra_cost = 1;
17397 type = "offset";
17401 /* (plus (plus (reg) (constant)) (reg)) is also generated during
17402 push_reload processing, so handle it now. */
17403 else if (GET_CODE (plus_arg0) == PLUS && REG_P (plus_arg1))
17405 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17407 extra_cost = 1;
17408 type = "indexed #2";
17412 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
17414 fail_msg = "no base register #2";
17415 extra_cost = -1;
17418 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
17420 if ((addr_mask & RELOAD_REG_INDEXED) == 0
17421 || !legitimate_indexed_address_p (addr, false))
17423 extra_cost = 1;
17424 type = "indexed";
17428 /* Make sure the register class can handle offset addresses. */
17429 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
17431 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17433 extra_cost = 1;
17434 type = "offset";
17438 else
17440 fail_msg = "bad PLUS";
17441 extra_cost = -1;
17444 break;
17446 case LO_SUM:
17447 if (!legitimate_lo_sum_address_p (mode, addr, false))
17449 fail_msg = "bad LO_SUM";
17450 extra_cost = -1;
17453 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17455 extra_cost = 1;
17456 type = "lo_sum";
17458 break;
17460 /* Static addresses need to create a TOC entry. */
17461 case CONST:
17462 case SYMBOL_REF:
17463 case LABEL_REF:
17464 type = "address";
17465 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
17466 break;
17468 /* TOC references look like offsettable memory. */
17469 case UNSPEC:
17470 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
17472 fail_msg = "bad UNSPEC";
17473 extra_cost = -1;
17476 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17478 extra_cost = 1;
17479 type = "toc reference";
17481 break;
17483 default:
17485 fail_msg = "bad address";
17486 extra_cost = -1;
17490 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
17492 if (extra_cost < 0)
17493 fprintf (stderr,
17494 "rs6000_secondary_reload_memory error: mode = %s, "
17495 "class = %s, addr_mask = '%s', %s\n",
17496 GET_MODE_NAME (mode),
17497 reg_class_names[rclass],
17498 rs6000_debug_addr_mask (addr_mask, false),
17499 (fail_msg != NULL) ? fail_msg : "<bad address>");
17501 else
17502 fprintf (stderr,
17503 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
17504 "addr_mask = '%s', extra cost = %d, %s\n",
17505 GET_MODE_NAME (mode),
17506 reg_class_names[rclass],
17507 rs6000_debug_addr_mask (addr_mask, false),
17508 extra_cost,
17509 (type) ? type : "<none>");
17511 debug_rtx (addr);
17514 return extra_cost;
17517 /* Helper function for rs6000_secondary_reload to return true if a move to a
17518 different register class is really a simple move. */
17520 static bool
17521 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
17522 enum rs6000_reg_type from_type,
17523 machine_mode mode)
17525 int size;
17527 /* Check the various direct moves available. In this function, we only
17528 look at cases where we don't need any extra registers, and one or more
17529 simple move insns are issued. At present, 32-bit integers are not allowed
17530 in FPR/VSX registers. Single precision binary floating point is not a
17531 simple move because we need to convert to the single precision memory
17532 layout. The 4-byte SDmode can be moved. */
17533 size = GET_MODE_SIZE (mode);
17534 if (TARGET_DIRECT_MOVE
17535 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
17536 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
17537 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
17538 return true;
17540 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
17541 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
17542 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
17543 return true;
17545 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
17546 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
17547 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
17548 return true;
17550 return false;
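/* For example: with TARGET_DIRECT_MOVE on a 64-bit target, a DImode
   move between a GPR and a VSX register is "simple" (a single direct
   move insn such as mtvsrd/mfvsrd), so no scratch register is needed;
   an SFmode move is not simple, because of the memory-layout
   conversion, and is handled by rs6000_secondary_reload_direct_move
   below. */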
17553 /* Power8 helper function for rs6000_secondary_reload, handle all of the
17554 special direct moves that involve allocating an extra register. Return
17555 true (filling in SRI with the insn code and cost of the helper function)
17556 if there is such a move, or false if not. */
17558 static bool
17559 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
17560 enum rs6000_reg_type from_type,
17561 machine_mode mode,
17562 secondary_reload_info *sri,
17563 bool altivec_p)
17565 bool ret = false;
17566 enum insn_code icode = CODE_FOR_nothing;
17567 int cost = 0;
17568 int size = GET_MODE_SIZE (mode);
17570 if (TARGET_POWERPC64)
17572 if (size == 16)
17574 /* Handle moving 128-bit values from GPRs to VSX registers on
17575 power8 when running in 64-bit mode using XXPERMDI to glue the two
17576 64-bit values back together. */
17577 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
17579 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
17580 icode = reg_addr[mode].reload_vsx_gpr;
17583 /* Handle moving 128-bit values from VSX registers to GPRs on
17584 power8 when running in 64-bit mode using XXPERMDI to get access to the
17585 bottom 64-bit value. */
17586 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
17588 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
17589 icode = reg_addr[mode].reload_gpr_vsx;
17593 else if (mode == SFmode)
17595 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
17597 cost = 3; /* xscvdpspn, mfvsrd, and. */
17598 icode = reg_addr[mode].reload_gpr_vsx;
17601 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
17603 cost = 2; /* mtvsrz, xscvspdpn. */
17604 icode = reg_addr[mode].reload_vsx_gpr;
17609 if (TARGET_POWERPC64 && size == 16)
17611 /* Handle moving 128-bit values from GPRs to VSX registers on
17612 power8 when running in 64-bit mode using XXPERMDI to glue the two
17613 64-bit values back together. */
17614 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
17616 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
17617 icode = reg_addr[mode].reload_vsx_gpr;
17620 /* Handle moving 128-bit values from VSX registers to GPRs on
17621 power8 when running in 64-bit mode using XXPERMDI to get access to the
17622 bottom 64-bit value. */
17623 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
17625 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
17626 icode = reg_addr[mode].reload_gpr_vsx;
17630 else if (!TARGET_POWERPC64 && size == 8)
17632 /* Handle moving 64-bit values from GPRs to floating point registers on
17633 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
17634 values back together. Altivec register classes must be handled
17635 specially since a different instruction is used, and the secondary
17636 reload support requires a single instruction class in the scratch
17637 register constraint. However, right now TFmode is not allowed in
17638 Altivec registers, so the pattern will never match. */
17639 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
17641 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
17642 icode = reg_addr[mode].reload_fpr_gpr;
17646 if (icode != CODE_FOR_nothing)
17648 ret = true;
17649 if (sri)
17651 sri->icode = icode;
17652 sri->extra_cost = cost;
17656 return ret;
17659 /* Return whether a move between two register classes can be done either
17660 directly (simple move) or via a pattern that uses a single extra temporary
17661 (using power8's direct move in this case). */
17663 static bool
17664 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
17665 enum rs6000_reg_type from_type,
17666 machine_mode mode,
17667 secondary_reload_info *sri,
17668 bool altivec_p)
17670 /* Fall back to load/store reloads if either type is not a register. */
17671 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
17672 return false;
17674 /* If we haven't allocated registers yet, assume the move can be done for the
17675 standard register types. */
17676 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
17677 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
17678 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
17679 return true;
17681 /* A move within the same set of registers is a simple move for
17682 non-specialized registers. */
17683 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
17684 return true;
17686 /* Check whether a simple move can be done directly. */
17687 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
17689 if (sri)
17691 sri->icode = CODE_FOR_nothing;
17692 sri->extra_cost = 0;
17694 return true;
17697 /* Now check if we can do it in a few steps. */
17698 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
17699 altivec_p);
17702 /* Inform reload about cases where moving X with a mode MODE to a register in
17703 RCLASS requires an extra scratch or immediate register. Return the class
17704 needed for the immediate register.
17706 For VSX and Altivec, we may need a register to convert sp+offset into
17707 reg+sp.
17709 For misaligned 64-bit gpr loads and stores we need a register to
17710 convert an offset address to indirect. */
17712 static reg_class_t
17713 rs6000_secondary_reload (bool in_p,
17714 rtx x,
17715 reg_class_t rclass_i,
17716 machine_mode mode,
17717 secondary_reload_info *sri)
17719 enum reg_class rclass = (enum reg_class) rclass_i;
17720 reg_class_t ret = ALL_REGS;
17721 enum insn_code icode;
17722 bool default_p = false;
17723 bool done_p = false;
17725 /* Allow subreg of memory before/during reload. */
17726 bool memory_p = (MEM_P (x)
17727 || (!reload_completed && GET_CODE (x) == SUBREG
17728 && MEM_P (SUBREG_REG (x))));
17730 sri->icode = CODE_FOR_nothing;
17731 sri->extra_cost = 0;
17732 icode = ((in_p)
17733 ? reg_addr[mode].reload_load
17734 : reg_addr[mode].reload_store);
17736 if (REG_P (x) || register_operand (x, mode))
17738 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
17739 bool altivec_p = (rclass == ALTIVEC_REGS);
17740 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
17742 if (!in_p)
17744 enum rs6000_reg_type exchange = to_type;
17745 to_type = from_type;
17746 from_type = exchange;
17749 /* Can we do a direct move of some sort? */
17750 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
17751 altivec_p))
17753 icode = (enum insn_code)sri->icode;
17754 default_p = false;
17755 done_p = true;
17756 ret = NO_REGS;
17760 /* Make sure 0.0 is not reloaded or forced into memory. */
17761 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
17763 ret = NO_REGS;
17764 default_p = false;
17765 done_p = true;
17768 /* If this is a scalar floating point value and we want to load it into the
17769 traditional Altivec registers, do it via a move via a traditional floating
17770 point register. Also make sure that non-zero constants use a FPR. */
17771 if (!done_p && reg_addr[mode].scalar_in_vmx_p
17772 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17773 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
17775 ret = FLOAT_REGS;
17776 default_p = false;
17777 done_p = true;
17780 /* Handle reload of load/stores if we have reload helper functions. */
17781 if (!done_p && icode != CODE_FOR_nothing && memory_p)
17783 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
17784 mode);
17786 if (extra_cost >= 0)
17788 done_p = true;
17789 ret = NO_REGS;
17790 if (extra_cost > 0)
17792 sri->extra_cost = extra_cost;
17793 sri->icode = icode;
17798 /* Handle unaligned loads and stores of integer registers. */
17799 if (!done_p && TARGET_POWERPC64
17800 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17801 && memory_p
17802 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
17804 rtx addr = XEXP (x, 0);
17805 rtx off = address_offset (addr);
17807 if (off != NULL_RTX)
17809 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17810 unsigned HOST_WIDE_INT offset = INTVAL (off);
17812 /* We need a secondary reload when our legitimate_address_p
17813 says the address is good (as otherwise the entire address
17814 will be reloaded), and the offset is not a multiple of
17815 four or we have an address wrap. Address wrap will only
17816 occur for LO_SUMs since legitimate_offset_address_p
17817 rejects addresses for 16-byte mems that will wrap. */
17818 if (GET_CODE (addr) == LO_SUM
17819 ? (1 /* legitimate_address_p allows any offset for lo_sum */
17820 && ((offset & 3) != 0
17821 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
17822 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
17823 && (offset & 3) != 0))
17825 /* -m32 -mpowerpc64 needs to use a 32-bit scratch register. */
17826 if (in_p)
17827 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_load
17828 : CODE_FOR_reload_di_load);
17829 else
17830 sri->icode = ((TARGET_32BIT) ? CODE_FOR_reload_si_store
17831 : CODE_FOR_reload_di_store);
17832 sri->extra_cost = 2;
17833 ret = NO_REGS;
17834 done_p = true;
17836 else
17837 default_p = true;
17839 else
17840 default_p = true;
17843 if (!done_p && !TARGET_POWERPC64
17844 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17845 && memory_p
17846 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
17848 rtx addr = XEXP (x, 0);
17849 rtx off = address_offset (addr);
17851 if (off != NULL_RTX)
17853 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17854 unsigned HOST_WIDE_INT offset = INTVAL (off);
17856 /* We need a secondary reload when our legitimate_address_p
17857 says the address is good (as otherwise the entire address
17858 will be reloaded), and we have a wrap.
17860 legitimate_lo_sum_address_p allows LO_SUM addresses to
17861 have any offset so test for wrap in the low 16 bits.
17863 legitimate_offset_address_p checks for the range
17864 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
17865 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
17866 [0x7ff4,0x7fff] respectively, so test for the
17867 intersection of these ranges, [0x7ffc,0x7fff] and
17868 [0x7ff4,0x7ff7] respectively.
17870 Note that the address we see here may have been
17871 manipulated by legitimize_reload_address. */
17872 if (GET_CODE (addr) == LO_SUM
17873 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
17874 : offset - (0x8000 - extra) < UNITS_PER_WORD)
17876 if (in_p)
17877 sri->icode = CODE_FOR_reload_si_load;
17878 else
17879 sri->icode = CODE_FOR_reload_si_store;
17880 sri->extra_cost = 2;
17881 ret = NO_REGS;
17882 done_p = true;
17884 else
17885 default_p = true;
17887 else
17888 default_p = true;
17891 if (!done_p)
17892 default_p = true;
17894 if (default_p)
17895 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
17897 gcc_assert (ret != ALL_REGS);
17899 if (TARGET_DEBUG_ADDR)
17901 fprintf (stderr,
17902 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
17903 "mode = %s",
17904 reg_class_names[ret],
17905 in_p ? "true" : "false",
17906 reg_class_names[rclass],
17907 GET_MODE_NAME (mode));
17909 if (reload_completed)
17910 fputs (", after reload", stderr);
17912 if (!done_p)
17913 fputs (", done_p not set", stderr);
17915 if (default_p)
17916 fputs (", default secondary reload", stderr);
17918 if (sri->icode != CODE_FOR_nothing)
17919 fprintf (stderr, ", reload func = %s, extra cost = %d",
17920 insn_data[sri->icode].name, sri->extra_cost);
17922 fputs ("\n", stderr);
17923 debug_rtx (x);
17926 return ret;
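/* Summary of the conventions above: returning NO_REGS with sri->icode set
   tells reload to run the helper pattern instead of allocating an
   intermediate register; returning a class such as FLOAT_REGS asks for an
   intermediate register from that class; and anything not handled here falls
   back to default_secondary_reload.  */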
17929 /* Better tracing for rs6000_secondary_reload_inner. */
17931 static void
17932 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
17933 bool store_p)
17935 rtx set, clobber;
17937 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
17939 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
17940 store_p ? "store" : "load");
17942 if (store_p)
17943 set = gen_rtx_SET (mem, reg);
17944 else
17945 set = gen_rtx_SET (reg, mem);
17947 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
17948 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
17951 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
17952 ATTRIBUTE_NORETURN;
17954 static void
17955 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
17956 bool store_p)
17958 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
17959 gcc_unreachable ();
17962 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
17963 reload helper functions. These were identified in
17964 rs6000_secondary_reload_memory, and if reload decided to use the secondary
17965 reload, it calls the insns:
17966 reload_<RELOAD:mode>_<P:mptrsize>_store
17967 reload_<RELOAD:mode>_<P:mptrsize>_load
17969 which in turn calls this function, to do whatever is necessary to create
17970 valid addresses. */
17972 void
17973 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
17975 int regno = true_regnum (reg);
17976 machine_mode mode = GET_MODE (reg);
17977 addr_mask_type addr_mask;
17978 rtx addr;
17979 rtx new_addr;
17980 rtx op_reg, op0, op1;
17981 rtx and_op;
17982 rtx cc_clobber;
17983 rtvec rv;
17985 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
17986 || !base_reg_operand (scratch, GET_MODE (scratch)))
17987 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17989 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
17990 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
17992 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
17993 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
17995 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
17996 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
17998 else
17999 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18001 /* Make sure the mode is valid in this register class. */
18002 if ((addr_mask & RELOAD_REG_VALID) == 0)
18003 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18005 if (TARGET_DEBUG_ADDR)
18006 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
18008 new_addr = addr = XEXP (mem, 0);
18009 switch (GET_CODE (addr))
18011 /* Does the register class support auto update forms for this mode? If
18012 not, do the update now. We don't need a scratch register, since the
18013 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
18014 case PRE_INC:
18015 case PRE_DEC:
18016 op_reg = XEXP (addr, 0);
18017 if (!base_reg_operand (op_reg, Pmode))
18018 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18020 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
18022 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
18023 new_addr = op_reg;
18025 break;
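/* Illustrative expansion: for (mem:DF (pre_inc (reg r9))) in a register
   class without update forms, the gen_add2_insn above typically becomes
   "addi 9,9,8" (the mode size), after which the access is rewritten to use
   plain (reg r9) as the address.  */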
18027 case PRE_MODIFY:
18028 op0 = XEXP (addr, 0);
18029 op1 = XEXP (addr, 1);
18030 if (!base_reg_operand (op0, Pmode)
18031 || GET_CODE (op1) != PLUS
18032 || !rtx_equal_p (op0, XEXP (op1, 0)))
18033 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18035 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
18037 emit_insn (gen_rtx_SET (op0, op1));
18038 new_addr = op0;
18040 break;
18042 /* Do we need to simulate AND -16 to clear the bottom address bits used
18043 in VMX load/stores? */
18044 case AND:
18045 op0 = XEXP (addr, 0);
18046 op1 = XEXP (addr, 1);
18047 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
18049 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
18050 op_reg = op0;
18052 else if (GET_CODE (op0) == PLUS)
18054 emit_insn (gen_rtx_SET (scratch, op0));
18055 op_reg = scratch;
18058 else
18059 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18061 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
18062 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
18063 rv = gen_rtvec (2, gen_rtx_SET (scratch, and_op), cc_clobber);
18064 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
18065 new_addr = scratch;
18067 break;
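/* Illustrative expansion: for (mem:V4SI (and (plus r9 r10) -16)) in a class
   without the AND form, the code above emits "add scratch,9,10" followed by
   a masking AND emitted as a PARALLEL with a CC-mode scratch clobber (since
   the and-with--16 pattern can set a condition register), leaving a
   16-byte-aligned address in the scratch register.  */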
18069 /* If this is an indirect address, make sure it is a base register. */
18070 case REG:
18071 case SUBREG:
18072 if (!base_reg_operand (addr, GET_MODE (addr)))
18074 emit_insn (gen_rtx_SET (scratch, addr));
18075 new_addr = scratch;
18077 break;
18079 /* If this is an indexed address, make sure the register class can handle
18080 indexed addresses for this mode. */
18081 case PLUS:
18082 op0 = XEXP (addr, 0);
18083 op1 = XEXP (addr, 1);
18084 if (!base_reg_operand (op0, Pmode))
18085 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18087 else if (int_reg_operand (op1, Pmode))
18089 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18091 emit_insn (gen_rtx_SET (scratch, addr));
18092 new_addr = scratch;
18096 /* Make sure the register class can handle offset addresses. */
18097 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
18099 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18101 emit_insn (gen_rtx_SET (scratch, addr));
18102 new_addr = scratch;
18106 else
18107 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18109 break;
18111 case LO_SUM:
18112 op0 = XEXP (addr, 0);
18113 op1 = XEXP (addr, 1);
18114 if (!base_reg_operand (op0, Pmode))
18115 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18117 else if (int_reg_operand (op1, Pmode))
18119 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
18121 emit_insn (gen_rtx_SET (scratch, addr));
18122 new_addr = scratch;
18126 /* Make sure the register class can handle offset addresses. */
18127 else if (legitimate_lo_sum_address_p (mode, addr, false))
18129 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
18131 emit_insn (gen_rtx_SET (scratch, addr));
18132 new_addr = scratch;
18136 else
18137 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18139 break;
18141 case SYMBOL_REF:
18142 case CONST:
18143 case LABEL_REF:
18144 rs6000_emit_move (scratch, addr, Pmode);
18145 new_addr = scratch;
18146 break;
18148 default:
18149 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
18152 /* Adjust the address if it changed. */
18153 if (addr != new_addr)
18155 mem = replace_equiv_address_nv (mem, new_addr);
18156 if (TARGET_DEBUG_ADDR)
18157 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
18160 /* Now create the move. */
18161 if (store_p)
18162 emit_insn (gen_rtx_SET (mem, reg));
18163 else
18164 emit_insn (gen_rtx_SET (reg, mem));
18166 return;
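/* Taken together, the switch above only ever rewrites the address: the final
   move is always a simple (set (mem) (reg)) or (set (reg) (mem)) that the
   normal move patterns can handle, with the scratch register carrying any
   address arithmetic that the target register class cannot.  */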
18169 /* Convert reloads involving 64-bit gprs and misaligned offset
18170 addressing, or multiple 32-bit gprs and offsets that are too large,
18171 to use indirect addressing. */
18173 void
18174 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
18176 int regno = true_regnum (reg);
18177 enum reg_class rclass;
18178 rtx addr;
18179 rtx scratch_or_premodify = scratch;
18181 if (TARGET_DEBUG_ADDR)
18183 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
18184 store_p ? "store" : "load");
18185 fprintf (stderr, "reg:\n");
18186 debug_rtx (reg);
18187 fprintf (stderr, "mem:\n");
18188 debug_rtx (mem);
18189 fprintf (stderr, "scratch:\n");
18190 debug_rtx (scratch);
18193 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
18194 gcc_assert (GET_CODE (mem) == MEM);
18195 rclass = REGNO_REG_CLASS (regno);
18196 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
18197 addr = XEXP (mem, 0);
18199 if (GET_CODE (addr) == PRE_MODIFY)
18201 gcc_assert (REG_P (XEXP (addr, 0))
18202 && GET_CODE (XEXP (addr, 1)) == PLUS
18203 && XEXP (XEXP (addr, 1), 0) == XEXP (addr, 0));
18204 scratch_or_premodify = XEXP (addr, 0);
18205 if (!HARD_REGISTER_P (scratch_or_premodify))
18206 /* If we have a pseudo here then reload will have arranged
18207 to have it replaced, but only in the original insn.
18208 Use the replacement here too. */
18209 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
18211 /* RTL emitted by rs6000_secondary_reload_gpr uses RTL
18212 expressions from the original insn, without unsharing them.
18213 Any RTL that points into the original insn will of course
18214 have register replacements applied. That is why we don't
18215 need to look for replacements under the PLUS. */
18216 addr = XEXP (addr, 1);
18218 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
18220 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
18222 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
18224 /* Now create the move. */
18225 if (store_p)
18226 emit_insn (gen_rtx_SET (mem, reg));
18227 else
18228 emit_insn (gen_rtx_SET (reg, mem));
18230 return;
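/* Illustrative expansion: for a -m32 load of (mem:DI (plus r3 0x7ffc)) into
   r5/r6, the second lwz would need displacement 0x8000, which no longer fits
   a signed 16-bit field.  The code above therefore moves the whole address
   into the scratch register and rewrites the access as (mem:DI (reg scratch)),
   which splits cleanly into loads at 0(scratch) and 4(scratch).  Register
   numbers and offset are examples only.  */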
18233 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
18234 this function has any SDmode references. If we are on a power7 or later, we
18235 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
18236 can load/store the value. */
18238 static void
18239 rs6000_alloc_sdmode_stack_slot (void)
18241 tree t;
18242 basic_block bb;
18243 gimple_stmt_iterator gsi;
18245 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
18246 /* We use a different approach for dealing with the secondary
18247 memory in LRA. */
18248 if (ira_use_lra_p)
18249 return;
18251 if (TARGET_NO_SDMODE_STACK)
18252 return;
18254 FOR_EACH_BB_FN (bb, cfun)
18255 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
18257 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
18258 if (ret)
18260 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
18261 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
18262 SDmode, 0);
18263 return;
18267 /* Check for any SDmode parameters of the function. */
18268 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
18270 if (TREE_TYPE (t) == error_mark_node)
18271 continue;
18273 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
18274 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
18276 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
18277 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
18278 SDmode, 0);
18279 return;
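/* Why a DDmode-sized slot for SDmode values: the FPR side must use the
   8-byte lfd/stfd here, since lfs/stfs convert between single and double
   binary formats and would mangle the decimal bits, while LFIWZX/STFIWX
   (power7 and later) make the slot unnecessary altogether.  Padding the slot
   to a full doubleword keeps those 8-byte accesses in bounds;
   adjust_address_nv then views the slot as SDmode.  */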
18284 static void
18285 rs6000_instantiate_decls (void)
18287 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
18288 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
18291 /* Given an rtx X being reloaded into a reg required to be
18292 in class CLASS, return the class of reg to actually use.
18293 In general this is just CLASS; but on some machines
18294 in some cases it is preferable to use a more restrictive class.
18296 On the RS/6000, we have to return NO_REGS when we want to reload a
18297 floating-point CONST_DOUBLE to force it to be copied to memory.
18299 We also don't want to reload integer values into floating-point
18300 registers if we can at all help it. In fact, this can
18301 cause reload to die, if it tries to generate a reload of CTR
18302 into a FP register and discovers it doesn't have the memory location
18303 required.
18305 ??? Would it be a good idea to have reload do the converse, that is
18306 try to reload floating modes into FP registers if possible? */
18309 static enum reg_class
18310 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
18312 machine_mode mode = GET_MODE (x);
18313 bool is_constant = CONSTANT_P (x);
18315 /* For VSX, see if we should prefer FLOAT_REGS or ALTIVEC_REGS. Do not allow
18316 the reloading of address expressions using PLUS into floating point
18317 registers. */
18318 if (TARGET_VSX && VSX_REG_CLASS_P (rclass) && GET_CODE (x) != PLUS)
18320 if (is_constant)
18322 /* Zero is always allowed in all VSX registers. */
18323 if (x == CONST0_RTX (mode))
18324 return rclass;
18326 /* If this is a vector constant that can be formed with a few Altivec
18327 instructions, we want altivec registers. */
18328 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
18329 return ALTIVEC_REGS;
18331 /* Force constant to memory. */
18332 return NO_REGS;
18335 /* If this is a scalar floating point value, prefer the traditional
18336 floating point registers so that we can use D-form (register+offset)
18337 addressing. */
18338 if (GET_MODE_SIZE (mode) < 16)
18339 return FLOAT_REGS;
18341 /* Prefer the Altivec registers if Altivec is handling the vector
18342 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
18343 loads. */
18344 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
18345 || mode == V1TImode)
18346 return ALTIVEC_REGS;
18348 return rclass;
18351 if (is_constant || GET_CODE (x) == PLUS)
18353 if (reg_class_subset_p (GENERAL_REGS, rclass))
18354 return GENERAL_REGS;
18355 if (reg_class_subset_p (BASE_REGS, rclass))
18356 return BASE_REGS;
18357 return NO_REGS;
18360 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
18361 return GENERAL_REGS;
18363 return rclass;
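/* Illustrative results: a DFmode pseudo in a VSX class prefers FLOAT_REGS,
   keeping D-form reg+offset addressing available; V4SImode prefers
   ALTIVEC_REGS; a zero constant is fine in any VSX register so rclass is
   returned unchanged; an easy non-zero vector constant prefers ALTIVEC_REGS;
   and other constants return NO_REGS, forcing them to memory.  */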
18366 /* Debug version of rs6000_preferred_reload_class. */
18367 static enum reg_class
18368 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
18370 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
18372 fprintf (stderr,
18373 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
18374 "mode = %s, x:\n",
18375 reg_class_names[ret], reg_class_names[rclass],
18376 GET_MODE_NAME (GET_MODE (x)));
18377 debug_rtx (x);
18379 return ret;
18382 /* If we are copying between FP or AltiVec registers and anything else, we need
18383 a memory location. The exception is when we are targeting ppc64 and the
18384 move to/from fpr to gpr instructions are available. Also, under VSX, you
18385 can copy vector registers from the FP register set to the Altivec register
18386 set and vice versa. */
18388 static bool
18389 rs6000_secondary_memory_needed (enum reg_class from_class,
18390 enum reg_class to_class,
18391 machine_mode mode)
18393 enum rs6000_reg_type from_type, to_type;
18394 bool altivec_p = ((from_class == ALTIVEC_REGS)
18395 || (to_class == ALTIVEC_REGS));
18397 /* If a simple/direct move is available, we don't need secondary memory. */
18398 from_type = reg_class_to_reg_type[(int)from_class];
18399 to_type = reg_class_to_reg_type[(int)to_class];
18401 if (rs6000_secondary_reload_move (to_type, from_type, mode,
18402 (secondary_reload_info *)0, altivec_p))
18403 return false;
18405 /* If we have a floating point or vector register class, we need to use
18406 memory to transfer the data. */
18407 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
18408 return true;
18410 return false;
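/* Example: copying a DFmode value between a GPR and an FPR needs a stack
   slot on older processors, so this returns true; on a target with direct
   moves (e.g. power8's mtvsrd/mfvsrd), the rs6000_secondary_reload_move test
   above succeeds first and no memory is needed.  */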
18413 /* Debug version of rs6000_secondary_memory_needed. */
18414 static bool
18415 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
18416 enum reg_class to_class,
18417 machine_mode mode)
18419 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
18421 fprintf (stderr,
18422 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
18423 "to_class = %s, mode = %s\n",
18424 ret ? "true" : "false",
18425 reg_class_names[from_class],
18426 reg_class_names[to_class],
18427 GET_MODE_NAME (mode));
18429 return ret;
18432 /* Return the register class of a scratch register needed to copy IN into
18433 or out of a register in RCLASS in MODE. If it can be done directly,
18434 NO_REGS is returned. */
18436 static enum reg_class
18437 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
18438 rtx in)
18440 int regno;
18442 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
18443 #if TARGET_MACHO
18444 && MACHOPIC_INDIRECT
18445 #endif
18448 /* We cannot copy a symbolic operand directly into anything
18449 other than BASE_REGS for TARGET_ELF. So indicate that a
18450 register from BASE_REGS is needed as an intermediate
18451 register.
18453 On Darwin, pic addresses require a load from memory, which
18454 needs a base register. */
18455 if (rclass != BASE_REGS
18456 && (GET_CODE (in) == SYMBOL_REF
18457 || GET_CODE (in) == HIGH
18458 || GET_CODE (in) == LABEL_REF
18459 || GET_CODE (in) == CONST))
18460 return BASE_REGS;
18463 if (GET_CODE (in) == REG)
18465 regno = REGNO (in);
18466 if (regno >= FIRST_PSEUDO_REGISTER)
18468 regno = true_regnum (in);
18469 if (regno >= FIRST_PSEUDO_REGISTER)
18470 regno = -1;
18473 else if (GET_CODE (in) == SUBREG)
18475 regno = true_regnum (in);
18476 if (regno >= FIRST_PSEUDO_REGISTER)
18477 regno = -1;
18479 else
18480 regno = -1;
18482 /* If we have VSX register moves, prefer moving scalar values between
18483 Altivec registers and GPR by going via an FPR (and then via memory)
18484 instead of reloading the secondary memory address for Altivec moves. */
18485 if (TARGET_VSX
18486 && GET_MODE_SIZE (mode) < 16
18487 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
18488 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
18489 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
18490 && (regno >= 0 && INT_REGNO_P (regno)))))
18491 return FLOAT_REGS;
18493 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
18494 into anything. */
18495 if (rclass == GENERAL_REGS || rclass == BASE_REGS
18496 || (regno >= 0 && INT_REGNO_P (regno)))
18497 return NO_REGS;
18499 /* Constants, memory, and VSX registers can go into VSX registers (both the
18500 traditional floating point and the altivec registers). */
18501 if (rclass == VSX_REGS
18502 && (regno == -1 || VSX_REGNO_P (regno)))
18503 return NO_REGS;
18505 /* Constants, memory, and FP registers can go into FP registers. */
18506 if ((regno == -1 || FP_REGNO_P (regno))
18507 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
18508 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
18510 /* Memory, and AltiVec registers can go into AltiVec registers. */
18511 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
18512 && rclass == ALTIVEC_REGS)
18513 return NO_REGS;
18515 /* We can copy among the CR registers. */
18516 if ((rclass == CR_REGS || rclass == CR0_REGS)
18517 && regno >= 0 && CR_REGNO_P (regno))
18518 return NO_REGS;
18520 /* Otherwise, we need GENERAL_REGS. */
18521 return GENERAL_REGS;
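/* Example: under VSX, moving an SFmode value from an Altivec register to a
   GPR returns FLOAT_REGS above, so the value travels Altivec reg -> FPR
   (a register move) and then FPR -> memory -> GPR, rather than reloading a
   secondary memory address directly for the Altivec register.  */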
18524 /* Debug version of rs6000_secondary_reload_class. */
18525 static enum reg_class
18526 rs6000_debug_secondary_reload_class (enum reg_class rclass,
18527 machine_mode mode, rtx in)
18529 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
18530 fprintf (stderr,
18531 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
18532 "mode = %s, input rtx:\n",
18533 reg_class_names[ret], reg_class_names[rclass],
18534 GET_MODE_NAME (mode));
18535 debug_rtx (in);
18537 return ret;
18540 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
18542 static bool
18543 rs6000_cannot_change_mode_class (machine_mode from,
18544 machine_mode to,
18545 enum reg_class rclass)
18547 unsigned from_size = GET_MODE_SIZE (from);
18548 unsigned to_size = GET_MODE_SIZE (to);
18550 if (from_size != to_size)
18552 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
18554 if (reg_classes_intersect_p (xclass, rclass))
18556 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
18557 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
18558 bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
18559 bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
18561 /* Don't allow 64-bit types to overlap with 128-bit types that take a
18562 single register under VSX because the scalar part of the register
18563 is in the upper 64-bits, and not the lower 64-bits. Types like
18564 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
18565 IEEE floating point can't overlap, and neither can small
18566 values. */
18568 if (to_float128_vector_p && from_float128_vector_p)
18569 return false;
18571 else if (to_float128_vector_p || from_float128_vector_p)
18572 return true;
18574 /* TDmode in floating-mode registers must always go into a register
18575 pair with the most significant word in the even-numbered register
18576 to match ISA requirements. In little-endian mode, this does not
18577 match subreg numbering, so we cannot allow subregs. */
18578 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
18579 return true;
18581 if (from_size < 8 || to_size < 8)
18582 return true;
18584 if (from_size == 8 && (8 * to_nregs) != to_size)
18585 return true;
18587 if (to_size == 8 && (8 * from_nregs) != from_size)
18588 return true;
18590 return false;
18592 else
18593 return false;
18596 if (TARGET_E500_DOUBLE
18597 && ((((to) == DFmode) + ((from) == DFmode)) == 1
18598 || (((to) == TFmode) + ((from) == TFmode)) == 1
18599 || (((to) == IFmode) + ((from) == IFmode)) == 1
18600 || (((to) == KFmode) + ((from) == KFmode)) == 1
18601 || (((to) == DDmode) + ((from) == DDmode)) == 1
18602 || (((to) == TDmode) + ((from) == TDmode)) == 1
18603 || (((to) == DImode) + ((from) == DImode)) == 1))
18604 return true;
18606 /* Since the VSX register set includes traditional floating point registers
18607 and altivec registers, just check for the size being different instead of
18608 trying to check whether the modes are vector modes. Otherwise it won't
18609 allow say DF and DI to change classes. For types like TFmode and TDmode
18610 that take 2 64-bit registers, rather than a single 128-bit register, don't
18611 allow subregs of those types to other 128 bit types. */
18612 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
18614 unsigned num_regs = (from_size + 15) / 16;
18615 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
18616 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
18617 return true;
18619 return (from_size != 8 && from_size != 16);
18622 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
18623 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
18624 return true;
18626 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
18627 && reg_classes_intersect_p (GENERAL_REGS, rclass))
18628 return true;
18630 return false;
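/* Example: (subreg:DI (reg:KF vsN) 0) is rejected when KFmode lives in a
   single VSX register, because the 64-bit scalar sits in the upper half of
   the register while subreg numbering assumes memory layout; TFmode/TDmode,
   which occupy a pair of 64-bit registers, can still overlap DImode.  */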
18633 /* Debug version of rs6000_cannot_change_mode_class. */
18634 static bool
18635 rs6000_debug_cannot_change_mode_class (machine_mode from,
18636 machine_mode to,
18637 enum reg_class rclass)
18639 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
18641 fprintf (stderr,
18642 "rs6000_cannot_change_mode_class, return %s, from = %s, "
18643 "to = %s, rclass = %s\n",
18644 ret ? "true" : "false",
18645 GET_MODE_NAME (from), GET_MODE_NAME (to),
18646 reg_class_names[rclass]);
18648 return ret;
18651 /* Return a string to do a move operation of 128 bits of data. */
18653 const char *
18654 rs6000_output_move_128bit (rtx operands[])
18656 rtx dest = operands[0];
18657 rtx src = operands[1];
18658 machine_mode mode = GET_MODE (dest);
18659 int dest_regno;
18660 int src_regno;
18661 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
18662 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
18664 if (REG_P (dest))
18666 dest_regno = REGNO (dest);
18667 dest_gpr_p = INT_REGNO_P (dest_regno);
18668 dest_fp_p = FP_REGNO_P (dest_regno);
18669 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
18670 dest_vsx_p = dest_fp_p | dest_vmx_p;
18672 else
18674 dest_regno = -1;
18675 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
18678 if (REG_P (src))
18680 src_regno = REGNO (src);
18681 src_gpr_p = INT_REGNO_P (src_regno);
18682 src_fp_p = FP_REGNO_P (src_regno);
18683 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
18684 src_vsx_p = src_fp_p | src_vmx_p;
18686 else
18688 src_regno = -1;
18689 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
18692 /* Register moves. */
18693 if (dest_regno >= 0 && src_regno >= 0)
18695 if (dest_gpr_p)
18697 if (src_gpr_p)
18698 return "#";
18700 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
18701 return "#";
18704 else if (TARGET_VSX && dest_vsx_p)
18706 if (src_vsx_p)
18707 return "xxlor %x0,%x1,%x1";
18709 else if (TARGET_DIRECT_MOVE && src_gpr_p)
18710 return "#";
18713 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
18714 return "vor %0,%1,%1";
18716 else if (dest_fp_p && src_fp_p)
18717 return "#";
18720 /* Loads. */
18721 else if (dest_regno >= 0 && MEM_P (src))
18723 if (dest_gpr_p)
18725 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
18726 return "lq %0,%1";
18727 else
18728 return "#";
18731 else if (TARGET_ALTIVEC && dest_vmx_p
18732 && altivec_indexed_or_indirect_operand (src, mode))
18733 return "lvx %0,%y1";
18735 else if (TARGET_VSX && dest_vsx_p)
18737 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
18738 return "lxvw4x %x0,%y1";
18739 else
18740 return "lxvd2x %x0,%y1";
18743 else if (TARGET_ALTIVEC && dest_vmx_p)
18744 return "lvx %0,%y1";
18746 else if (dest_fp_p)
18747 return "#";
18750 /* Stores. */
18751 else if (src_regno >= 0 && MEM_P (dest))
18753 if (src_gpr_p)
18755 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
18756 return "stq %1,%0";
18757 else
18758 return "#";
18761 else if (TARGET_ALTIVEC && src_vmx_p
19762 && altivec_indexed_or_indirect_operand (dest, mode))
18763 return "stvx %1,%y0";
18765 else if (TARGET_VSX && src_vsx_p)
18767 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
18768 return "stxvw4x %x1,%y0";
18769 else
18770 return "stxvd2x %x1,%y0";
18773 else if (TARGET_ALTIVEC && src_vmx_p)
18774 return "stvx %1,%y0";
18776 else if (src_fp_p)
18777 return "#";
18780 /* Constants. */
18781 else if (dest_regno >= 0
18782 && (GET_CODE (src) == CONST_INT
18783 || GET_CODE (src) == CONST_WIDE_INT
18784 || GET_CODE (src) == CONST_DOUBLE
18785 || GET_CODE (src) == CONST_VECTOR))
18787 if (dest_gpr_p)
18788 return "#";
18790 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
18791 return "xxlxor %x0,%x0,%x0";
18793 else if (TARGET_ALTIVEC && dest_vmx_p)
18794 return output_vec_const_move (operands);
18797 fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
18800 /* Validate a 128-bit move. */
18801 bool
18802 rs6000_move_128bit_ok_p (rtx operands[])
18804 machine_mode mode = GET_MODE (operands[0]);
18805 return (gpc_reg_operand (operands[0], mode)
18806 || gpc_reg_operand (operands[1], mode));
18809 /* Return true if a 128-bit move needs to be split. */
18810 bool
18811 rs6000_split_128bit_ok_p (rtx operands[])
18813 if (!reload_completed)
18814 return false;
18816 if (!gpr_or_gpr_p (operands[0], operands[1]))
18817 return false;
18819 if (quad_load_store_p (operands[0], operands[1]))
18820 return false;
18822 return true;
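/* Example: a TImode GPR-to-GPR copy prints as "#" above and is split after
   reload into two 64-bit (or four 32-bit) register moves; a quad-word memory
   access that satisfies quad_load_store_p stays whole and prints as
   lq/stq instead.  */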
18826 /* Given a comparison operation, return the bit number in CCR to test. We
18827 know this is a valid comparison.
18829 SCC_P is 1 if this is for an scc. That means that %D will have been
18830 used instead of %C, so the bits will be in different places.
18832 Return -1 if OP isn't a valid comparison for some reason. */
18834 int
18835 ccr_bit (rtx op, int scc_p)
18837 enum rtx_code code = GET_CODE (op);
18838 machine_mode cc_mode;
18839 int cc_regnum;
18840 int base_bit;
18841 rtx reg;
18843 if (!COMPARISON_P (op))
18844 return -1;
18846 reg = XEXP (op, 0);
18848 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
18850 cc_mode = GET_MODE (reg);
18851 cc_regnum = REGNO (reg);
18852 base_bit = 4 * (cc_regnum - CR0_REGNO);
18854 validate_condition_mode (code, cc_mode);
18856 /* When generating a sCOND operation, only positive conditions are
18857 allowed. */
18858 gcc_assert (!scc_p
18859 || code == EQ || code == GT || code == LT || code == UNORDERED
18860 || code == GTU || code == LTU);
18862 switch (code)
18864 case NE:
18865 return scc_p ? base_bit + 3 : base_bit + 2;
18866 case EQ:
18867 return base_bit + 2;
18868 case GT: case GTU: case UNLE:
18869 return base_bit + 1;
18870 case LT: case LTU: case UNGE:
18871 return base_bit;
18872 case ORDERED: case UNORDERED:
18873 return base_bit + 3;
18875 case GE: case GEU:
18876 /* If scc, we will have done a cror to put the bit in the
18877 unordered position. So test that bit. For integer, this is ! LT
18878 unless this is an scc insn. */
18879 return scc_p ? base_bit + 3 : base_bit;
18881 case LE: case LEU:
18882 return scc_p ? base_bit + 3 : base_bit + 1;
18884 default:
18885 gcc_unreachable ();
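/* Worked example (register numbers as on this port, where the CR fields
   occupy consecutive hard registers starting at CR0_REGNO): for
   (eq (reg:CC cr2) (const_int 0)), base_bit = 4 * 2 = 8 and the EQ bit is
   10.  For an scc GE, the cror has already parked the result in the
   unordered slot, so base_bit + 3 = 11 is tested instead.  */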
18889 /* Return the GOT register. */
18891 rtx
18892 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
18894 /* The second flow pass currently (June 1999) can't update
18895 regs_ever_live without disturbing other parts of the compiler, so
18896 update it here to make the prolog/epilogue code happy. */
18897 if (!can_create_pseudo_p ()
18898 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
18899 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
18901 crtl->uses_pic_offset_table = 1;
18903 return pic_offset_table_rtx;
18906 static rs6000_stack_t stack_info;
18908 /* Function to init struct machine_function.
18909 This will be called, via a pointer variable,
18910 from push_function_context. */
18912 static struct machine_function *
18913 rs6000_init_machine_status (void)
18915 stack_info.reload_completed = 0;
18916 return ggc_cleared_alloc<machine_function> ();
18919 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
18921 /* Write out a function code label. */
18923 void
18924 rs6000_output_function_entry (FILE *file, const char *fname)
18926 if (fname[0] != '.')
18928 switch (DEFAULT_ABI)
18930 default:
18931 gcc_unreachable ();
18933 case ABI_AIX:
18934 if (DOT_SYMBOLS)
18935 putc ('.', file);
18936 else
18937 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
18938 break;
18940 case ABI_ELFv2:
18941 case ABI_V4:
18942 case ABI_DARWIN:
18943 break;
18947 RS6000_OUTPUT_BASENAME (file, fname);
18950 /* Print an operand. Recognize special options, documented below. */
18952 #if TARGET_ELF
18953 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
18954 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
18955 #else
18956 #define SMALL_DATA_RELOC "sda21"
18957 #define SMALL_DATA_REG 0
18958 #endif
18960 void
18961 print_operand (FILE *file, rtx x, int code)
18963 int i;
18964 unsigned HOST_WIDE_INT uval;
18966 switch (code)
18968 /* %a is output_address. */
18970 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
18971 output_operand. */
18973 case 'D':
18974 /* Like 'J' but get to the GT bit only. */
18975 gcc_assert (REG_P (x));
18977 /* Bit 1 is GT bit. */
18978 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
18980 /* Add one for shift count in rlinm for scc. */
18981 fprintf (file, "%d", i + 1);
18982 return;
18984 case 'e':
18985 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
18986 if (! INT_P (x))
18988 output_operand_lossage ("invalid %%e value");
18989 return;
18992 uval = INTVAL (x);
18993 if ((uval & 0xffff) == 0 && uval != 0)
18994 putc ('s', file);
18995 return;
18997 case 'E':
18998 /* X is a CR register. Print the number of the EQ bit of the CR */
18999 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19000 output_operand_lossage ("invalid %%E value");
19001 else
19002 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
19003 return;
19005 case 'f':
19006 /* X is a CR register. Print the shift count needed to move it
19007 to the high-order four bits. */
19008 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19009 output_operand_lossage ("invalid %%f value");
19010 else
19011 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
19012 return;
19014 case 'F':
19015 /* Similar, but print the count for the rotate in the opposite
19016 direction. */
19017 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19018 output_operand_lossage ("invalid %%F value");
19019 else
19020 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
19021 return;
19023 case 'G':
19024 /* X is a constant integer. If it is negative, print "m",
19025 otherwise print "z". This is to make an aze or ame insn. */
19026 if (GET_CODE (x) != CONST_INT)
19027 output_operand_lossage ("invalid %%G value");
19028 else if (INTVAL (x) >= 0)
19029 putc ('z', file);
19030 else
19031 putc ('m', file);
19032 return;
19034 case 'h':
19035 /* If constant, output low-order five bits. Otherwise, write
19036 normally. */
19037 if (INT_P (x))
19038 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
19039 else
19040 print_operand (file, x, 0);
19041 return;
19043 case 'H':
19044 /* If constant, output low-order six bits. Otherwise, write
19045 normally. */
19046 if (INT_P (x))
19047 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
19048 else
19049 print_operand (file, x, 0);
19050 return;
19052 case 'I':
19053 /* Print `i' if this is a constant, else nothing. */
19054 if (INT_P (x))
19055 putc ('i', file);
19056 return;
19058 case 'j':
19059 /* Write the bit number in CCR for jump. */
19060 i = ccr_bit (x, 0);
19061 if (i == -1)
19062 output_operand_lossage ("invalid %%j code");
19063 else
19064 fprintf (file, "%d", i);
19065 return;
19067 case 'J':
19068 /* Similar, but add one for shift count in rlinm for scc and pass
19069 scc flag to `ccr_bit'. */
19070 i = ccr_bit (x, 1);
19071 if (i == -1)
19072 output_operand_lossage ("invalid %%J code");
19073 else
19074 /* If we want bit 31, write a shift count of zero, not 32. */
19075 fprintf (file, "%d", i == 31 ? 0 : i + 1);
19076 return;
19078 case 'k':
19079 /* X must be a constant. Write the 1's complement of the
19080 constant. */
19081 if (! INT_P (x))
19082 output_operand_lossage ("invalid %%k value");
19083 else
19084 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
19085 return;
19087 case 'K':
19088 /* X must be a symbolic constant on ELF. Write an
19089 expression suitable for an 'addi' that adds in the low 16
19090 bits of the MEM. */
19091 if (GET_CODE (x) == CONST)
19093 if (GET_CODE (XEXP (x, 0)) != PLUS
19094 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
19095 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
19096 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
19097 output_operand_lossage ("invalid %%K value");
19099 print_operand_address (file, x);
19100 fputs ("@l", file);
19101 return;
19103 /* %l is output_asm_label. */
19105 case 'L':
19106 /* Write second word of DImode or DFmode reference. Works on register
19107 or non-indexed memory only. */
19108 if (REG_P (x))
19109 fputs (reg_names[REGNO (x) + 1], file);
19110 else if (MEM_P (x))
19112 /* Handle possible auto-increment. Since it is pre-increment and
19113 we have already done it, we can just use an offset of word. */
19114 if (GET_CODE (XEXP (x, 0)) == PRE_INC
19115 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
19116 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
19117 UNITS_PER_WORD));
19118 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
19119 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
19120 UNITS_PER_WORD));
19121 else
19122 output_address (XEXP (adjust_address_nv (x, SImode,
19123 UNITS_PER_WORD),
19124 0));
19126 if (small_data_operand (x, GET_MODE (x)))
19127 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19128 reg_names[SMALL_DATA_REG]);
19130 return;
19132 case 'N':
19133 /* Write the number of elements in the vector times 4. */
19134 if (GET_CODE (x) != PARALLEL)
19135 output_operand_lossage ("invalid %%N value");
19136 else
19137 fprintf (file, "%d", XVECLEN (x, 0) * 4);
19138 return;
19140 case 'O':
19141 /* Similar, but subtract 1 first. */
19142 if (GET_CODE (x) != PARALLEL)
19143 output_operand_lossage ("invalid %%O value");
19144 else
19145 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
19146 return;
19148 case 'p':
19149 /* X is a CONST_INT that is a power of two. Output the logarithm. */
19150 if (! INT_P (x)
19151 || INTVAL (x) < 0
19152 || (i = exact_log2 (INTVAL (x))) < 0)
19153 output_operand_lossage ("invalid %%p value");
19154 else
19155 fprintf (file, "%d", i);
19156 return;
19158 case 'P':
19159 /* The operand must be an indirect memory reference. The result
19160 is the register name. */
19161 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
19162 || REGNO (XEXP (x, 0)) >= 32)
19163 output_operand_lossage ("invalid %%P value");
19164 else
19165 fputs (reg_names[REGNO (XEXP (x, 0))], file);
19166 return;
19168 case 'q':
19169 /* This outputs the logical code corresponding to a boolean
19170 expression. The expression may have one or both operands
19171 negated (if one, only the first one). For condition register
19172 logical operations, it will also treat the negated
19173 CR codes as NOTs, but not handle NOTs of them. */
19175 const char *const *t = 0;
19176 const char *s;
19177 enum rtx_code code = GET_CODE (x);
19178 static const char * const tbl[3][3] = {
19179 { "and", "andc", "nor" },
19180 { "or", "orc", "nand" },
19181 { "xor", "eqv", "xor" } };
19183 if (code == AND)
19184 t = tbl[0];
19185 else if (code == IOR)
19186 t = tbl[1];
19187 else if (code == XOR)
19188 t = tbl[2];
19189 else
19190 output_operand_lossage ("invalid %%q value");
19192 if (GET_CODE (XEXP (x, 0)) != NOT)
19193 s = t[0];
19194 else
19196 if (GET_CODE (XEXP (x, 1)) == NOT)
19197 s = t[2];
19198 else
19199 s = t[1];
19202 fputs (s, file);
19204 return;
19206 case 'Q':
19207 if (! TARGET_MFCRF)
19208 return;
19209 fputc (',', file);
19210 /* FALLTHRU */
19212 case 'R':
19213 /* X is a CR register. Print the mask for `mtcrf'. */
19214 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
19215 output_operand_lossage ("invalid %%R value");
19216 else
19217 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
19218 return;
19220 case 't':
19221 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
19222 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
19224 /* Bit 3 is OV bit. */
19225 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
19227 /* If we want bit 31, write a shift count of zero, not 32. */
19228 fprintf (file, "%d", i == 31 ? 0 : i + 1);
19229 return;
19231 case 'T':
19232 /* Print the symbolic name of a branch target register. */
19233 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
19234 && REGNO (x) != CTR_REGNO))
19235 output_operand_lossage ("invalid %%T value");
19236 else if (REGNO (x) == LR_REGNO)
19237 fputs ("lr", file);
19238 else
19239 fputs ("ctr", file);
19240 return;
19242 case 'u':
19243 /* High-order or low-order 16 bits of constant, whichever is non-zero,
19244 for use in unsigned operand. */
19245 if (! INT_P (x))
19247 output_operand_lossage ("invalid %%u value");
19248 return;
19251 uval = INTVAL (x);
19252 if ((uval & 0xffff) == 0)
19253 uval >>= 16;
19255 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
19256 return;
19258 case 'v':
19259 /* High-order 16 bits of constant for use in signed operand. */
19260 if (! INT_P (x))
19261 output_operand_lossage ("invalid %%v value");
19262 else
19263 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
19264 (INTVAL (x) >> 16) & 0xffff);
19265 return;
19267 case 'U':
19268 /* Print `u' if this has an auto-increment or auto-decrement. */
19269 if (MEM_P (x)
19270 && (GET_CODE (XEXP (x, 0)) == PRE_INC
19271 || GET_CODE (XEXP (x, 0)) == PRE_DEC
19272 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
19273 putc ('u', file);
19274 return;
19276 case 'V':
19277 /* Print the trap code for this operand. */
19278 switch (GET_CODE (x))
19280 case EQ:
19281 fputs ("eq", file); /* 4 */
19282 break;
19283 case NE:
19284 fputs ("ne", file); /* 24 */
19285 break;
19286 case LT:
19287 fputs ("lt", file); /* 16 */
19288 break;
19289 case LE:
19290 fputs ("le", file); /* 20 */
19291 break;
19292 case GT:
19293 fputs ("gt", file); /* 8 */
19294 break;
19295 case GE:
19296 fputs ("ge", file); /* 12 */
19297 break;
19298 case LTU:
19299 fputs ("llt", file); /* 2 */
19300 break;
19301 case LEU:
19302 fputs ("lle", file); /* 6 */
19303 break;
19304 case GTU:
19305 fputs ("lgt", file); /* 1 */
19306 break;
19307 case GEU:
19308 fputs ("lge", file); /* 5 */
19309 break;
19310 default:
19311 gcc_unreachable ();
19313 break;
19315 case 'w':
19316 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
19317 normally. */
19318 if (INT_P (x))
19319 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
19320 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
19321 else
19322 print_operand (file, x, 0);
19323 return;
19325 case 'x':
19326 /* X is a FPR or Altivec register used in a VSX context. */
19327 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
19328 output_operand_lossage ("invalid %%x value");
19329 else
19331 int reg = REGNO (x);
19332 int vsx_reg = (FP_REGNO_P (reg)
19333 ? reg - 32
19334 : reg - FIRST_ALTIVEC_REGNO + 32);
19336 #ifdef TARGET_REGNAMES
19337 if (TARGET_REGNAMES)
19338 fprintf (file, "%%vs%d", vsx_reg);
19339 else
19340 #endif
19341 fprintf (file, "%d", vsx_reg);
19343 return;
19345 case 'X':
19346 if (MEM_P (x)
19347 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
19348 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
19349 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
19350 putc ('x', file);
19351 return;
19353 case 'Y':
19354 /* Like 'L', for third word of TImode/PTImode */
19355 if (REG_P (x))
19356 fputs (reg_names[REGNO (x) + 2], file);
19357 else if (MEM_P (x))
19359 if (GET_CODE (XEXP (x, 0)) == PRE_INC
19360 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
19361 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
19362 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
19363 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
19364 else
19365 output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
19366 if (small_data_operand (x, GET_MODE (x)))
19367 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19368 reg_names[SMALL_DATA_REG]);
19370 return;
19372 case 'z':
19373 /* X is a SYMBOL_REF. Write out the name preceded by a
19374 period and without any trailing data in brackets. Used for function
19375 names. If we are configured for System V (or the embedded ABI) on
19376 the PowerPC, do not emit the period, since those systems do not use
19377 TOCs and the like. */
19378 gcc_assert (GET_CODE (x) == SYMBOL_REF);
19380 /* For macho, check to see if we need a stub. */
19381 if (TARGET_MACHO)
19383 const char *name = XSTR (x, 0);
19384 #if TARGET_MACHO
19385 if (darwin_emit_branch_islands
19386 && MACHOPIC_INDIRECT
19387 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
19388 name = machopic_indirection_name (x, /*stub_p=*/true);
19389 #endif
19390 assemble_name (file, name);
19392 else if (!DOT_SYMBOLS)
19393 assemble_name (file, XSTR (x, 0));
19394 else
19395 rs6000_output_function_entry (file, XSTR (x, 0));
19396 return;
19398 case 'Z':
19399 /* Like 'L', for last word of TImode/PTImode. */
19400 if (REG_P (x))
19401 fputs (reg_names[REGNO (x) + 3], file);
19402 else if (MEM_P (x))
19404 if (GET_CODE (XEXP (x, 0)) == PRE_INC
19405 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
19406 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
19407 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
19408 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
19409 else
19410 output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
19411 if (small_data_operand (x, GET_MODE (x)))
19412 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19413 reg_names[SMALL_DATA_REG]);
19415 return;
19417 /* Print AltiVec or SPE memory operand. */
19418 case 'y':
19420 rtx tmp;
19422 gcc_assert (MEM_P (x));
19424 tmp = XEXP (x, 0);
19426 /* Ugly hack because %y is overloaded. */
19427 if ((TARGET_SPE || TARGET_E500_DOUBLE)
19428 && (GET_MODE_SIZE (GET_MODE (x)) == 8
19429 || FLOAT128_2REG_P (GET_MODE (x))
19430 || GET_MODE (x) == TImode
19431 || GET_MODE (x) == PTImode))
19433 /* Handle [reg]. */
19434 if (REG_P (tmp))
19436 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
19437 break;
19439 /* Handle [reg+UIMM]. */
19440 else if (GET_CODE (tmp) == PLUS &&
19441 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
19443 int x;
19445 gcc_assert (REG_P (XEXP (tmp, 0)));
19447 x = INTVAL (XEXP (tmp, 1));
19448 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
19449 break;
19452 /* Fall through. Must be [reg+reg]. */
19454 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
19455 && GET_CODE (tmp) == AND
19456 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
19457 && INTVAL (XEXP (tmp, 1)) == -16)
19458 tmp = XEXP (tmp, 0);
19459 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
19460 && GET_CODE (tmp) == PRE_MODIFY)
19461 tmp = XEXP (tmp, 1);
19462 if (REG_P (tmp))
19463 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
19464 else
19466 if (GET_CODE (tmp) != PLUS
19467 || !REG_P (XEXP (tmp, 0))
19468 || !REG_P (XEXP (tmp, 1)))
19470 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
19471 break;
19474 if (REGNO (XEXP (tmp, 0)) == 0)
19475 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
19476 reg_names[ REGNO (XEXP (tmp, 0)) ]);
19477 else
19478 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
19479 reg_names[ REGNO (XEXP (tmp, 1)) ]);
19481 break;
19484 case 0:
19485 if (REG_P (x))
19486 fprintf (file, "%s", reg_names[REGNO (x)]);
19487 else if (MEM_P (x))
19489 /* We need to handle PRE_INC and PRE_DEC here, since we need to
19490 know the width from the mode. */
19491 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
19492 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
19493 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
19494 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
19495 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
19496 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
19497 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
19498 output_address (XEXP (XEXP (x, 0), 1));
19499 else
19500 output_address (XEXP (x, 0));
19502 else
19504 if (toc_relative_expr_p (x, false))
19505 /* This hack along with a corresponding hack in
19506 rs6000_output_addr_const_extra arranges to output addends
19507 where the assembler expects to find them. eg.
19508 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
19509 without this hack would be output as "x@toc+4". We
19510 want "x+4@toc". */
19511 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
19512 else
19513 output_addr_const (file, x);
19515 return;
19517 case '&':
19518 if (const char *name = get_some_local_dynamic_name ())
19519 assemble_name (file, name);
19520 else
19521 output_operand_lossage ("'%%&' used without any "
19522 "local dynamic TLS references");
19523 return;
19525 default:
19526 output_operand_lossage ("invalid %%xn code");
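/* Illustrative uses of the codes above in output templates:
   "lwz %0,%1\n\tlwz %L0,%L1" loads a DImode value on a 32-bit target
   (first and second word); "%w" prints the sign-extended low 16 bits of a
   constant; "%u" prints whichever 16-bit half of a constant is non-zero,
   for oris/andis.-style instructions.  */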
19530 /* Print the address of an operand. */
19532 void
19533 print_operand_address (FILE *file, rtx x)
19535 if (REG_P (x))
19536 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
19537 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
19538 || GET_CODE (x) == LABEL_REF)
19540 output_addr_const (file, x);
19541 if (small_data_operand (x, GET_MODE (x)))
19542 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
19543 reg_names[SMALL_DATA_REG]);
19544 else
19545 gcc_assert (!TARGET_TOC);
19547 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
19548 && REG_P (XEXP (x, 1)))
19550 if (REGNO (XEXP (x, 0)) == 0)
19551 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
19552 reg_names[ REGNO (XEXP (x, 0)) ]);
19553 else
19554 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
19555 reg_names[ REGNO (XEXP (x, 1)) ]);
19557 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
19558 && GET_CODE (XEXP (x, 1)) == CONST_INT)
19559 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
19560 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
19561 #if TARGET_MACHO
19562 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
19563 && CONSTANT_P (XEXP (x, 1)))
19565 fprintf (file, "lo16(");
19566 output_addr_const (file, XEXP (x, 1));
19567 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
19569 #endif
19570 #if TARGET_ELF
19571 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
19572 && CONSTANT_P (XEXP (x, 1)))
19574 output_addr_const (file, XEXP (x, 1));
19575 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
19577 #endif
19578 else if (toc_relative_expr_p (x, false))
19580 /* This hack along with a corresponding hack in
19581 rs6000_output_addr_const_extra arranges to output addends
19582 where the assembler expects to find them. eg.
19583 (lo_sum (reg 9)
19584 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
19585 without this hack would be output as "x@toc+8@l(9)". We
19586 want "x+8@toc@l(9)". */
19587 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
19588 if (GET_CODE (x) == LO_SUM)
19589 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
19590 else
19591 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
19593 else
19594 gcc_unreachable ();
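/* Illustrative outputs: (reg 9) -> "0(9)"; (plus (reg 9) (reg 10)) ->
   "9,10", with r0 forced into the second slot since r0 as a base register
   reads as literal zero in X-form instructions;
   (plus (reg 9) (const_int 8)) -> "8(9)"; and a TOC reference prints as
   "x+8@toc@l(9)" via the tocrel hack above.  */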
19597 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
19599 static bool
19600 rs6000_output_addr_const_extra (FILE *file, rtx x)
19602 if (GET_CODE (x) == UNSPEC)
19603 switch (XINT (x, 1))
19605 case UNSPEC_TOCREL:
19606 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
19607 && REG_P (XVECEXP (x, 0, 1))
19608 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
19609 output_addr_const (file, XVECEXP (x, 0, 0));
19610 if (x == tocrel_base && tocrel_offset != const0_rtx)
19612 if (INTVAL (tocrel_offset) >= 0)
19613 fprintf (file, "+");
19614 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
19616 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
19618 putc ('-', file);
19619 assemble_name (file, toc_label_name);
19621 else if (TARGET_ELF)
19622 fputs ("@toc", file);
19623 return true;
19625 #if TARGET_MACHO
19626 case UNSPEC_MACHOPIC_OFFSET:
19627 output_addr_const (file, XVECEXP (x, 0, 0));
19628 putc ('-', file);
19629 machopic_output_function_base_name (file);
19630 return true;
19631 #endif
19633 return false;
19636 /* Target hook for assembling integer objects. The PowerPC version has
19637 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
19638 is defined. It also needs to handle DI-mode objects on 64-bit
19639 targets. */
19641 static bool
19642 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
19644 #ifdef RELOCATABLE_NEEDS_FIXUP
19645 /* Special handling for SI values. */
19646 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
19648 static int recurse = 0;
19650 /* For -mrelocatable, we mark all addresses that need to be fixed up in
19651 the .fixup section. Since the TOC section is already relocated, we
19652 don't need to mark it here. We used to skip the text section, but it
19653 should never be valid for relocated addresses to be placed in the text
19654 section. */
19655 if (TARGET_RELOCATABLE
19656 && in_section != toc_section
19657 && !recurse
19658 && !CONST_SCALAR_INT_P (x)
19659 && CONSTANT_P (x))
19661 char buf[256];
19663 recurse = 1;
19664 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
19665 fixuplabelno++;
19666 ASM_OUTPUT_LABEL (asm_out_file, buf);
19667 fprintf (asm_out_file, "\t.long\t(");
19668 output_addr_const (asm_out_file, x);
19669 fprintf (asm_out_file, ")@fixup\n");
19670 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
19671 ASM_OUTPUT_ALIGN (asm_out_file, 2);
19672 fprintf (asm_out_file, "\t.long\t");
19673 assemble_name (asm_out_file, buf);
19674 fprintf (asm_out_file, "\n\t.previous\n");
19675 recurse = 0;
19676 return true;
19678 /* Remove initial .'s to turn a -mcall-aixdesc function
19679 address into the address of the descriptor, not the function
19680 itself. */
19681 else if (GET_CODE (x) == SYMBOL_REF
19682 && XSTR (x, 0)[0] == '.'
19683 && DEFAULT_ABI == ABI_AIX)
19685 const char *name = XSTR (x, 0);
19686 while (*name == '.')
19687 name++;
19689 fprintf (asm_out_file, "\t.long\t%s\n", name);
19690 return true;
19693 #endif /* RELOCATABLE_NEEDS_FIXUP */
19694 return default_assemble_integer (x, size, aligned_p);
19697 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
19698 /* Emit an assembler directive to set symbol visibility for DECL to
19699 VISIBILITY_TYPE. */
19701 static void
19702 rs6000_assemble_visibility (tree decl, int vis)
19704 if (TARGET_XCOFF)
19705 return;
19707 /* Functions need to have their entry point symbol visibility set as
19708 well as their descriptor symbol visibility. */
19709 if (DEFAULT_ABI == ABI_AIX
19710 && DOT_SYMBOLS
19711 && TREE_CODE (decl) == FUNCTION_DECL)
19713 static const char * const visibility_types[] = {
19714 NULL, "internal", "hidden", "protected"
19717 const char *name, *type;
19719 name = ((* targetm.strip_name_encoding)
19720 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
19721 type = visibility_types[vis];
19723 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
19724 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
19726 else
19727 default_assemble_visibility (decl, vis);
19729 #endif
19731 enum rtx_code
19732 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
19734 /* Reversal of FP compares takes care -- an ordered compare
19735 becomes an unordered compare and vice versa. */
19736 if (mode == CCFPmode
19737 && (!flag_finite_math_only
19738 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
19739 || code == UNEQ || code == LTGT))
19740 return reverse_condition_maybe_unordered (code);
19741 else
19742 return reverse_condition (code);
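/* Example: reversing (lt ...) on CCFPmode yields UNGE rather than GE, so a
   NaN operand still ends up on the branch the original LT would have fallen
   through to; with -ffinite-math-only the cheaper plain reversal is used for
   the ordered codes.  */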
19745 /* Generate a compare for CODE. Return a brand-new rtx that
19746 represents the result of the compare. */
19748 static rtx
19749 rs6000_generate_compare (rtx cmp, machine_mode mode)
19751 machine_mode comp_mode;
19752 rtx compare_result;
19753 enum rtx_code code = GET_CODE (cmp);
19754 rtx op0 = XEXP (cmp, 0);
19755 rtx op1 = XEXP (cmp, 1);
19757 if (FLOAT_MODE_P (mode))
19758 comp_mode = CCFPmode;
19759 else if (code == GTU || code == LTU
19760 || code == GEU || code == LEU)
19761 comp_mode = CCUNSmode;
19762 else if ((code == EQ || code == NE)
19763 && unsigned_reg_p (op0)
19764 && (unsigned_reg_p (op1)
19765 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
19766 /* These are unsigned values, perhaps there will be a later
19767 ordering compare that can be shared with this one. */
19768 comp_mode = CCUNSmode;
19769 else
19770 comp_mode = CCmode;
19772 /* If we have an unsigned compare, make sure we don't have a signed value as
19773 an immediate. */
19774 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
19775 && INTVAL (op1) < 0)
19777 op0 = copy_rtx_if_shared (op0);
19778 op1 = force_reg (GET_MODE (op0), op1);
19779 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
19782 /* First, the compare. */
19783 compare_result = gen_reg_rtx (comp_mode);
19785 /* E500 FP compare instructions on the GPRs. Yuck! */
19786 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
19787 && FLOAT_MODE_P (mode))
19789 rtx cmp, or_result, compare_result2;
19790 machine_mode op_mode = GET_MODE (op0);
19791 bool reverse_p;
19793 if (op_mode == VOIDmode)
19794 op_mode = GET_MODE (op1);
19796 /* First reverse the condition codes that aren't directly supported. */
19797 switch (code)
19799 case NE:
19800 case UNLT:
19801 case UNLE:
19802 case UNGT:
19803 case UNGE:
19804 code = reverse_condition_maybe_unordered (code);
19805 reverse_p = true;
19806 break;
19808 case EQ:
19809 case LT:
19810 case LE:
19811 case GT:
19812 case GE:
19813 reverse_p = false;
19814 break;
19816 default:
19817 gcc_unreachable ();
19820 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
19821 This explains the following mess. */
19823 switch (code)
19825 case EQ:
19826 switch (op_mode)
19828 case SFmode:
19829 cmp = (flag_finite_math_only && !flag_trapping_math)
19830 ? gen_tstsfeq_gpr (compare_result, op0, op1)
19831 : gen_cmpsfeq_gpr (compare_result, op0, op1);
19832 break;
19834 case DFmode:
19835 cmp = (flag_finite_math_only && !flag_trapping_math)
19836 ? gen_tstdfeq_gpr (compare_result, op0, op1)
19837 : gen_cmpdfeq_gpr (compare_result, op0, op1);
19838 break;
19840 case TFmode:
19841 case IFmode:
19842 case KFmode:
19843 cmp = (flag_finite_math_only && !flag_trapping_math)
19844 ? gen_tsttfeq_gpr (compare_result, op0, op1)
19845 : gen_cmptfeq_gpr (compare_result, op0, op1);
19846 break;
19848 default:
19849 gcc_unreachable ();
19851 break;
19853 case GT:
19854 case GE:
19855 switch (op_mode)
19857 case SFmode:
19858 cmp = (flag_finite_math_only && !flag_trapping_math)
19859 ? gen_tstsfgt_gpr (compare_result, op0, op1)
19860 : gen_cmpsfgt_gpr (compare_result, op0, op1);
19861 break;
19863 case DFmode:
19864 cmp = (flag_finite_math_only && !flag_trapping_math)
19865 ? gen_tstdfgt_gpr (compare_result, op0, op1)
19866 : gen_cmpdfgt_gpr (compare_result, op0, op1);
19867 break;
19869 case TFmode:
19870 case IFmode:
19871 case KFmode:
19872 cmp = (flag_finite_math_only && !flag_trapping_math)
19873 ? gen_tsttfgt_gpr (compare_result, op0, op1)
19874 : gen_cmptfgt_gpr (compare_result, op0, op1);
19875 break;
19877 default:
19878 gcc_unreachable ();
19880 break;
19882 case LT:
19883 case LE:
19884 switch (op_mode)
19886 case SFmode:
19887 cmp = (flag_finite_math_only && !flag_trapping_math)
19888 ? gen_tstsflt_gpr (compare_result, op0, op1)
19889 : gen_cmpsflt_gpr (compare_result, op0, op1);
19890 break;
19892 case DFmode:
19893 cmp = (flag_finite_math_only && !flag_trapping_math)
19894 ? gen_tstdflt_gpr (compare_result, op0, op1)
19895 : gen_cmpdflt_gpr (compare_result, op0, op1);
19896 break;
19898 case TFmode:
19899 case IFmode:
19900 case KFmode:
19901 cmp = (flag_finite_math_only && !flag_trapping_math)
19902 ? gen_tsttflt_gpr (compare_result, op0, op1)
19903 : gen_cmptflt_gpr (compare_result, op0, op1);
19904 break;
19906 default:
19907 gcc_unreachable ();
19909 break;
19911 default:
19912 gcc_unreachable ();
19915 /* Synthesize LE and GE from LT/GT || EQ. */
19916 if (code == LE || code == GE)
19918 emit_insn (cmp);
19920 compare_result2 = gen_reg_rtx (CCFPmode);
19922 /* Do the EQ. */
19923 switch (op_mode)
19925 case SFmode:
19926 cmp = (flag_finite_math_only && !flag_trapping_math)
19927 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
19928 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
19929 break;
19931 case DFmode:
19932 cmp = (flag_finite_math_only && !flag_trapping_math)
19933 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
19934 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
19935 break;
19937 case TFmode:
19938 case IFmode:
19939 case KFmode:
19940 cmp = (flag_finite_math_only && !flag_trapping_math)
19941 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
19942 : gen_cmptfeq_gpr (compare_result2, op0, op1);
19943 break;
19945 default:
19946 gcc_unreachable ();
19949 emit_insn (cmp);
19951 /* OR them together. */
19952 or_result = gen_reg_rtx (CCFPmode);
19953 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
19954 compare_result2);
19955 compare_result = or_result;
19958 code = reverse_p ? NE : EQ;
19960 emit_insn (cmp);
19963 /* IEEE 128-bit support in VSX registers. The comparison function (__cmpkf2)
19964 returns a value 0..15, laid out the same way the PowerPC CR register
19965 would be for a normal floating point comparison. */
19966 else if (FLOAT128_IEEE_P (mode))
19968 rtx and_reg = gen_reg_rtx (SImode);
19969 rtx dest = gen_reg_rtx (SImode);
19970 rtx libfunc = optab_libfunc (cmp_optab, mode);
19971 HOST_WIDE_INT mask_value = 0;
19973 /* Values that __cmpkf2 returns. */
19974 #define PPC_CMP_UNORDERED 0x1 /* isnan (a) || isnan (b). */
19975 #define PPC_CMP_EQUAL 0x2 /* a == b. */
19976 #define PPC_CMP_GREATER_THEN 0x4 /* a > b. */
19977 #define PPC_CMP_LESS_THEN 0x8 /* a < b. */
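/* For example, a GE comparison masks for either the "greater than" or the
   "equal" bit and tests the result against zero:

       (__cmpkf2 (a, b) & (PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL)) != 0

   while its inverse UNLT tests that the same two bits came back clear,
   which is exactly the mapping the switch below implements. */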
19979 switch (code)
19981 case EQ:
19982 mask_value = PPC_CMP_EQUAL;
19983 code = NE;
19984 break;
19986 case NE:
19987 mask_value = PPC_CMP_EQUAL;
19988 code = EQ;
19989 break;
19991 case GT:
19992 mask_value = PPC_CMP_GREATER_THEN;
19993 code = NE;
19994 break;
19996 case GE:
19997 mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
19998 code = NE;
19999 break;
20001 case LT:
20002 mask_value = PPC_CMP_LESS_THEN;
20003 code = NE;
20004 break;
20006 case LE:
20007 mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
20008 code = NE;
20009 break;
20011 case UNLE:
20012 mask_value = PPC_CMP_GREATER_THEN;
20013 code = EQ;
20014 break;
20016 case UNLT:
20017 mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
20018 code = EQ;
20019 break;
20021 case UNGE:
20022 mask_value = PPC_CMP_LESS_THEN;
20023 code = EQ;
20024 break;
20026 case UNGT:
20027 mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
20028 code = EQ;
20029 break;
20031 case UNEQ:
20032 mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
20033 code = NE;
break;
20035 case LTGT:
20036 mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
20037 code = EQ;
20038 break;
20040 case UNORDERED:
20041 mask_value = PPC_CMP_UNORDERED;
20042 code = NE;
20043 break;
20045 case ORDERED:
20046 mask_value = PPC_CMP_UNORDERED;
20047 code = EQ;
20048 break;
20050 default:
20051 gcc_unreachable ();
20054 gcc_assert (mask_value != 0);
20055 and_reg = emit_library_call_value (libfunc, and_reg, LCT_CONST, SImode, 2,
20056 op0, mode, op1, mode);
20058 emit_insn (gen_andsi3 (dest, and_reg, GEN_INT (mask_value)));
20059 compare_result = gen_reg_rtx (CCmode);
20060 comp_mode = CCmode;
20062 emit_insn (gen_rtx_SET (compare_result,
20063 gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
20066 else
20068 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
20069 CLOBBERs to match cmptf_internal2 pattern. */
20070 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
20071 && FLOAT128_IBM_P (GET_MODE (op0))
20072 && TARGET_HARD_FLOAT && TARGET_FPRS)
20073 emit_insn (gen_rtx_PARALLEL (VOIDmode,
20074 gen_rtvec (10,
20075 gen_rtx_SET (compare_result,
20076 gen_rtx_COMPARE (comp_mode, op0, op1)),
20077 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20078 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20079 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20080 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20081 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20082 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20083 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20084 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
20085 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
20086 else if (GET_CODE (op1) == UNSPEC
20087 && XINT (op1, 1) == UNSPEC_SP_TEST)
20089 rtx op1b = XVECEXP (op1, 0, 0);
20090 comp_mode = CCEQmode;
20091 compare_result = gen_reg_rtx (CCEQmode);
20092 if (TARGET_64BIT)
20093 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
20094 else
20095 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
20097 else
20098 emit_insn (gen_rtx_SET (compare_result,
20099 gen_rtx_COMPARE (comp_mode, op0, op1)));
20102 /* Some kinds of FP comparisons need an OR operation;
20103 under flag_finite_math_only we don't bother. */
20104 if (FLOAT_MODE_P (mode)
20105 && !FLOAT128_IEEE_P (mode)
20106 && !flag_finite_math_only
20107 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
20108 && (code == LE || code == GE
20109 || code == UNEQ || code == LTGT
20110 || code == UNGT || code == UNLT))
20112 enum rtx_code or1, or2;
20113 rtx or1_rtx, or2_rtx, compare2_rtx;
20114 rtx or_result = gen_reg_rtx (CCEQmode);
20116 switch (code)
20118 case LE: or1 = LT; or2 = EQ; break;
20119 case GE: or1 = GT; or2 = EQ; break;
20120 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
20121 case LTGT: or1 = LT; or2 = GT; break;
20122 case UNGT: or1 = UNORDERED; or2 = GT; break;
20123 case UNLT: or1 = UNORDERED; or2 = LT; break;
20124 default: gcc_unreachable ();
20126 validate_condition_mode (or1, comp_mode);
20127 validate_condition_mode (or2, comp_mode);
20128 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
20129 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
20130 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
20131 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
20132 const_true_rtx);
20133 emit_insn (gen_rtx_SET (or_result, compare2_rtx));
20135 compare_result = or_result;
20136 code = EQ;
20139 validate_condition_mode (code, GET_MODE (compare_result));
20141 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
20145 /* Expand floating point conversion to/from __float128 and __ibm128. */
20147 void
20148 rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
20150 machine_mode dest_mode = GET_MODE (dest);
20151 machine_mode src_mode = GET_MODE (src);
20152 convert_optab cvt = unknown_optab;
20153 rtx libfunc = NULL_RTX;
20154 rtx dest2;
20156 if (dest_mode == src_mode)
20157 gcc_unreachable ();
20159 if (FLOAT128_IEEE_P (dest_mode))
20161 if (src_mode == SFmode
20162 || src_mode == DFmode
20163 || FLOAT128_IBM_P (src_mode))
20164 cvt = sext_optab;
20166 else if (GET_MODE_CLASS (src_mode) == MODE_INT)
20167 cvt = (unsigned_p) ? ufloat_optab : sfloat_optab;
20169 else if (FLOAT128_IEEE_P (src_mode))
20170 emit_move_insn (dest, gen_lowpart (dest_mode, src));
20172 else
20173 gcc_unreachable ();
20176 else if (FLOAT128_IEEE_P (src_mode))
20178 if (dest_mode == SFmode
20179 || dest_mode == DFmode
20180 || FLOAT128_IBM_P (dest_mode))
20181 cvt = trunc_optab;
20183 else if (GET_MODE_CLASS (dest_mode) == MODE_INT)
20184 cvt = (unsigned_p) ? ufix_optab : sfix_optab;
20186 else
20187 gcc_unreachable ();
20190 else
20191 gcc_unreachable ();
20193 gcc_assert (cvt != unknown_optab);
20194 libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
20195 gcc_assert (libfunc != NULL_RTX);
20197 dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
20198 src_mode);
20200 gcc_assert (dest != NULL_RTX);
20201 if (!rtx_equal_p (dest, dest2))
20202 emit_move_insn (dest, dest2);
20204 return;
20207 /* Emit the RTL for an sISEL pattern. */
20209 void
20210 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
20212 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
20215 /* Emit RTL that sets a register to zero if OP1 and OP2 are equal. SCRATCH
20216 can be used as that dest register. Return the dest register. */
20218 rtx
20219 rs6000_emit_eqne (machine_mode mode, rtx op1, rtx op2, rtx scratch)
20221 if (op2 == const0_rtx)
20222 return op1;
20224 if (GET_CODE (scratch) == SCRATCH)
20225 scratch = gen_reg_rtx (mode);
20227 if (logical_operand (op2, mode))
20228 emit_insn (gen_rtx_SET (scratch, gen_rtx_XOR (mode, op1, op2)));
20229 else
20230 emit_insn (gen_rtx_SET (scratch,
20231 gen_rtx_PLUS (mode, op1, negate_rtx (mode, op2))));
20233 return scratch;
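/* For example, comparing OP1 against the constant 10 (a logical_operand)
   emits SCRATCH = OP1 ^ 10, which is zero exactly when OP1 == 10; a
   constant such as -10 fails logical_operand and instead emits
   SCRATCH = OP1 + 10. */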
20236 void
20237 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
20239 rtx condition_rtx;
20240 machine_mode op_mode;
20241 enum rtx_code cond_code;
20242 rtx result = operands[0];
20244 condition_rtx = rs6000_generate_compare (operands[1], mode);
20245 cond_code = GET_CODE (condition_rtx);
20247 if (FLOAT_MODE_P (mode)
20248 && !TARGET_FPRS && TARGET_HARD_FLOAT)
20250 rtx t;
20252 PUT_MODE (condition_rtx, SImode);
20253 t = XEXP (condition_rtx, 0);
20255 gcc_assert (cond_code == NE || cond_code == EQ);
20257 if (cond_code == NE)
20258 emit_insn (gen_e500_flip_gt_bit (t, t));
20260 emit_insn (gen_move_from_CR_gt_bit (result, t));
20261 return;
20264 if (cond_code == NE
20265 || cond_code == GE || cond_code == LE
20266 || cond_code == GEU || cond_code == LEU
20267 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
20269 rtx not_result = gen_reg_rtx (CCEQmode);
20270 rtx not_op, rev_cond_rtx;
20271 machine_mode cc_mode;
20273 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
20275 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
20276 SImode, XEXP (condition_rtx, 0), const0_rtx);
20277 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
20278 emit_insn (gen_rtx_SET (not_result, not_op));
20279 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
20282 op_mode = GET_MODE (XEXP (operands[1], 0));
20283 if (op_mode == VOIDmode)
20284 op_mode = GET_MODE (XEXP (operands[1], 1));
20286 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
20288 PUT_MODE (condition_rtx, DImode);
20289 convert_move (result, condition_rtx, 0);
20291 else
20293 PUT_MODE (condition_rtx, SImode);
20294 emit_insn (gen_rtx_SET (result, condition_rtx));
20298 /* Emit a conditional branch: OPERANDS[0] is the comparison and
OPERANDS[3] is the label to branch to. */
20300 void
20301 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
20303 rtx condition_rtx, loc_ref;
20305 condition_rtx = rs6000_generate_compare (operands[0], mode);
20306 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
20307 emit_jump_insn (gen_rtx_SET (pc_rtx,
20308 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
20309 loc_ref, pc_rtx)));
20312 /* Return the string to output a conditional branch to LABEL, which is
20313 the operand template of the label, or NULL if the branch is really a
20314 conditional return.
20316 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
20317 condition code register and its mode specifies what kind of
20318 comparison we made.
20320 REVERSED is nonzero if we should reverse the sense of the comparison.
20322 INSN is the insn. */
20324 char *
20325 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
20327 static char string[64];
20328 enum rtx_code code = GET_CODE (op);
20329 rtx cc_reg = XEXP (op, 0);
20330 machine_mode mode = GET_MODE (cc_reg);
20331 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
20332 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
20333 int really_reversed = reversed ^ need_longbranch;
20334 char *s = string;
20335 const char *ccode;
20336 const char *pred;
20337 rtx note;
20339 validate_condition_mode (code, mode);
20341 /* Work out which way this really branches. We could use
20342 reverse_condition_maybe_unordered here always but this
20343 makes the resulting assembler clearer. */
20344 if (really_reversed)
20346 /* Reversal of FP compares needs care -- an ordered compare
20347 becomes an unordered compare and vice versa. */
20348 if (mode == CCFPmode)
20349 code = reverse_condition_maybe_unordered (code);
20350 else
20351 code = reverse_condition (code);
20354 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
20356 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
20357 to the GT bit. */
20358 switch (code)
20360 case EQ:
20361 /* Opposite of GT. */
20362 code = GT;
20363 break;
20365 case NE:
20366 code = UNLE;
20367 break;
20369 default:
20370 gcc_unreachable ();
20374 switch (code)
20376 /* Not all of these are actually distinct opcodes, but
20377 we distinguish them for clarity of the resulting assembler. */
20378 case NE: case LTGT:
20379 ccode = "ne"; break;
20380 case EQ: case UNEQ:
20381 ccode = "eq"; break;
20382 case GE: case GEU:
20383 ccode = "ge"; break;
20384 case GT: case GTU: case UNGT:
20385 ccode = "gt"; break;
20386 case LE: case LEU:
20387 ccode = "le"; break;
20388 case LT: case LTU: case UNLT:
20389 ccode = "lt"; break;
20390 case UNORDERED: ccode = "un"; break;
20391 case ORDERED: ccode = "nu"; break;
20392 case UNGE: ccode = "nl"; break;
20393 case UNLE: ccode = "ng"; break;
20394 default:
20395 gcc_unreachable ();
20398 /* Maybe we have a guess as to how likely the branch is. */
20399 pred = "";
20400 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
20401 if (note != NULL_RTX)
20403 /* PROB is the difference from 50%. */
20404 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
20406 /* Only hint for highly probable/improbable branches on newer
20407 cpus as static prediction overrides processor dynamic
20408 prediction. For older cpus we may as well always hint, but
20409 assume not taken for branches that are very close to 50% as a
20410 mispredicted taken branch is more expensive than a
20411 mispredicted not-taken branch. */
20412 if (rs6000_always_hint
20413 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
20414 && br_prob_note_reliable_p (note)))
20416 if (abs (prob) > REG_BR_PROB_BASE / 20
20417 && ((prob > 0) ^ need_longbranch))
20418 pred = "+";
20419 else
20420 pred = "-";
20424 if (label == NULL)
20425 s += sprintf (s, "b%slr%s ", ccode, pred);
20426 else
20427 s += sprintf (s, "b%s%s ", ccode, pred);
20429 /* We need to escape any '%' characters in the reg_names string.
20430 Assume they'd only be the first character.... */
20431 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
20432 *s++ = '%';
20433 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
20435 if (label != NULL)
20437 /* If the branch distance was too far, we may have to use an
20438 unconditional branch to go the distance. */
20439 if (need_longbranch)
20440 s += sprintf (s, ",$+8\n\tb %s", label);
20441 else
20442 s += sprintf (s, ",%s", label);
20445 return string;
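/* Sketch of the two output shapes, assuming CR0 prints as "0":

       beq- 0,.L42    short in-range branch, hinted not-taken

       bne 0,$+8      out of range: the reversed test skips over
       b .L42         an unconditional branch that goes the distance. */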
20448 /* Return the string to flip the GT bit on a CR. */
20449 char *
20450 output_e500_flip_gt_bit (rtx dst, rtx src)
20452 static char string[64];
20453 int a, b;
20455 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
20456 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
20458 /* GT bit. */
20459 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
20460 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
20462 sprintf (string, "crnot %d,%d", a, b);
20463 return string;
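/* For instance, flipping the GT bit of CR6 in place gives
   a == b == 4*6 + 1 == 25, so this returns "crnot 25,25". */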
20466 /* Return insn for VSX or Altivec comparisons. */
20468 static rtx
20469 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
20471 rtx mask;
20472 machine_mode mode = GET_MODE (op0);
20474 switch (code)
20476 default:
20477 break;
20479 case GE:
20480 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
20481 return NULL_RTX;
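/* Fall through: a floating-point GE has a direct vector instruction. */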
20483 case EQ:
20484 case GT:
20485 case GTU:
20486 case ORDERED:
20487 case UNORDERED:
20488 case UNEQ:
20489 case LTGT:
20490 mask = gen_reg_rtx (mode);
20491 emit_insn (gen_rtx_SET (mask, gen_rtx_fmt_ee (code, mode, op0, op1)));
20492 return mask;
20495 return NULL_RTX;
20498 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
20499 DMODE is the expected destination mode. This is a recursive function. */
20501 static rtx
20502 rs6000_emit_vector_compare (enum rtx_code rcode,
20503 rtx op0, rtx op1,
20504 machine_mode dmode)
20506 rtx mask;
20507 bool swap_operands = false;
20508 bool try_again = false;
20510 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
20511 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
20513 /* See if the comparison works as is. */
20514 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
20515 if (mask)
20516 return mask;
20518 switch (rcode)
20520 case LT:
20521 rcode = GT;
20522 swap_operands = true;
20523 try_again = true;
20524 break;
20525 case LTU:
20526 rcode = GTU;
20527 swap_operands = true;
20528 try_again = true;
20529 break;
20530 case NE:
20531 case UNLE:
20532 case UNLT:
20533 case UNGE:
20534 case UNGT:
20535 /* Invert condition and try again.
20536 e.g., A != B becomes ~(A==B). */
20538 enum rtx_code rev_code;
20539 enum insn_code nor_code;
20540 rtx mask2;
20542 rev_code = reverse_condition_maybe_unordered (rcode);
20543 if (rev_code == UNKNOWN)
20544 return NULL_RTX;
20546 nor_code = optab_handler (one_cmpl_optab, dmode);
20547 if (nor_code == CODE_FOR_nothing)
20548 return NULL_RTX;
20550 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
20551 if (!mask2)
20552 return NULL_RTX;
20554 mask = gen_reg_rtx (dmode);
20555 emit_insn (GEN_FCN (nor_code) (mask, mask2));
20556 return mask;
20558 break;
20559 case GE:
20560 case GEU:
20561 case LE:
20562 case LEU:
20563 /* Try GT/GTU/LT/LTU OR EQ */
20565 rtx c_rtx, eq_rtx;
20566 enum insn_code ior_code;
20567 enum rtx_code new_code;
20569 switch (rcode)
20571 case GE:
20572 new_code = GT;
20573 break;
20575 case GEU:
20576 new_code = GTU;
20577 break;
20579 case LE:
20580 new_code = LT;
20581 break;
20583 case LEU:
20584 new_code = LTU;
20585 break;
20587 default:
20588 gcc_unreachable ();
20591 ior_code = optab_handler (ior_optab, dmode);
20592 if (ior_code == CODE_FOR_nothing)
20593 return NULL_RTX;
20595 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
20596 if (!c_rtx)
20597 return NULL_RTX;
20599 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
20600 if (!eq_rtx)
20601 return NULL_RTX;
20603 mask = gen_reg_rtx (dmode);
20604 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
20605 return mask;
20607 break;
20608 default:
20609 return NULL_RTX;
20612 if (try_again)
20614 if (swap_operands)
20615 std::swap (op0, op1);
20617 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
20618 if (mask)
20619 return mask;
20622 /* You only get two chances. */
20623 return NULL_RTX;
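/* For example, a V4SI LE is synthesized recursively: LE itself has no
   instruction, so it becomes LT IOR EQ; LT in turn has no instruction
   and is emitted as GT with the operands swapped. The net result is
   roughly vcmpgtsw (swapped), vcmpequw, and a vor of the two masks. */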
20626 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
20627 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
20628 operands for the relation operation COND. */
20630 int
20631 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
20632 rtx cond, rtx cc_op0, rtx cc_op1)
20634 machine_mode dest_mode = GET_MODE (dest);
20635 machine_mode mask_mode = GET_MODE (cc_op0);
20636 enum rtx_code rcode = GET_CODE (cond);
20637 machine_mode cc_mode = CCmode;
20638 rtx mask;
20639 rtx cond2;
20640 rtx tmp;
20641 bool invert_move = false;
20643 if (VECTOR_UNIT_NONE_P (dest_mode))
20644 return 0;
20646 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
20647 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
20649 switch (rcode)
20651 /* Swap operands if we can, and fall back to doing the operation as
20652 specified, and doing a NOR to invert the test. */
20653 case NE:
20654 case UNLE:
20655 case UNLT:
20656 case UNGE:
20657 case UNGT:
20658 /* Invert condition and try again.
20659 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
20660 invert_move = true;
20661 rcode = reverse_condition_maybe_unordered (rcode);
20662 if (rcode == UNKNOWN)
20663 return 0;
20664 break;
20666 /* Mark unsigned tests with CCUNSmode. */
20667 case GTU:
20668 case GEU:
20669 case LTU:
20670 case LEU:
20671 cc_mode = CCUNSmode;
20672 break;
20674 default:
20675 break;
20678 /* Get the vector mask for the given relational operations. */
20679 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
20681 if (!mask)
20682 return 0;
20684 if (invert_move)
20686 tmp = op_true;
20687 op_true = op_false;
20688 op_false = tmp;
20691 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
20692 CONST0_RTX (dest_mode));
20693 emit_insn (gen_rtx_SET (dest,
20694 gen_rtx_IF_THEN_ELSE (dest_mode,
20695 cond2,
20696 op_true,
20697 op_false)));
20698 return 1;
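/* E.g., A = (B != C) ? D : E is first flipped to (B == C) with D and E
   swapped, and the mask (all-ones or all-zeros per element) then selects
   between the two arms, typically via a single vsel/xxsel. */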
20701 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
20702 operands of the last comparison is nonzero/true, FALSE_COND if it
20703 is zero/false. Return 0 if the hardware has no such operation. */
20705 int
20706 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
20708 enum rtx_code code = GET_CODE (op);
20709 rtx op0 = XEXP (op, 0);
20710 rtx op1 = XEXP (op, 1);
20711 REAL_VALUE_TYPE c1;
20712 machine_mode compare_mode = GET_MODE (op0);
20713 machine_mode result_mode = GET_MODE (dest);
20714 rtx temp;
20715 bool is_against_zero;
20717 /* These modes should always match. */
20718 if (GET_MODE (op1) != compare_mode
20719 /* In the isel case however, we can use a compare immediate, so
20720 op1 may be a small constant. */
20721 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
20722 return 0;
20723 if (GET_MODE (true_cond) != result_mode)
20724 return 0;
20725 if (GET_MODE (false_cond) != result_mode)
20726 return 0;
20728 /* Don't allow using floating point comparisons for integer results for
20729 now. */
20730 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
20731 return 0;
20733 /* First, work out if the hardware can do this at all, or
20734 if it's too slow.... */
20735 if (!FLOAT_MODE_P (compare_mode))
20737 if (TARGET_ISEL)
20738 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
20739 return 0;
20741 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
20742 && SCALAR_FLOAT_MODE_P (compare_mode))
20743 return 0;
20745 is_against_zero = op1 == CONST0_RTX (compare_mode);
20747 /* A floating-point subtract might overflow, underflow, or produce
20748 an inexact result, thus changing the floating-point flags, so it
20749 can't be generated if we care about that. It's safe if one side
20750 of the construct is zero, since then no subtract will be
20751 generated. */
20752 if (SCALAR_FLOAT_MODE_P (compare_mode)
20753 && flag_trapping_math && ! is_against_zero)
20754 return 0;
20756 /* Eliminate half of the comparisons by switching operands, this
20757 makes the remaining code simpler. */
20758 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
20759 || code == LTGT || code == LT || code == UNLE)
20761 code = reverse_condition_maybe_unordered (code);
20762 temp = true_cond;
20763 true_cond = false_cond;
20764 false_cond = temp;
20767 /* UNEQ and LTGT take four instructions for a comparison with zero,
20768 so it'll probably be faster to use a branch here too. */
20769 if (code == UNEQ && HONOR_NANS (compare_mode))
20770 return 0;
20772 if (GET_CODE (op1) == CONST_DOUBLE)
20773 REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);
20775 /* We're going to try to implement comparisons by performing
20776 a subtract, then comparing against zero. Unfortunately,
20777 Inf - Inf is NaN which is not zero, and so if we don't
20778 know that the operand is finite and the comparison
20779 would treat EQ differently from UNORDERED, we can't do it. */
20780 if (HONOR_INFINITIES (compare_mode)
20781 && code != GT && code != UNGE
20782 && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
20783 /* Constructs of the form (a OP b ? a : b) are safe. */
20784 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
20785 || (! rtx_equal_p (op0, true_cond)
20786 && ! rtx_equal_p (op1, true_cond))))
20787 return 0;
20789 /* At this point we know we can use fsel. */
20791 /* Reduce the comparison to a comparison against zero. */
20792 if (! is_against_zero)
20794 temp = gen_reg_rtx (compare_mode);
20795 emit_insn (gen_rtx_SET (temp, gen_rtx_MINUS (compare_mode, op0, op1)));
20796 op0 = temp;
20797 op1 = CONST0_RTX (compare_mode);
20800 /* If we don't care about NaNs we can reduce some of the comparisons
20801 down to faster ones. */
20802 if (! HONOR_NANS (compare_mode))
20803 switch (code)
20805 case GT:
20806 code = LE;
20807 temp = true_cond;
20808 true_cond = false_cond;
20809 false_cond = temp;
20810 break;
20811 case UNGE:
20812 code = GE;
20813 break;
20814 case UNEQ:
20815 code = EQ;
20816 break;
20817 default:
20818 break;
20821 /* Now, reduce everything down to a GE. */
20822 switch (code)
20824 case GE:
20825 break;
20827 case LE:
20828 temp = gen_reg_rtx (compare_mode);
20829 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
20830 op0 = temp;
20831 break;
20833 case ORDERED:
20834 temp = gen_reg_rtx (compare_mode);
20835 emit_insn (gen_rtx_SET (temp, gen_rtx_ABS (compare_mode, op0)));
20836 op0 = temp;
20837 break;
20839 case EQ:
20840 temp = gen_reg_rtx (compare_mode);
20841 emit_insn (gen_rtx_SET (temp,
20842 gen_rtx_NEG (compare_mode,
20843 gen_rtx_ABS (compare_mode, op0))));
20844 op0 = temp;
20845 break;
20847 case UNGE:
20848 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
20849 temp = gen_reg_rtx (result_mode);
20850 emit_insn (gen_rtx_SET (temp,
20851 gen_rtx_IF_THEN_ELSE (result_mode,
20852 gen_rtx_GE (VOIDmode,
20853 op0, op1),
20854 true_cond, false_cond)));
20855 false_cond = true_cond;
20856 true_cond = temp;
20858 temp = gen_reg_rtx (compare_mode);
20859 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
20860 op0 = temp;
20861 break;
20863 case GT:
20864 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
20865 temp = gen_reg_rtx (result_mode);
20866 emit_insn (gen_rtx_SET (temp,
20867 gen_rtx_IF_THEN_ELSE (result_mode,
20868 gen_rtx_GE (VOIDmode,
20869 op0, op1),
20870 true_cond, false_cond)));
20871 true_cond = false_cond;
20872 false_cond = temp;
20874 temp = gen_reg_rtx (compare_mode);
20875 emit_insn (gen_rtx_SET (temp, gen_rtx_NEG (compare_mode, op0)));
20876 op0 = temp;
20877 break;
20879 default:
20880 gcc_unreachable ();
20883 emit_insn (gen_rtx_SET (dest,
20884 gen_rtx_IF_THEN_ELSE (result_mode,
20885 gen_rtx_GE (VOIDmode,
20886 op0, op1),
20887 true_cond, false_cond)));
20888 return 1;
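/* All the cases above funnel into a GE-against-zero form because that is
   what fsel implements: dest = (op0 >= 0.0) ? true_cond : false_cond.
   E.g., a == b becomes -fabs (a - b) >= 0.0, which holds exactly when
   a - b is zero. */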
20891 /* Same as above, but for ints (isel). */
20893 static int
20894 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
20896 rtx condition_rtx, cr;
20897 machine_mode mode = GET_MODE (dest);
20898 enum rtx_code cond_code;
20899 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
20900 bool signedp;
20902 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
20903 return 0;
20905 /* We still have to do the compare, because isel doesn't do a
20906 compare; it just looks at the CRx bits set by a previous compare
20907 instruction. */
20908 condition_rtx = rs6000_generate_compare (op, mode);
20909 cond_code = GET_CODE (condition_rtx);
20910 cr = XEXP (condition_rtx, 0);
20911 signedp = GET_MODE (cr) == CCmode;
20913 isel_func = (mode == SImode
20914 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
20915 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
20917 switch (cond_code)
20919 case LT: case GT: case LTU: case GTU: case EQ:
20920 /* isel handles these directly. */
20921 break;
20923 default:
20924 /* We need to swap the sense of the comparison. */
20926 std::swap (false_cond, true_cond);
20927 PUT_CODE (condition_rtx, reverse_condition (cond_code));
20929 break;
20932 false_cond = force_reg (mode, false_cond);
20933 if (true_cond != const0_rtx)
20934 true_cond = force_reg (mode, true_cond);
20936 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
20938 return 1;
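/* isel selects between two GPRs on a single CR bit:
   dest = crbit ? true_cond : false_cond. Codes isel cannot test
   directly (GE, LEU, NE, ...) are handled above by reversing the
   condition and swapping the two arms. */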
20941 const char *
20942 output_isel (rtx *operands)
20944 enum rtx_code code;
20946 code = GET_CODE (operands[1]);
20948 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
20950 gcc_assert (GET_CODE (operands[2]) == REG
20951 && GET_CODE (operands[3]) == REG);
20952 PUT_CODE (operands[1], reverse_condition (code));
20953 return "isel %0,%3,%2,%j1";
20956 return "isel %0,%2,%3,%j1";
20959 void
20960 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20962 machine_mode mode = GET_MODE (op0);
20963 enum rtx_code c;
20964 rtx target;
20966 /* VSX/altivec have direct min/max insns. */
20967 if ((code == SMAX || code == SMIN)
20968 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
20969 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
20971 emit_insn (gen_rtx_SET (dest, gen_rtx_fmt_ee (code, mode, op0, op1)));
20972 return;
20975 if (code == SMAX || code == SMIN)
20976 c = GE;
20977 else
20978 c = GEU;
20980 if (code == SMAX || code == UMAX)
20981 target = emit_conditional_move (dest, c, op0, op1, mode,
20982 op0, op1, mode, 0);
20983 else
20984 target = emit_conditional_move (dest, c, op0, op1, mode,
20985 op1, op0, mode, 0);
20986 gcc_assert (target);
20987 if (target != dest)
20988 emit_move_insn (dest, target);
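/* Without a direct instruction, smax (a, b) is simply the conditional
   move (a >= b) ? a : b, and the unsigned variants use GEU instead. */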
20991 /* A subroutine of the atomic operation splitters. Jump to LABEL if
20992 COND is true. Mark the jump as unlikely to be taken. */
20994 static void
20995 emit_unlikely_jump (rtx cond, rtx label)
20997 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
20998 rtx x;
21000 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
21001 x = emit_jump_insn (gen_rtx_SET (pc_rtx, x));
21002 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
21005 /* A subroutine of the atomic operation splitters. Emit a load-locked
21006 instruction in MODE. For QI/HImode, possibly use a pattern that includes
21007 the zero_extend operation. */
21009 static void
21010 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
21012 rtx (*fn) (rtx, rtx) = NULL;
21014 switch (mode)
21016 case QImode:
21017 fn = gen_load_lockedqi;
21018 break;
21019 case HImode:
21020 fn = gen_load_lockedhi;
21021 break;
21022 case SImode:
21023 if (GET_MODE (mem) == QImode)
21024 fn = gen_load_lockedqi_si;
21025 else if (GET_MODE (mem) == HImode)
21026 fn = gen_load_lockedhi_si;
21027 else
21028 fn = gen_load_lockedsi;
21029 break;
21030 case DImode:
21031 fn = gen_load_lockeddi;
21032 break;
21033 case TImode:
21034 fn = gen_load_lockedti;
21035 break;
21036 default:
21037 gcc_unreachable ();
21039 emit_insn (fn (reg, mem));
21042 /* A subroutine of the atomic operation splitters. Emit a store-conditional
21043 instruction in MODE. */
21045 static void
21046 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
21048 rtx (*fn) (rtx, rtx, rtx) = NULL;
21050 switch (mode)
21052 case QImode:
21053 fn = gen_store_conditionalqi;
21054 break;
21055 case HImode:
21056 fn = gen_store_conditionalhi;
21057 break;
21058 case SImode:
21059 fn = gen_store_conditionalsi;
21060 break;
21061 case DImode:
21062 fn = gen_store_conditionaldi;
21063 break;
21064 case TImode:
21065 fn = gen_store_conditionalti;
21066 break;
21067 default:
21068 gcc_unreachable ();
21071 /* Emit sync before stwcx. to address PPC405 Erratum. */
21072 if (PPC405_ERRATUM77)
21073 emit_insn (gen_hwsync ());
21075 emit_insn (fn (res, mem, val));
21078 /* Expand barriers before and after a load_locked/store_cond sequence. */
21080 static rtx
21081 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
21083 rtx addr = XEXP (mem, 0);
21084 int strict_p = (reload_in_progress || reload_completed);
21086 if (!legitimate_indirect_address_p (addr, strict_p)
21087 && !legitimate_indexed_address_p (addr, strict_p))
21089 addr = force_reg (Pmode, addr);
21090 mem = replace_equiv_address_nv (mem, addr);
21093 switch (model)
21095 case MEMMODEL_RELAXED:
21096 case MEMMODEL_CONSUME:
21097 case MEMMODEL_ACQUIRE:
21098 break;
21099 case MEMMODEL_RELEASE:
21100 case MEMMODEL_ACQ_REL:
21101 emit_insn (gen_lwsync ());
21102 break;
21103 case MEMMODEL_SEQ_CST:
21104 emit_insn (gen_hwsync ());
21105 break;
21106 default:
21107 gcc_unreachable ();
21109 return mem;
21112 static void
21113 rs6000_post_atomic_barrier (enum memmodel model)
21115 switch (model)
21117 case MEMMODEL_RELAXED:
21118 case MEMMODEL_CONSUME:
21119 case MEMMODEL_RELEASE:
21120 break;
21121 case MEMMODEL_ACQUIRE:
21122 case MEMMODEL_ACQ_REL:
21123 case MEMMODEL_SEQ_CST:
21124 emit_insn (gen_isync ());
21125 break;
21126 default:
21127 gcc_unreachable ();
21131 /* A subroutine of the various atomic expanders. For sub-word operations,
21132 we must adjust things to operate on SImode. Given the original MEM,
21133 return a new aligned memory. Also build and return the quantities by
21134 which to shift and mask. */
21136 static rtx
21137 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
21139 rtx addr, align, shift, mask, mem;
21140 HOST_WIDE_INT shift_mask;
21141 machine_mode mode = GET_MODE (orig_mem);
21143 /* For smaller modes, we have to implement this via SImode. */
21144 shift_mask = (mode == QImode ? 0x18 : 0x10);
21146 addr = XEXP (orig_mem, 0);
21147 addr = force_reg (GET_MODE (addr), addr);
21149 /* Aligned memory containing subword. Generate a new memory. We
21150 do not want any of the existing MEM_ATTR data, as we're now
21151 accessing memory outside the original object. */
21152 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
21153 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21154 mem = gen_rtx_MEM (SImode, align);
21155 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
21156 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
21157 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
21159 /* Shift amount for subword relative to aligned word. */
21160 shift = gen_reg_rtx (SImode);
21161 addr = gen_lowpart (SImode, addr);
21162 rtx tmp = gen_reg_rtx (SImode);
21163 emit_insn (gen_ashlsi3 (tmp, addr, GEN_INT (3)));
21164 emit_insn (gen_andsi3 (shift, tmp, GEN_INT (shift_mask)));
21165 if (BYTES_BIG_ENDIAN)
21166 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
21167 shift, 1, OPTAB_LIB_WIDEN);
21168 *pshift = shift;
21170 /* Mask for insertion. */
21171 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
21172 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
21173 *pmask = mask;
21175 return mem;
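/* Worked example, little-endian QImode: for a byte at address A, the
   containing word is A & -4, the shift is (A << 3) & 0x18, i.e.
   8 * (A & 3), and the insertion mask is 0xff << shift. Big-endian
   XORs the shift with 0x18 so that byte 0 lands in the high-order
   bits of the word. */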
21178 /* A subroutine of the various atomic expanders. For sub-word operands,
21179 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
21181 static rtx
21182 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
21184 rtx x;
21186 x = gen_reg_rtx (SImode);
21187 emit_insn (gen_rtx_SET (x, gen_rtx_AND (SImode,
21188 gen_rtx_NOT (SImode, mask),
21189 oldval)));
21191 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
21193 return x;
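/* In other words the merge is (OLDVAL & ~MASK) | NEWVAL, with NEWVAL
   already shifted into the field by the caller. */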
21196 /* A subroutine of the various atomic expanders. For sub-word operands,
21197 extract WIDE to NARROW via SHIFT. */
21199 static void
21200 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
21202 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
21203 wide, 1, OPTAB_LIB_WIDEN);
21204 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
21207 /* Expand an atomic compare and swap operation. */
21209 void
21210 rs6000_expand_atomic_compare_and_swap (rtx operands[])
21212 rtx boolval, retval, mem, oldval, newval, cond;
21213 rtx label1, label2, x, mask, shift;
21214 machine_mode mode, orig_mode;
21215 enum memmodel mod_s, mod_f;
21216 bool is_weak;
21218 boolval = operands[0];
21219 retval = operands[1];
21220 mem = operands[2];
21221 oldval = operands[3];
21222 newval = operands[4];
21223 is_weak = (INTVAL (operands[5]) != 0);
21224 mod_s = memmodel_base (INTVAL (operands[6]));
21225 mod_f = memmodel_base (INTVAL (operands[7]));
21226 orig_mode = mode = GET_MODE (mem);
21228 mask = shift = NULL_RTX;
21229 if (mode == QImode || mode == HImode)
21231 /* Before power8, we didn't have access to lbarx/lharx, so generate a
21232 lwarx plus shift/mask operations. With power8, we need to do the
21233 comparison in SImode, but the store is still done in QI/HImode. */
21234 oldval = convert_modes (SImode, mode, oldval, 1);
21236 if (!TARGET_SYNC_HI_QI)
21238 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
21240 /* Shift and mask OLDVAL into position within the word. */
21241 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
21242 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21244 /* Shift and mask NEWVAL into position within the word. */
21245 newval = convert_modes (SImode, mode, newval, 1);
21246 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
21247 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21250 /* Prepare to adjust the return value. */
21251 retval = gen_reg_rtx (SImode);
21252 mode = SImode;
21254 else if (reg_overlap_mentioned_p (retval, oldval))
21255 oldval = copy_to_reg (oldval);
21257 mem = rs6000_pre_atomic_barrier (mem, mod_s);
21259 label1 = NULL_RTX;
21260 if (!is_weak)
21262 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
21263 emit_label (XEXP (label1, 0));
21265 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
21267 emit_load_locked (mode, retval, mem);
21269 x = retval;
21270 if (mask)
21272 x = expand_simple_binop (SImode, AND, retval, mask,
21273 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21276 cond = gen_reg_rtx (CCmode);
21277 /* If we have TImode, synthesize a comparison. */
21278 if (mode != TImode)
21279 x = gen_rtx_COMPARE (CCmode, x, oldval);
21280 else
21282 rtx xor1_result = gen_reg_rtx (DImode);
21283 rtx xor2_result = gen_reg_rtx (DImode);
21284 rtx or_result = gen_reg_rtx (DImode);
21285 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
21286 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
21287 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
21288 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
21290 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
21291 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
21292 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
21293 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
21296 emit_insn (gen_rtx_SET (cond, x));
21298 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
21299 emit_unlikely_jump (x, label2);
21301 x = newval;
21302 if (mask)
21303 x = rs6000_mask_atomic_subword (retval, newval, mask);
21305 emit_store_conditional (orig_mode, cond, mem, x);
21307 if (!is_weak)
21309 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
21310 emit_unlikely_jump (x, label1);
21313 if (!is_mm_relaxed (mod_f))
21314 emit_label (XEXP (label2, 0));
21316 rs6000_post_atomic_barrier (mod_s);
21318 if (is_mm_relaxed (mod_f))
21319 emit_label (XEXP (label2, 0));
21321 if (shift)
21322 rs6000_finish_atomic_subword (operands[1], retval, shift);
21323 else if (mode != GET_MODE (operands[1]))
21324 convert_move (operands[1], retval, 1);
21326 /* In all cases, CR0 contains EQ on success, and NE on failure. */
21327 x = gen_rtx_EQ (SImode, cond, const0_rtx);
21328 emit_insn (gen_rtx_SET (boolval, x));
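/* For a word-sized strong compare-and-swap the emitted loop is
   essentially (barriers omitted, register names illustrative):

       .Lretry: lwarx   r10,0,rMEM
                cmpw    cr0,r10,rOLD
                bne-    cr0,.Ldone
                stwcx.  rNEW,0,rMEM
                bne-    cr0,.Lretry
       .Ldone:                                                       */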
21331 /* Expand an atomic exchange operation. */
21333 void
21334 rs6000_expand_atomic_exchange (rtx operands[])
21336 rtx retval, mem, val, cond;
21337 machine_mode mode;
21338 enum memmodel model;
21339 rtx label, x, mask, shift;
21341 retval = operands[0];
21342 mem = operands[1];
21343 val = operands[2];
21344 model = memmodel_base (INTVAL (operands[3]));
21345 mode = GET_MODE (mem);
21347 mask = shift = NULL_RTX;
21348 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
21350 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
21352 /* Shift and mask VAL into position within the word. */
21353 val = convert_modes (SImode, mode, val, 1);
21354 val = expand_simple_binop (SImode, ASHIFT, val, shift,
21355 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21357 /* Prepare to adjust the return value. */
21358 retval = gen_reg_rtx (SImode);
21359 mode = SImode;
21362 mem = rs6000_pre_atomic_barrier (mem, model);
21364 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
21365 emit_label (XEXP (label, 0));
21367 emit_load_locked (mode, retval, mem);
21369 x = val;
21370 if (mask)
21371 x = rs6000_mask_atomic_subword (retval, val, mask);
21373 cond = gen_reg_rtx (CCmode);
21374 emit_store_conditional (mode, cond, mem, x);
21376 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
21377 emit_unlikely_jump (x, label);
21379 rs6000_post_atomic_barrier (model);
21381 if (shift)
21382 rs6000_finish_atomic_subword (operands[0], retval, shift);
21385 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
21386 to perform. MEM is the memory on which to operate. VAL is the second
21387 operand of the binary operator. BEFORE and AFTER are optional locations to
21388 return the value of MEM either before or after the operation. MODEL_RTX
21389 is a CONST_INT containing the memory model to use. */
21391 void
21392 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
21393 rtx orig_before, rtx orig_after, rtx model_rtx)
21395 enum memmodel model = memmodel_base (INTVAL (model_rtx));
21396 machine_mode mode = GET_MODE (mem);
21397 machine_mode store_mode = mode;
21398 rtx label, x, cond, mask, shift;
21399 rtx before = orig_before, after = orig_after;
21401 mask = shift = NULL_RTX;
21402 /* On power8, we want to use SImode for the operation. On previous systems,
21403 use the operation in a subword and shift/mask to get the proper byte or
21404 halfword. */
21405 if (mode == QImode || mode == HImode)
21407 if (TARGET_SYNC_HI_QI)
21409 val = convert_modes (SImode, mode, val, 1);
21411 /* Prepare to adjust the return value. */
21412 before = gen_reg_rtx (SImode);
21413 if (after)
21414 after = gen_reg_rtx (SImode);
21415 mode = SImode;
21417 else
21419 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
21421 /* Shift and mask VAL into position within the word. */
21422 val = convert_modes (SImode, mode, val, 1);
21423 val = expand_simple_binop (SImode, ASHIFT, val, shift,
21424 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21426 switch (code)
21428 case IOR:
21429 case XOR:
21430 /* We've already zero-extended VAL. That is sufficient to
21431 make certain that it does not affect other bits. */
21432 mask = NULL;
21433 break;
21435 case AND:
21436 /* If we make certain that all of the other bits in VAL are
21437 set, that will be sufficient to not affect other bits. */
21438 x = gen_rtx_NOT (SImode, mask);
21439 x = gen_rtx_IOR (SImode, x, val);
21440 emit_insn (gen_rtx_SET (val, x));
21441 mask = NULL;
21442 break;
21444 case NOT:
21445 case PLUS:
21446 case MINUS:
21447 /* These will all affect bits outside the field and need
21448 adjustment via MASK within the loop. */
21449 break;
21451 default:
21452 gcc_unreachable ();
21455 /* Prepare to adjust the return value. */
21456 before = gen_reg_rtx (SImode);
21457 if (after)
21458 after = gen_reg_rtx (SImode);
21459 store_mode = mode = SImode;
21463 mem = rs6000_pre_atomic_barrier (mem, model);
21465 label = gen_label_rtx ();
21466 emit_label (label);
21467 label = gen_rtx_LABEL_REF (VOIDmode, label);
21469 if (before == NULL_RTX)
21470 before = gen_reg_rtx (mode);
21472 emit_load_locked (mode, before, mem);
21474 if (code == NOT)
21476 x = expand_simple_binop (mode, AND, before, val,
21477 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21478 after = expand_simple_unop (mode, NOT, x, after, 1);
21480 else
21482 after = expand_simple_binop (mode, code, before, val,
21483 after, 1, OPTAB_LIB_WIDEN);
21486 x = after;
21487 if (mask)
21489 x = expand_simple_binop (SImode, AND, after, mask,
21490 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21491 x = rs6000_mask_atomic_subword (before, x, mask);
21493 else if (store_mode != mode)
21494 x = convert_modes (store_mode, mode, x, 1);
21496 cond = gen_reg_rtx (CCmode);
21497 emit_store_conditional (store_mode, cond, mem, x);
21499 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
21500 emit_unlikely_jump (x, label);
21502 rs6000_post_atomic_barrier (model);
21504 if (shift)
21506 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
21507 then do the calculations in a SImode register. */
21508 if (orig_before)
21509 rs6000_finish_atomic_subword (orig_before, before, shift);
21510 if (orig_after)
21511 rs6000_finish_atomic_subword (orig_after, after, shift);
21513 else if (store_mode != mode)
21515 /* QImode/HImode on machines with lbarx/lharx where we do the native
21516 operation and then do the calculations in a SImode register. */
21517 if (orig_before)
21518 convert_move (orig_before, before, 1);
21519 if (orig_after)
21520 convert_move (orig_after, after, 1);
21522 else if (orig_after && after != orig_after)
21523 emit_move_insn (orig_after, after);
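/* E.g., a SImode atomic fetch-and-add becomes the usual reservation
   loop (register names illustrative):

       .Lretry: lwarx   rBEFORE,0,rMEM
                add     rAFTER,rBEFORE,rVAL
                stwcx.  rAFTER,0,rMEM
                bne-    cr0,.Lretry                                  */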
21526 /* Emit instructions to move SRC to DST. Called by splitters for
21527 multi-register moves. It will emit at most one instruction for
21528 each register that is accessed; that is, it won't emit li/lis pairs
21529 (or equivalent for 64-bit code). One of SRC or DST must be a hard
21530 register. */
21532 void
21533 rs6000_split_multireg_move (rtx dst, rtx src)
21535 /* The register number of the first register being moved. */
21536 int reg;
21537 /* The mode that is to be moved. */
21538 machine_mode mode;
21539 /* The mode that the move is being done in, and its size. */
21540 machine_mode reg_mode;
21541 int reg_mode_size;
21542 /* The number of registers that will be moved. */
21543 int nregs;
21545 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
21546 mode = GET_MODE (dst);
21547 nregs = hard_regno_nregs[reg][mode];
21548 if (FP_REGNO_P (reg))
21549 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
21550 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
21551 else if (ALTIVEC_REGNO_P (reg))
21552 reg_mode = V16QImode;
21553 else if (TARGET_E500_DOUBLE && FLOAT128_2REG_P (mode))
21554 reg_mode = DFmode;
21555 else
21556 reg_mode = word_mode;
21557 reg_mode_size = GET_MODE_SIZE (reg_mode);
21559 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
21561 /* TDmode residing in FP registers is special, since the ISA requires that
21562 the lower-numbered word of a register pair is always the most significant
21563 word, even in little-endian mode. This does not match the usual subreg
21564 semantics, so we cannot use simplify_gen_subreg in those cases. Access
21565 the appropriate constituent registers "by hand" in little-endian mode.
21567 Note we do not need to check for destructive overlap here since TDmode
21568 can only reside in even/odd register pairs. */
21569 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
21571 rtx p_src, p_dst;
21572 int i;
21574 for (i = 0; i < nregs; i++)
21576 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
21577 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
21578 else
21579 p_src = simplify_gen_subreg (reg_mode, src, mode,
21580 i * reg_mode_size);
21582 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
21583 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
21584 else
21585 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
21586 i * reg_mode_size);
21588 emit_insn (gen_rtx_SET (p_dst, p_src));
21591 return;
21594 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
21596 /* Move register range backwards, if we might have destructive
21597 overlap. */
21598 int i;
21599 for (i = nregs - 1; i >= 0; i--)
21600 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
21601 i * reg_mode_size),
21602 simplify_gen_subreg (reg_mode, src, mode,
21603 i * reg_mode_size)));
21605 else
21607 int i;
21608 int j = -1;
21609 bool used_update = false;
21610 rtx restore_basereg = NULL_RTX;
21612 if (MEM_P (src) && INT_REGNO_P (reg))
21614 rtx breg;
21616 if (GET_CODE (XEXP (src, 0)) == PRE_INC
21617 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
21619 rtx delta_rtx;
21620 breg = XEXP (XEXP (src, 0), 0);
21621 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
21622 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
21623 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
21624 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
21625 src = replace_equiv_address (src, breg);
21627 else if (! rs6000_offsettable_memref_p (src, reg_mode))
21629 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
21631 rtx basereg = XEXP (XEXP (src, 0), 0);
21632 if (TARGET_UPDATE)
21634 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
21635 emit_insn (gen_rtx_SET (ndst,
21636 gen_rtx_MEM (reg_mode,
21637 XEXP (src, 0))));
21638 used_update = true;
21640 else
21641 emit_insn (gen_rtx_SET (basereg,
21642 XEXP (XEXP (src, 0), 1)));
21643 src = replace_equiv_address (src, basereg);
21645 else
21647 rtx basereg = gen_rtx_REG (Pmode, reg);
21648 emit_insn (gen_rtx_SET (basereg, XEXP (src, 0)));
21649 src = replace_equiv_address (src, basereg);
21653 breg = XEXP (src, 0);
21654 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
21655 breg = XEXP (breg, 0);
21657 /* If the base register we are using to address memory is
21658 also a destination reg, then change that register last. */
21659 if (REG_P (breg)
21660 && REGNO (breg) >= REGNO (dst)
21661 && REGNO (breg) < REGNO (dst) + nregs)
21662 j = REGNO (breg) - REGNO (dst);
21664 else if (MEM_P (dst) && INT_REGNO_P (reg))
21666 rtx breg;
21668 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
21669 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
21671 rtx delta_rtx;
21672 breg = XEXP (XEXP (dst, 0), 0);
21673 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
21674 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
21675 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
21677 /* We have to update the breg before doing the store.
21678 Use store with update, if available. */
21680 if (TARGET_UPDATE)
21682 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
21683 emit_insn (TARGET_32BIT
21684 ? (TARGET_POWERPC64
21685 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
21686 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
21687 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
21688 used_update = true;
21690 else
21691 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
21692 dst = replace_equiv_address (dst, breg);
21694 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
21695 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
21697 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
21699 rtx basereg = XEXP (XEXP (dst, 0), 0);
21700 if (TARGET_UPDATE)
21702 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
21703 emit_insn (gen_rtx_SET (gen_rtx_MEM (reg_mode,
21704 XEXP (dst, 0)),
21705 nsrc));
21706 used_update = true;
21708 else
21709 emit_insn (gen_rtx_SET (basereg,
21710 XEXP (XEXP (dst, 0), 1)));
21711 dst = replace_equiv_address (dst, basereg);
21713 else
21715 rtx basereg = XEXP (XEXP (dst, 0), 0);
21716 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
21717 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
21718 && REG_P (basereg)
21719 && REG_P (offsetreg)
21720 && REGNO (basereg) != REGNO (offsetreg));
21721 if (REGNO (basereg) == 0)
21723 rtx tmp = offsetreg;
21724 offsetreg = basereg;
21725 basereg = tmp;
21727 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
21728 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
21729 dst = replace_equiv_address (dst, basereg);
21732 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
21733 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
21736 for (i = 0; i < nregs; i++)
21738 /* Calculate index to next subword. */
21739 ++j;
21740 if (j == nregs)
21741 j = 0;
21743 /* If compiler already emitted move of first word by
21744 store with update, no need to do anything. */
21745 if (j == 0 && used_update)
21746 continue;
21748 emit_insn (gen_rtx_SET (simplify_gen_subreg (reg_mode, dst, mode,
21749 j * reg_mode_size),
21750 simplify_gen_subreg (reg_mode, src, mode,
21751 j * reg_mode_size)));
21753 if (restore_basereg != NULL_RTX)
21754 emit_insn (restore_basereg);
21759 /* This page contains routines that are used to determine what the
21760 function prologue and epilogue code will do and write them out. */
21762 static inline bool
21763 save_reg_p (int r)
21765 return !call_used_regs[r] && df_regs_ever_live_p (r);
21768 /* Return the first fixed-point register that is required to be
21769 saved. 32 if none. */
21771 int
21772 first_reg_to_save (void)
21774 int first_reg;
21776 /* Find lowest numbered live register. */
21777 for (first_reg = 13; first_reg <= 31; first_reg++)
21778 if (save_reg_p (first_reg))
21779 break;
21781 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
21782 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
21783 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
21784 || (TARGET_TOC && TARGET_MINIMAL_TOC))
21785 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
21786 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
21788 #if TARGET_MACHO
21789 if (flag_pic
21790 && crtl->uses_pic_offset_table
21791 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
21792 return RS6000_PIC_OFFSET_TABLE_REGNUM;
21793 #endif
21795 return first_reg;
21798 /* Similar, for FP regs. */
21800 int
21801 first_fp_reg_to_save (void)
21803 int first_reg;
21805 /* Find lowest numbered live register. */
21806 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
21807 if (save_reg_p (first_reg))
21808 break;
21810 return first_reg;
21813 /* Similar, for AltiVec regs. */
21815 static int
21816 first_altivec_reg_to_save (void)
21818 int i;
21820 /* Stack frame remains as is unless we are in AltiVec ABI. */
21821 if (! TARGET_ALTIVEC_ABI)
21822 return LAST_ALTIVEC_REGNO + 1;
21824 /* On Darwin, the unwind routines are compiled without
21825 TARGET_ALTIVEC, and use save_world to save/restore the
21826 altivec registers when necessary. */
21827 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21828 && ! TARGET_ALTIVEC)
21829 return FIRST_ALTIVEC_REGNO + 20;
21831 /* Find lowest numbered live register. */
21832 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
21833 if (save_reg_p (i))
21834 break;
21836 return i;
21839 /* Return a 32-bit mask of the AltiVec registers we need to set in
21840 VRSAVE. Bit n of the return value is 1 if Vn is live. The MSB in
21841 the 32-bit word is 0. */
21843 static unsigned int
21844 compute_vrsave_mask (void)
21846 unsigned int i, mask = 0;
21848 /* On Darwin, the unwind routines are compiled without
21849 TARGET_ALTIVEC, and use save_world to save/restore the
21850 call-saved altivec registers when necessary. */
21851 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21852 && ! TARGET_ALTIVEC)
21853 mask |= 0xFFF;
21855 /* First, find out if we use _any_ altivec registers. */
21856 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
21857 if (df_regs_ever_live_p (i))
21858 mask |= ALTIVEC_REG_BIT (i);
21860 if (mask == 0)
21861 return mask;
21863 /* Next, remove the argument registers from the set. These must
21864 be in the VRSAVE mask set by the caller, so we don't need to add
21865 them in again. More importantly, the mask we compute here is
21866 used to generate CLOBBERs in the set_vrsave insn, and we do not
21867 wish the argument registers to die. */
21868 for (i = ALTIVEC_ARG_MIN_REG; i < (unsigned) crtl->args.info.vregno; i++)
21869 mask &= ~ALTIVEC_REG_BIT (i);
21871 /* Similarly, remove the return value from the set. */
21873 bool yes = false;
21874 diddle_return_value (is_altivec_return_reg, &yes);
21875 if (yes)
21876 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
21879 return mask;
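/* A worked example, assuming ALTIVEC_REG_BIT maps Vn to bit n counted
   from the MSB (as the comment above describes): if V20 and V31 are the
   only live vector registers, and neither is an argument or return-value
   register, the function returns
       (0x80000000 >> 20) | (0x80000000 >> 31) == 0x00000801.  */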
21882 /* For a very restricted set of circumstances, we can cut down the
21883 size of prologues/epilogues by calling our own save/restore-the-world
21884 routines. */
21886 static void
21887 compute_save_world_info (rs6000_stack_t *info_ptr)
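/* Seed the flag so that the WORLD_SAVE_P test in the chain just below
   starts out true; the assignment then narrows it down to the real
   answer.  */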
21889 info_ptr->world_save_p = 1;
21890 info_ptr->world_save_p
21891 = (WORLD_SAVE_P (info_ptr)
21892 && DEFAULT_ABI == ABI_DARWIN
21893 && !cfun->has_nonlocal_label
21894 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
21895 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
21896 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
21897 && info_ptr->cr_save_p);
21899 /* This will not work in conjunction with sibcalls. Make sure there
21900 are none. (This check is expensive, but seldom executed.) */
21901 if (WORLD_SAVE_P (info_ptr))
21903 rtx_insn *insn;
21904 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
21905 if (CALL_P (insn) && SIBLING_CALL_P (insn))
21907 info_ptr->world_save_p = 0;
21908 break;
21912 if (WORLD_SAVE_P (info_ptr))
21914 /* Even if we're not touching VRsave, make sure there's room on the
21915 stack for it, if it looks like we're calling SAVE_WORLD, which
21916 will attempt to save it. */
21917 info_ptr->vrsave_size = 4;
21919 /* If we are going to save the world, we need to save the link register too. */
21920 info_ptr->lr_save_p = 1;
21922 /* "Save" the VRsave register too if we're saving the world. */
21923 if (info_ptr->vrsave_mask == 0)
21924 info_ptr->vrsave_mask = compute_vrsave_mask ();
21926 /* Because the Darwin register save/restore routines only handle
21927 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
21928 check. */
21929 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
21930 && (info_ptr->first_altivec_reg_save
21931 >= FIRST_SAVED_ALTIVEC_REGNO));
21933 return;
21937 static void
21938 is_altivec_return_reg (rtx reg, void *xyes)
21940 bool *yes = (bool *) xyes;
21941 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
21942 *yes = true;
21946 /* Look for user-defined global regs in the range FIRST to LAST-1.
21947 We should not restore these, and so cannot use lmw or out-of-line
21948 restore functions if there are any. We also can't save them
21949 (well, emit frame notes for them), because frame unwinding during
21950 exception handling will restore saved registers. */
21952 static bool
21953 global_regs_p (unsigned first, unsigned last)
21955 while (first < last)
21956 if (global_regs[first++])
21957 return true;
21958 return false;
21961 /* Determine the strategy for saving/restoring registers. */
21963 enum {
21964 SAVRES_MULTIPLE = 0x1,
21965 SAVE_INLINE_FPRS = 0x2,
21966 SAVE_INLINE_GPRS = 0x4,
21967 REST_INLINE_FPRS = 0x8,
21968 REST_INLINE_GPRS = 0x10,
21969 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
21970 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
21971 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
21972 SAVE_INLINE_VRS = 0x100,
21973 REST_INLINE_VRS = 0x200
21974 };
21976 static int
21977 rs6000_savres_strategy (rs6000_stack_t *info,
21978 bool using_static_chain_p)
21980 int strategy = 0;
21981 bool lr_save_p;
21983 if (TARGET_MULTIPLE
21984 && !TARGET_POWERPC64
21985 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
21986 && info->first_gp_reg_save < 31
21987 && !global_regs_p (info->first_gp_reg_save, 32))
21988 strategy |= SAVRES_MULTIPLE;
21990 if (crtl->calls_eh_return
21991 || cfun->machine->ra_need_lr)
21992 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
21993 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
21994 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21996 if (info->first_fp_reg_save == 64
21997 /* The out-of-line FP routines use double-precision stores;
21998 we can't use those routines if we don't have such stores. */
21999 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
22000 || global_regs_p (info->first_fp_reg_save, 64))
22001 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
22003 if (info->first_gp_reg_save == 32
22004 || (!(strategy & SAVRES_MULTIPLE)
22005 && global_regs_p (info->first_gp_reg_save, 32)))
22006 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22008 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
22009 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
22010 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22012 /* Define cutoff for using out-of-line functions to save registers. */
22013 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
22015 if (!optimize_size)
22017 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
22018 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22019 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22021 else
22023 /* Prefer out-of-line restore if it will exit. */
22024 if (info->first_fp_reg_save > 61)
22025 strategy |= SAVE_INLINE_FPRS;
22026 if (info->first_gp_reg_save > 29)
22028 if (info->first_fp_reg_save == 64)
22029 strategy |= SAVE_INLINE_GPRS;
22030 else
22031 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22033 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
22034 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22037 else if (DEFAULT_ABI == ABI_DARWIN)
22039 if (info->first_fp_reg_save > 60)
22040 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
22041 if (info->first_gp_reg_save > 29)
22042 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22043 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22045 else
22047 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22048 if (info->first_fp_reg_save > 61)
22049 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
22050 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
22051 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
22054 /* Don't bother to try to save things out-of-line if r11 is occupied
22055 by the static chain. It would require too much fiddling and the
22056 static chain is rarely used anyway. FPRs are saved w.r.t. the stack
22057 pointer on Darwin, and AIX uses r1 or r12. */
22058 if (using_static_chain_p
22059 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
22060 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
22061 | SAVE_INLINE_GPRS
22062 | SAVE_INLINE_VRS | REST_INLINE_VRS);
22064 /* We can only use the out-of-line routines to restore if we've
22065 saved all the registers from first_fp_reg_save in the prologue.
22066 Otherwise, we risk loading garbage. */
22067 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
22069 int i;
22071 for (i = info->first_fp_reg_save; i < 64; i++)
22072 if (!save_reg_p (i))
22074 strategy |= REST_INLINE_FPRS;
22075 break;
22079 /* If we are going to use store multiple, then don't even bother
22080 with the out-of-line routines, since the store-multiple
22081 instruction will always be smaller. */
22082 if ((strategy & SAVRES_MULTIPLE))
22083 strategy |= SAVE_INLINE_GPRS;
22085 /* info->lr_save_p isn't yet set if the only reason lr needs to be
22086 saved is an out-of-line save or restore. Set up the value for
22087 the next test (excluding out-of-line gpr restore). */
22088 lr_save_p = (info->lr_save_p
22089 || !(strategy & SAVE_INLINE_GPRS)
22090 || !(strategy & SAVE_INLINE_FPRS)
22091 || !(strategy & SAVE_INLINE_VRS)
22092 || !(strategy & REST_INLINE_FPRS)
22093 || !(strategy & REST_INLINE_VRS));
22095 /* The situation is more complicated with load multiple. We'd
22096 prefer to use the out-of-line routines for restores, since the
22097 "exit" out-of-line routines can handle the restore of LR and the
22098 frame teardown. However, it doesn't make sense to use the
22099 out-of-line routine if that is the only reason we'd need to save
22100 LR, and we can't use the "exit" out-of-line gpr restore if we
22101 have saved some fprs; in those cases it is advantageous to use
22102 load multiple when available. */
22103 if ((strategy & SAVRES_MULTIPLE)
22104 && (!lr_save_p
22105 || info->first_fp_reg_save != 64))
22106 strategy |= REST_INLINE_GPRS;
22108 /* Saving CR interferes with the exit routines used on the SPE, so
22109 just punt here. */
22110 if (TARGET_SPE_ABI
22111 && info->spe_64bit_regs_used
22112 && info->cr_save_p)
22113 strategy |= REST_INLINE_GPRS;
22115 /* We can only use load multiple or the out-of-line routines to
22116 restore if we've used store multiple or out-of-line routines
22117 in the prologue, i.e. if we've saved all the registers from
22118 first_gp_reg_save. Otherwise, we risk loading garbage. */
22119 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
22120 == SAVE_INLINE_GPRS)
22122 int i;
22124 for (i = info->first_gp_reg_save; i < 32; i++)
22125 if (!save_reg_p (i))
22127 strategy |= REST_INLINE_GPRS;
22128 break;
22132 if (TARGET_ELF && TARGET_64BIT)
22134 if (!(strategy & SAVE_INLINE_FPRS))
22135 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
22136 else if (!(strategy & SAVE_INLINE_GPRS)
22137 && info->first_fp_reg_save == 64)
22138 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
22140 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
22141 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
22143 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
22144 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
22146 return strategy;
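/* How callers typically read the result (a sketch using the flags
   defined above):

     int s = rs6000_savres_strategy (info, using_static_chain_p);
     bool out_of_line_gpr_save = (s & SAVE_INLINE_GPRS) == 0;
     bool use_store_multiple = (s & SAVRES_MULTIPLE) != 0;

   i.e. a clear SAVE_INLINE_* / REST_INLINE_* bit means "use the
   out-of-line routine", and SAVRES_MULTIPLE selects stmw/lmw for the
   GPRs on 32-bit targets.  */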
22149 /* Calculate the stack information for the current function. This is
22150 complicated by having two separate calling sequences, the AIX calling
22151 sequence and the V.4 calling sequence.
22153 AIX (and Darwin/Mac OS X) stack frames look like:
22154 32-bit 64-bit
22155 SP----> +---------------------------------------+
22156 | back chain to caller | 0 0
22157 +---------------------------------------+
22158 | saved CR | 4 8 (8-11)
22159 +---------------------------------------+
22160 | saved LR | 8 16
22161 +---------------------------------------+
22162 | reserved for compilers | 12 24
22163 +---------------------------------------+
22164 | reserved for binders | 16 32
22165 +---------------------------------------+
22166 | saved TOC pointer | 20 40
22167 +---------------------------------------+
22168 | Parameter save area (P) | 24 48
22169 +---------------------------------------+
22170 | Alloca space (A) | 24+P etc.
22171 +---------------------------------------+
22172 | Local variable space (L) | 24+P+A
22173 +---------------------------------------+
22174 | Float/int conversion temporary (X) | 24+P+A+L
22175 +---------------------------------------+
22176 | Save area for AltiVec registers (W) | 24+P+A+L+X
22177 +---------------------------------------+
22178 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
22179 +---------------------------------------+
22180 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
22181 +---------------------------------------+
22182 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
22183 +---------------------------------------+
22184 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
22185 +---------------------------------------+
22186 old SP->| back chain to caller's caller |
22187 +---------------------------------------+
22189 The required alignment for AIX configurations is two words (i.e., 8
22190 or 16 bytes).
22192 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
22194 SP----> +---------------------------------------+
22195 | Back chain to caller | 0
22196 +---------------------------------------+
22197 | Save area for CR | 8
22198 +---------------------------------------+
22199 | Saved LR | 16
22200 +---------------------------------------+
22201 | Saved TOC pointer | 24
22202 +---------------------------------------+
22203 | Parameter save area (P) | 32
22204 +---------------------------------------+
22205 | Alloca space (A) | 32+P
22206 +---------------------------------------+
22207 | Local variable space (L) | 32+P+A
22208 +---------------------------------------+
22209 | Save area for AltiVec registers (W) | 32+P+A+L
22210 +---------------------------------------+
22211 | AltiVec alignment padding (Y) | 32+P+A+L+W
22212 +---------------------------------------+
22213 | Save area for GP registers (G) | 32+P+A+L+W+Y
22214 +---------------------------------------+
22215 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
22216 +---------------------------------------+
22217 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
22218 +---------------------------------------+
22221 V.4 stack frames look like:
22223 SP----> +---------------------------------------+
22224 | back chain to caller | 0
22225 +---------------------------------------+
22226 | caller's saved LR | 4
22227 +---------------------------------------+
22228 | Parameter save area (P) | 8
22229 +---------------------------------------+
22230 | Alloca space (A) | 8+P
22231 +---------------------------------------+
22232 | Varargs save area (V) | 8+P+A
22233 +---------------------------------------+
22234 | Local variable space (L) | 8+P+A+V
22235 +---------------------------------------+
22236 | Float/int conversion temporary (X) | 8+P+A+V+L
22237 +---------------------------------------+
22238 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
22239 +---------------------------------------+
22240 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
22241 +---------------------------------------+
22242 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
22243 +---------------------------------------+
22244 | SPE: area for 64-bit GP registers |
22245 +---------------------------------------+
22246 | SPE alignment padding |
22247 +---------------------------------------+
22248 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
22249 +---------------------------------------+
22250 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
22251 +---------------------------------------+
22252 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
22253 +---------------------------------------+
22254 old SP->| back chain to caller's caller |
22255 +---------------------------------------+
22257 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
22258 given. (But note below and in sysv4.h that we require only 8 and
22259 may round up the size of our stack frame anyway. The historical
22260 reason is that early versions of powerpc-linux didn't properly
22261 align the stack at program startup. A happy side-effect is that
22262 -mno-eabi libraries can be used with -meabi programs.)
22264 The EABI configuration defaults to the V.4 layout. However,
22265 the stack alignment requirements may differ. If -mno-eabi is not
22266 given, the required stack alignment is 8 bytes; if -mno-eabi is
22267 given, the required alignment is 16 bytes. (But see V.4 comment
22268 above.) */
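/* A worked example of the AIX-style offsets, using the ELFv2 layout
   above: a 64-bit function that saves r30-r31 and f31 has
   fp_size = 8 * (64 - 63) = 8 and gp_size = 8 * (32 - 30) = 16, so
   rs6000_stack_info below computes fp_save_offset = -8 and
   gp_save_offset = -8 - 16 = -24, both relative to the incoming
   stack pointer (the caller's SP).  */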
22270 #ifndef ABI_STACK_BOUNDARY
22271 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
22272 #endif
22274 static rs6000_stack_t *
22275 rs6000_stack_info (void)
22277 /* We should never be called for thunks; we are not set up for that. */
22278 gcc_assert (!cfun->is_thunk);
22280 rs6000_stack_t *info_ptr = &stack_info;
22281 int reg_size = TARGET_32BIT ? 4 : 8;
22282 int ehrd_size;
22283 int ehcr_size;
22284 int save_align;
22285 int first_gp;
22286 HOST_WIDE_INT non_fixed_size;
22287 bool using_static_chain_p;
22289 if (reload_completed && info_ptr->reload_completed)
22290 return info_ptr;
22292 memset (info_ptr, 0, sizeof (*info_ptr));
22293 info_ptr->reload_completed = reload_completed;
22295 if (TARGET_SPE)
22297 /* Cache value so we don't rescan instruction chain over and over. */
22298 if (cfun->machine->insn_chain_scanned_p == 0)
22299 cfun->machine->insn_chain_scanned_p
22300 = spe_func_has_64bit_regs_p () + 1;
22301 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
22304 /* Select which calling sequence. */
22305 info_ptr->abi = DEFAULT_ABI;
22307 /* Calculate which registers need to be saved & save area size. */
22308 info_ptr->first_gp_reg_save = first_reg_to_save ();
22309 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
22310 even if it currently looks like we won't. Reload may need it to
22311 get at a constant; if so, it will have already created a constant
22312 pool entry for it. */
22313 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
22314 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
22315 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
22316 && crtl->uses_const_pool
22317 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
22318 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
22319 else
22320 first_gp = info_ptr->first_gp_reg_save;
22322 info_ptr->gp_size = reg_size * (32 - first_gp);
22324 /* For the SPE, we have an additional upper 32-bits on each GPR.
22325 Ideally we should save the entire 64-bits only when the upper
22326 half is used in SIMD instructions. Since we only record
22327 registers live (not the size they are used in), this proves
22328 difficult because we'd have to traverse the instruction chain at
22329 the right time, taking reload into account. This is a real pain,
22330 so we opt to save the GPRs in 64 bits whenever even one register
22331 gets used in 64 bits. Otherwise, all the registers in the frame
22332 get saved in 32 bits.
22334 So, when we save all GPRs (except the SP) in 64 bits, the
22335 traditional GP save area will be empty. */
22336 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
22337 info_ptr->gp_size = 0;
22339 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
22340 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
22342 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
22343 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
22344 - info_ptr->first_altivec_reg_save);
22346 /* Does this function call anything? */
22347 info_ptr->calls_p = (! crtl->is_leaf
22348 || cfun->machine->ra_needs_full_frame);
22350 /* Determine if we need to save the condition code registers. */
22351 if (df_regs_ever_live_p (CR2_REGNO)
22352 || df_regs_ever_live_p (CR3_REGNO)
22353 || df_regs_ever_live_p (CR4_REGNO))
22355 info_ptr->cr_save_p = 1;
22356 if (DEFAULT_ABI == ABI_V4)
22357 info_ptr->cr_size = reg_size;
22360 /* If the current function calls __builtin_eh_return, then we need
22361 to allocate stack space for registers that will hold data for
22362 the exception handler. */
22363 if (crtl->calls_eh_return)
22365 unsigned int i;
22366 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
22367 continue;
22369 /* SPE saves EH registers in 64-bits. */
22370 ehrd_size = i * (TARGET_SPE_ABI
22371 && info_ptr->spe_64bit_regs_used != 0
22372 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
22374 else
22375 ehrd_size = 0;
22377 /* In the ELFv2 ABI, we also need to allocate space for separate
22378 CR field save areas if the function calls __builtin_eh_return. */
22379 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
22381 /* This hard-codes that we have three call-saved CR fields. */
22382 ehcr_size = 3 * reg_size;
22383 /* We do *not* use the regular CR save mechanism. */
22384 info_ptr->cr_save_p = 0;
22386 else
22387 ehcr_size = 0;
22389 /* Determine various sizes. */
22390 info_ptr->reg_size = reg_size;
22391 info_ptr->fixed_size = RS6000_SAVE_AREA;
22392 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
22393 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
22394 TARGET_ALTIVEC ? 16 : 8);
22395 if (FRAME_GROWS_DOWNWARD)
22396 info_ptr->vars_size
22397 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
22398 + info_ptr->parm_size,
22399 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
22400 - (info_ptr->fixed_size + info_ptr->vars_size
22401 + info_ptr->parm_size);
22403 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
22404 info_ptr->spe_gp_size = 8 * (32 - first_gp);
22405 else
22406 info_ptr->spe_gp_size = 0;
22408 if (TARGET_ALTIVEC_ABI)
22409 info_ptr->vrsave_mask = compute_vrsave_mask ();
22410 else
22411 info_ptr->vrsave_mask = 0;
22413 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
22414 info_ptr->vrsave_size = 4;
22415 else
22416 info_ptr->vrsave_size = 0;
22418 compute_save_world_info (info_ptr);
22420 /* Calculate the offsets. */
22421 switch (DEFAULT_ABI)
22423 case ABI_NONE:
22424 default:
22425 gcc_unreachable ();
22427 case ABI_AIX:
22428 case ABI_ELFv2:
22429 case ABI_DARWIN:
22430 info_ptr->fp_save_offset = - info_ptr->fp_size;
22431 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
22433 if (TARGET_ALTIVEC_ABI)
22435 info_ptr->vrsave_save_offset
22436 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
22438 /* Align stack so vector save area is on a quadword boundary.
22439 The padding goes above the vectors. */
22440 if (info_ptr->altivec_size != 0)
22441 info_ptr->altivec_padding_size
22442 = info_ptr->vrsave_save_offset & 0xF;
22443 else
22444 info_ptr->altivec_padding_size = 0;
22446 info_ptr->altivec_save_offset
22447 = info_ptr->vrsave_save_offset
22448 - info_ptr->altivec_padding_size
22449 - info_ptr->altivec_size;
22450 gcc_assert (info_ptr->altivec_size == 0
22451 || info_ptr->altivec_save_offset % 16 == 0);
22453 /* Adjust for AltiVec case. */
22454 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
22456 else
22457 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
22459 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
22460 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
22461 info_ptr->lr_save_offset = 2*reg_size;
22462 break;
22464 case ABI_V4:
22465 info_ptr->fp_save_offset = - info_ptr->fp_size;
22466 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
22467 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
22469 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
22471 /* Align stack so SPE GPR save area is aligned on a
22472 double-word boundary. */
22473 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
22474 info_ptr->spe_padding_size
22475 = 8 - (-info_ptr->cr_save_offset % 8);
22476 else
22477 info_ptr->spe_padding_size = 0;
22479 info_ptr->spe_gp_save_offset
22480 = info_ptr->cr_save_offset
22481 - info_ptr->spe_padding_size
22482 - info_ptr->spe_gp_size;
22484 /* Adjust for SPE case. */
22485 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
22487 else if (TARGET_ALTIVEC_ABI)
22489 info_ptr->vrsave_save_offset
22490 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
22492 /* Align stack so vector save area is on a quadword boundary. */
22493 if (info_ptr->altivec_size != 0)
22494 info_ptr->altivec_padding_size
22495 = 16 - (-info_ptr->vrsave_save_offset % 16);
22496 else
22497 info_ptr->altivec_padding_size = 0;
22499 info_ptr->altivec_save_offset
22500 = info_ptr->vrsave_save_offset
22501 - info_ptr->altivec_padding_size
22502 - info_ptr->altivec_size;
22504 /* Adjust for AltiVec case. */
22505 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
22507 else
22508 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
22509 info_ptr->ehrd_offset -= ehrd_size;
22510 info_ptr->lr_save_offset = reg_size;
22511 break;
22514 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
22515 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
22516 + info_ptr->gp_size
22517 + info_ptr->altivec_size
22518 + info_ptr->altivec_padding_size
22519 + info_ptr->spe_gp_size
22520 + info_ptr->spe_padding_size
22521 + ehrd_size
22522 + ehcr_size
22523 + info_ptr->cr_size
22524 + info_ptr->vrsave_size,
22525 save_align);
22527 non_fixed_size = (info_ptr->vars_size
22528 + info_ptr->parm_size
22529 + info_ptr->save_size);
22531 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
22532 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
22534 /* Determine if we need to save the link register. */
22535 if (info_ptr->calls_p
22536 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22537 && crtl->profile
22538 && !TARGET_PROFILE_KERNEL)
22539 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
22540 #ifdef TARGET_RELOCATABLE
22541 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
22542 #endif
22543 || rs6000_ra_ever_killed ())
22544 info_ptr->lr_save_p = 1;
22546 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
22547 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
22548 && call_used_regs[STATIC_CHAIN_REGNUM]);
22549 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
22550 using_static_chain_p);
22552 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
22553 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
22554 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
22555 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
22556 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
22557 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
22558 info_ptr->lr_save_p = 1;
22560 if (info_ptr->lr_save_p)
22561 df_set_regs_ever_live (LR_REGNO, true);
22563 /* Determine if we need to allocate any stack frame:
22565 For AIX we need to push the stack if a frame pointer is needed
22566 (because the stack might be dynamically adjusted), if we are
22567 debugging, if we make calls, or if the sum of fp_save, gp_save,
22568 and local variables are more than the space needed to save all
22569 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
22570 + 18*8 = 288 (GPR13 reserved).
22572 For V.4 we don't have the stack cushion that AIX uses, but assume
22573 that the debugger can handle stackless frames. */
22575 if (info_ptr->calls_p)
22576 info_ptr->push_p = 1;
22578 else if (DEFAULT_ABI == ABI_V4)
22579 info_ptr->push_p = non_fixed_size != 0;
22581 else if (frame_pointer_needed)
22582 info_ptr->push_p = 1;
22584 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
22585 info_ptr->push_p = 1;
22587 else
22588 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
22590 return info_ptr;
22593 /* Return true if the current function uses any GPRs in 64-bit SIMD
22594 mode. */
22596 static bool
22597 spe_func_has_64bit_regs_p (void)
22599 rtx_insn *insns, *insn;
22601 /* Functions that save and restore all the call-saved registers will
22602 need to save/restore the registers in 64-bits. */
22603 if (crtl->calls_eh_return
22604 || cfun->calls_setjmp
22605 || crtl->has_nonlocal_goto)
22606 return true;
22608 insns = get_insns ();
22610 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
22612 if (INSN_P (insn))
22614 rtx i;
22616 /* FIXME: This should be implemented with attributes...
22618 (set_attr "spe64" "true")....then,
22619 if (get_spe64(insn)) return true;
22621 It's the only reliable way to do the stuff below. */
22623 i = PATTERN (insn);
22624 if (GET_CODE (i) == SET)
22626 machine_mode mode = GET_MODE (SET_SRC (i));
22628 if (SPE_VECTOR_MODE (mode))
22629 return true;
22630 if (TARGET_E500_DOUBLE
22631 && (mode == DFmode || FLOAT128_2REG_P (mode)))
22632 return true;
22637 return false;
22640 static void
22641 debug_stack_info (rs6000_stack_t *info)
22643 const char *abi_string;
22645 if (! info)
22646 info = rs6000_stack_info ();
22648 fprintf (stderr, "\nStack information for function %s:\n",
22649 ((current_function_decl && DECL_NAME (current_function_decl))
22650 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
22651 : "<unknown>"));
22653 switch (info->abi)
22655 default: abi_string = "Unknown"; break;
22656 case ABI_NONE: abi_string = "NONE"; break;
22657 case ABI_AIX: abi_string = "AIX"; break;
22658 case ABI_ELFv2: abi_string = "ELFv2"; break;
22659 case ABI_DARWIN: abi_string = "Darwin"; break;
22660 case ABI_V4: abi_string = "V.4"; break;
22663 fprintf (stderr, "\tABI = %5s\n", abi_string);
22665 if (TARGET_ALTIVEC_ABI)
22666 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
22668 if (TARGET_SPE_ABI)
22669 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
22671 if (info->first_gp_reg_save != 32)
22672 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
22674 if (info->first_fp_reg_save != 64)
22675 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
22677 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
22678 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
22679 info->first_altivec_reg_save);
22681 if (info->lr_save_p)
22682 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
22684 if (info->cr_save_p)
22685 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
22687 if (info->vrsave_mask)
22688 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
22690 if (info->push_p)
22691 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
22693 if (info->calls_p)
22694 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
22696 if (info->gp_size)
22697 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
22699 if (info->fp_size)
22700 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
22702 if (info->altivec_size)
22703 fprintf (stderr, "\taltivec_save_offset = %5d\n",
22704 info->altivec_save_offset);
22706 if (info->spe_gp_size)
22707 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
22708 info->spe_gp_save_offset);
22710 if (info->vrsave_size)
22711 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
22712 info->vrsave_save_offset);
22714 if (info->lr_save_p)
22715 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
22717 if (info->cr_save_p)
22718 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
22720 if (info->varargs_save_offset)
22721 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
22723 if (info->total_size)
22724 fprintf (stderr, "\ttotal_size = " HOST_WIDE_INT_PRINT_DEC"\n",
22725 info->total_size);
22727 if (info->vars_size)
22728 fprintf (stderr, "\tvars_size = " HOST_WIDE_INT_PRINT_DEC"\n",
22729 info->vars_size);
22731 if (info->parm_size)
22732 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
22734 if (info->fixed_size)
22735 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
22737 if (info->gp_size)
22738 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
22740 if (info->spe_gp_size)
22741 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
22743 if (info->fp_size)
22744 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
22746 if (info->altivec_size)
22747 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
22749 if (info->vrsave_size)
22750 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
22752 if (info->altivec_padding_size)
22753 fprintf (stderr, "\taltivec_padding_size= %5d\n",
22754 info->altivec_padding_size);
22756 if (info->spe_padding_size)
22757 fprintf (stderr, "\tspe_padding_size = %5d\n",
22758 info->spe_padding_size);
22760 if (info->cr_size)
22761 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
22763 if (info->save_size)
22764 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
22766 if (info->reg_size != 4)
22767 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
22769 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
22771 fprintf (stderr, "\n");
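/* This is meant to be usable by hand from a debugger, e.g.
     (gdb) call debug_stack_info (0)
   in which case the info is recomputed via rs6000_stack_info above.  */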
22774 rtx
22775 rs6000_return_addr (int count, rtx frame)
22777 /* Currently we don't optimize very well between prologue and body
22778 code, and for PIC code the output can actually be quite bad, so
22779 don't try to be too clever here. */
22780 if (count != 0
22781 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
22783 cfun->machine->ra_needs_full_frame = 1;
22785 return
22786 gen_rtx_MEM
22787 (Pmode,
22788 memory_address
22789 (Pmode,
22790 plus_constant (Pmode,
22791 copy_to_reg
22792 (gen_rtx_MEM (Pmode,
22793 memory_address (Pmode, frame))),
22794 RETURN_ADDRESS_OFFSET)));
22797 cfun->machine->ra_need_lr = 1;
22798 return get_hard_reg_initial_val (Pmode, LR_REGNO);
22801 /* Say whether a function is a candidate for sibcall handling or not. */
22803 static bool
22804 rs6000_function_ok_for_sibcall (tree decl, tree exp)
22806 tree fntype;
22808 if (decl)
22809 fntype = TREE_TYPE (decl);
22810 else
22811 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
22813 /* We can't do it if the called function has more vector parameters
22814 than the current function; there's nowhere to put the VRsave code. */
22815 if (TARGET_ALTIVEC_ABI
22816 && TARGET_ALTIVEC_VRSAVE
22817 && !(decl && decl == current_function_decl))
22819 function_args_iterator args_iter;
22820 tree type;
22821 int nvreg = 0;
22823 /* Functions with vector parameters are required to have a
22824 prototype, so the argument type info must be available
22825 here. */
22826 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
22827 if (TREE_CODE (type) == VECTOR_TYPE
22828 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22829 nvreg++;
22831 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
22832 if (TREE_CODE (type) == VECTOR_TYPE
22833 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22834 nvreg--;
22836 if (nvreg > 0)
22837 return false;
22840 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
22841 functions, because the callee may have a different TOC pointer from
22842 the caller's, and there's no way to ensure we restore the TOC when
22843 we return. With the secure-plt SYSV ABI we can't make non-local
22844 calls when -fpic/PIC because the plt call stubs use r30. */
22845 if (DEFAULT_ABI == ABI_DARWIN
22846 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22847 && decl
22848 && !DECL_EXTERNAL (decl)
22849 && !DECL_WEAK (decl)
22850 && (*targetm.binds_local_p) (decl))
22851 || (DEFAULT_ABI == ABI_V4
22852 && (!TARGET_SECURE_PLT
22853 || !flag_pic
22854 || (decl
22855 && (*targetm.binds_local_p) (decl)))))
22857 tree attr_list = TYPE_ATTRIBUTES (fntype);
22859 if (!lookup_attribute ("longcall", attr_list)
22860 || lookup_attribute ("shortcall", attr_list))
22861 return true;
22864 return false;
22867 static int
22868 rs6000_ra_ever_killed (void)
22870 rtx_insn *top;
22871 rtx reg;
22872 rtx_insn *insn;
22874 if (cfun->is_thunk)
22875 return 0;
22877 if (cfun->machine->lr_save_state)
22878 return cfun->machine->lr_save_state - 1;
22880 /* regs_ever_live has LR marked as used if any sibcalls are present,
22881 but this should not force saving and restoring in the
22882 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
22883 clobbers LR, so that is inappropriate. */
22885 /* Also, the prologue can generate a store into LR that
22886 doesn't really count, like this:
22888 move LR->R0
22889 bcl to set PIC register
22890 move LR->R31
22891 move R0->LR
22893 When we're called from the epilogue, we need to avoid counting
22894 this as a store. */
22896 push_topmost_sequence ();
22897 top = get_insns ();
22898 pop_topmost_sequence ();
22899 reg = gen_rtx_REG (Pmode, LR_REGNO);
22901 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
22903 if (INSN_P (insn))
22905 if (CALL_P (insn))
22907 if (!SIBLING_CALL_P (insn))
22908 return 1;
22910 else if (find_regno_note (insn, REG_INC, LR_REGNO))
22911 return 1;
22912 else if (set_of (reg, insn) != NULL_RTX
22913 && !prologue_epilogue_contains (insn))
22914 return 1;
22917 return 0;
22920 /* Emit instructions needed to load the TOC register.
22921 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
22922 and there is a constant pool, or for SVR4 -fpic. */
22924 void
22925 rs6000_emit_load_toc_table (int fromprolog)
22927 rtx dest;
22928 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
22930 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
22932 char buf[30];
22933 rtx lab, tmp1, tmp2, got;
22935 lab = gen_label_rtx ();
22936 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
22937 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22938 if (flag_pic == 2)
22939 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22940 else
22941 got = rs6000_got_sym ();
22942 tmp1 = tmp2 = dest;
22943 if (!fromprolog)
22945 tmp1 = gen_reg_rtx (Pmode);
22946 tmp2 = gen_reg_rtx (Pmode);
22948 emit_insn (gen_load_toc_v4_PIC_1 (lab));
22949 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
22950 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
22951 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
22953 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
22955 emit_insn (gen_load_toc_v4_pic_si ());
22956 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22958 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
22960 char buf[30];
22961 rtx temp0 = (fromprolog
22962 ? gen_rtx_REG (Pmode, 0)
22963 : gen_reg_rtx (Pmode));
22965 if (fromprolog)
22967 rtx symF, symL;
22969 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
22970 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22972 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
22973 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22975 emit_insn (gen_load_toc_v4_PIC_1 (symF));
22976 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22977 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
22979 else
22981 rtx tocsym, lab;
22983 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22984 lab = gen_label_rtx ();
22985 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
22986 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22987 if (TARGET_LINK_STACK)
22988 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
22989 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
22991 emit_insn (gen_addsi3 (dest, temp0, dest));
22993 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
22995 /* This is for AIX code running in non-PIC ELF32. */
22996 char buf[30];
22997 rtx realsym;
22998 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
22999 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
23001 emit_insn (gen_elf_high (dest, realsym));
23002 emit_insn (gen_elf_low (dest, dest, realsym));
23004 else
23006 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
23008 if (TARGET_32BIT)
23009 emit_insn (gen_load_toc_aix_si (dest));
23010 else
23011 emit_insn (gen_load_toc_aix_di (dest));
23015 /* Emit instructions to restore the link register after determining where
23016 its value has been stored. */
23018 void
23019 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
23021 rs6000_stack_t *info = rs6000_stack_info ();
23022 rtx operands[2];
23024 operands[0] = source;
23025 operands[1] = scratch;
23027 if (info->lr_save_p)
23029 rtx frame_rtx = stack_pointer_rtx;
23030 HOST_WIDE_INT sp_offset = 0;
23031 rtx tmp;
23033 if (frame_pointer_needed
23034 || cfun->calls_alloca
23035 || info->total_size > 32767)
23037 tmp = gen_frame_mem (Pmode, frame_rtx);
23038 emit_move_insn (operands[1], tmp);
23039 frame_rtx = operands[1];
23041 else if (info->push_p)
23042 sp_offset = info->total_size;
23044 tmp = plus_constant (Pmode, frame_rtx,
23045 info->lr_save_offset + sp_offset);
23046 tmp = gen_frame_mem (Pmode, tmp);
23047 emit_move_insn (tmp, operands[0]);
23049 else
23050 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
23052 /* Freeze lr_save_p. We've just emitted rtl that depends on the
23053 state of lr_save_p so any change from here on would be a bug. In
23054 particular, stop rs6000_ra_ever_killed from considering the SET
23055 of lr we may have added just above. */
23056 cfun->machine->lr_save_state = info->lr_save_p + 1;
23059 static GTY(()) alias_set_type set = -1;
23061 alias_set_type
23062 get_TOC_alias_set (void)
23064 if (set == -1)
23065 set = new_alias_set ();
23066 return set;
23069 /* This returns nonzero if the current function uses the TOC. This is
23070 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
23071 is generated by the ABI_V4 load_toc_* patterns. */
23072 #if TARGET_ELF
23073 static int
23074 uses_TOC (void)
23076 rtx_insn *insn;
23078 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23079 if (INSN_P (insn))
23081 rtx pat = PATTERN (insn);
23082 int i;
23084 if (GET_CODE (pat) == PARALLEL)
23085 for (i = 0; i < XVECLEN (pat, 0); i++)
23087 rtx sub = XVECEXP (pat, 0, i);
23088 if (GET_CODE (sub) == USE)
23090 sub = XEXP (sub, 0);
23091 if (GET_CODE (sub) == UNSPEC
23092 && XINT (sub, 1) == UNSPEC_TOC)
23093 return 1;
23097 return 0;
23099 #endif
23101 rtx
23102 create_TOC_reference (rtx symbol, rtx largetoc_reg)
23104 rtx tocrel, tocreg, hi;
23106 if (TARGET_DEBUG_ADDR)
23108 if (GET_CODE (symbol) == SYMBOL_REF)
23109 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
23110 XSTR (symbol, 0));
23111 else
23113 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
23114 GET_RTX_NAME (GET_CODE (symbol)));
23115 debug_rtx (symbol);
23119 if (!can_create_pseudo_p ())
23120 df_set_regs_ever_live (TOC_REGISTER, true);
23122 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
23123 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
23124 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
23125 return tocrel;
23127 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
23128 if (largetoc_reg != NULL)
23130 emit_move_insn (largetoc_reg, hi);
23131 hi = largetoc_reg;
23133 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
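/* Shape of the result (a sketch): with -mcmodel=small, or while pseudos
   can still be created, the caller gets the bare
     (unspec [SYMBOL, toc-reg] UNSPEC_TOCREL)
   and otherwise
     (lo_sum HI (unspec [SYMBOL, toc-reg] UNSPEC_TOCREL))
   where HI is LARGETOC_REG if one was supplied (the HIGH part having
   been moved into it above), or else the HIGH rtx itself.  */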
23136 /* Issue assembly directives that create a reference to the given DWARF
23137 FRAME_TABLE_LABEL from the current function section. */
23138 void
23139 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
23141 fprintf (asm_out_file, "\t.ref %s\n",
23142 (* targetm.strip_name_encoding) (frame_table_label));
23145 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
23146 and the change to the stack pointer. */
23148 static void
23149 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
23151 rtvec p;
23152 int i;
23153 rtx regs[3];
23155 i = 0;
23156 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
23157 if (hard_frame_needed)
23158 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
23159 if (!(REGNO (fp) == STACK_POINTER_REGNUM
23160 || (hard_frame_needed
23161 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
23162 regs[i++] = fp;
23164 p = rtvec_alloc (i);
23165 while (--i >= 0)
23167 rtx mem = gen_frame_mem (BLKmode, regs[i]);
23168 RTVEC_ELT (p, i) = gen_rtx_SET (mem, const0_rtx);
23171 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
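/* The tie emitted above looks roughly like this for a frame that uses
   both r1 and the hard frame pointer r31:

     (parallel [(set (mem:BLK (reg 1)) (const_int 0))
		(set (mem:BLK (reg 31)) (const_int 0))])

   The BLKmode frame MEMs make the scheduler treat the insn as a
   barrier, so frame saves cannot be moved across the stack pointer
   update.  */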
23174 /* Emit the correct code for allocating stack space, as insns.
23175 If COPY_REG, make sure a copy of the old frame is left there.
23176 The generated code may use hard register 0 as a temporary. */
23178 static rtx_insn *
23179 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
23181 rtx_insn *insn;
23182 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
23183 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
23184 rtx todec = gen_int_mode (-size, Pmode);
23185 rtx par, set, mem;
23187 if (INTVAL (todec) != -size)
23189 warning (0, "stack frame too large");
23190 emit_insn (gen_trap ());
23191 return 0;
23194 if (crtl->limit_stack)
23196 if (REG_P (stack_limit_rtx)
23197 && REGNO (stack_limit_rtx) > 1
23198 && REGNO (stack_limit_rtx) <= 31)
23200 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
23201 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
23202 const0_rtx));
23204 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
23205 && TARGET_32BIT
23206 && DEFAULT_ABI == ABI_V4)
23208 rtx toload = gen_rtx_CONST (VOIDmode,
23209 gen_rtx_PLUS (Pmode,
23210 stack_limit_rtx,
23211 GEN_INT (size)));
23213 emit_insn (gen_elf_high (tmp_reg, toload));
23214 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
23215 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
23216 const0_rtx));
23218 else
23219 warning (0, "stack limit expression is not supported");
23222 if (copy_reg)
23224 if (copy_off != 0)
23225 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
23226 else
23227 emit_move_insn (copy_reg, stack_reg);
23230 if (size > 32767)
23232 /* Need a note here so that try_split doesn't get confused. */
23233 if (get_last_insn () == NULL_RTX)
23234 emit_note (NOTE_INSN_DELETED);
23235 insn = emit_move_insn (tmp_reg, todec);
23236 try_split (PATTERN (insn), insn, 0);
23237 todec = tmp_reg;
23240 insn = emit_insn (TARGET_32BIT
23241 ? gen_movsi_update_stack (stack_reg, stack_reg,
23242 todec, stack_reg)
23243 : gen_movdi_di_update_stack (stack_reg, stack_reg,
23244 todec, stack_reg));
23245 /* Since we didn't use gen_frame_mem to generate the MEM, grab
23246 it now and set the alias set/attributes. The above gen_*_update
23247 calls will generate a PARALLEL with the MEM set being the first
23248 operation. */
23249 par = PATTERN (insn);
23250 gcc_assert (GET_CODE (par) == PARALLEL);
23251 set = XVECEXP (par, 0, 0);
23252 gcc_assert (GET_CODE (set) == SET);
23253 mem = SET_DEST (set);
23254 gcc_assert (MEM_P (mem));
23255 MEM_NOTRAP_P (mem) = 1;
23256 set_mem_alias_set (mem, get_frame_alias_set ());
23258 RTX_FRAME_RELATED_P (insn) = 1;
23259 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23260 gen_rtx_SET (stack_reg, gen_rtx_PLUS (Pmode, stack_reg,
23261 GEN_INT (-size))));
23262 return insn;
23265 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
23267 #if PROBE_INTERVAL > 32768
23268 #error Cannot use indexed addressing mode for stack probing
23269 #endif
23271 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
23272 inclusive. These are offsets from the current stack pointer. */
23274 static void
23275 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
23277 /* See if we have a constant small number of probes to generate. If so,
23278 that's the easy case. */
23279 if (first + size <= 32768)
23281 HOST_WIDE_INT i;
23283 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
23284 it exceeds SIZE. If only one probe is needed, this will not
23285 generate any code. Then probe at FIRST + SIZE. */
23286 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
23287 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
23288 -(first + i)));
23290 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
23291 -(first + size)));
23294 /* Otherwise, do the same as above, but in a loop. Note that we must be
23295 extra careful with variables wrapping around because we might be at
23296 the very top (or the very bottom) of the address space and we have
23297 to be able to handle this case properly; in particular, we use an
23298 equality test for the loop condition. */
23299 else
23301 HOST_WIDE_INT rounded_size;
23302 rtx r12 = gen_rtx_REG (Pmode, 12);
23303 rtx r0 = gen_rtx_REG (Pmode, 0);
23305 /* Sanity check for the addressing mode we're going to use. */
23306 gcc_assert (first <= 32768);
23308 /* Step 1: round SIZE to the previous multiple of the interval. */
23310 rounded_size = size & -PROBE_INTERVAL;
23313 /* Step 2: compute initial and final value of the loop counter. */
23315 /* TEST_ADDR = SP + FIRST. */
23316 emit_insn (gen_rtx_SET (r12, plus_constant (Pmode, stack_pointer_rtx,
23317 -first)));
23319 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
23320 if (rounded_size > 32768)
23322 emit_move_insn (r0, GEN_INT (-rounded_size));
23323 emit_insn (gen_rtx_SET (r0, gen_rtx_PLUS (Pmode, r12, r0)));
23325 else
23326 emit_insn (gen_rtx_SET (r0, plus_constant (Pmode, r12,
23327 -rounded_size)));
23330 /* Step 3: the loop
23332 while (TEST_ADDR != LAST_ADDR)
23334 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
23335 probe at TEST_ADDR
23338 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
23339 until it is equal to ROUNDED_SIZE. */
23341 if (TARGET_64BIT)
23342 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
23343 else
23344 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
23347 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
23348 that SIZE is equal to ROUNDED_SIZE. */
23350 if (size != rounded_size)
23351 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
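/* A worked example, assuming the usual PROBE_INTERVAL of 4096
   (STACK_CHECK_PROBE_INTERVAL_EXP == 12): FIRST = 0, SIZE = 10000
   takes the first branch (0 + 10000 <= 32768) and probes at SP - 4096,
   SP - 8192 and finally SP - 10000; larger ranges use the r12/r0 loop
   above instead.  */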
23355 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
23356 absolute addresses. */
23358 const char *
23359 output_probe_stack_range (rtx reg1, rtx reg2)
23361 static int labelno = 0;
23362 char loop_lab[32], end_lab[32];
23363 rtx xops[2];
23365 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
23366 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
23368 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
23370 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
23371 xops[0] = reg1;
23372 xops[1] = reg2;
23373 if (TARGET_64BIT)
23374 output_asm_insn ("cmpd 0,%0,%1", xops);
23375 else
23376 output_asm_insn ("cmpw 0,%0,%1", xops);
23378 fputs ("\tbeq 0,", asm_out_file);
23379 assemble_name_raw (asm_out_file, end_lab);
23380 fputc ('\n', asm_out_file);
23382 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
23383 xops[1] = GEN_INT (-PROBE_INTERVAL);
23384 output_asm_insn ("addi %0,%0,%1", xops);
23386 /* Probe at TEST_ADDR and branch. */
23387 xops[1] = gen_rtx_REG (Pmode, 0);
23388 output_asm_insn ("stw %1,0(%0)", xops);
23389 fprintf (asm_out_file, "\tb ");
23390 assemble_name_raw (asm_out_file, loop_lab);
23391 fputc ('\n', asm_out_file);
23393 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
23395 return "";
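/* For TARGET_32BIT the loop printed above comes out roughly as (a
   sketch; the label number, register syntax and interval depend on the
   operands and on PROBE_INTERVAL):

	.LPSRL0:
	cmpw 0,%r12,%r0
	beq 0,.LPSRE0
	addi %r12,%r12,-4096
	stw %r0,0(%r12)
	b .LPSRL0
	.LPSRE0:
*/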
23398 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
23399 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
23400 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
23401 deduce these equivalences by itself so it wasn't necessary to hold
23402 its hand so much. Don't be tempted to always supply d2_f_d_e with
23403 the actual CFA register, i.e. r31 when we are using a hard frame
23404 pointer. That fails when saving regs off r1, and sched moves the
23405 r31 setup past the reg saves. */
23407 static rtx
23408 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
23409 rtx reg2, rtx rreg)
23411 rtx real, temp;
23413 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
23415 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
23416 int i;
23418 gcc_checking_assert (val == 0);
23419 real = PATTERN (insn);
23420 if (GET_CODE (real) == PARALLEL)
23421 for (i = 0; i < XVECLEN (real, 0); i++)
23422 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
23424 rtx set = XVECEXP (real, 0, i);
23426 RTX_FRAME_RELATED_P (set) = 1;
23428 RTX_FRAME_RELATED_P (insn) = 1;
23429 return insn;
23432 /* copy_rtx will not make unique copies of registers, so we need to
23433 ensure we don't have unwanted sharing here. */
23434 if (reg == reg2)
23435 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
23437 if (reg == rreg)
23438 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
23440 real = copy_rtx (PATTERN (insn));
23442 if (reg2 != NULL_RTX)
23443 real = replace_rtx (real, reg2, rreg);
23445 if (REGNO (reg) == STACK_POINTER_REGNUM)
23446 gcc_checking_assert (val == 0);
23447 else
23448 real = replace_rtx (real, reg,
23449 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
23450 STACK_POINTER_REGNUM),
23451 GEN_INT (val)));
23453 /* We expect that 'real' is either a SET or a PARALLEL containing
23454 SETs (and possibly other stuff). In a PARALLEL, all the SETs
23455 are important so they all have to be marked RTX_FRAME_RELATED_P. */
23457 if (GET_CODE (real) == SET)
23459 rtx set = real;
23461 temp = simplify_rtx (SET_SRC (set));
23462 if (temp)
23463 SET_SRC (set) = temp;
23464 temp = simplify_rtx (SET_DEST (set));
23465 if (temp)
23466 SET_DEST (set) = temp;
23467 if (GET_CODE (SET_DEST (set)) == MEM)
23469 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
23470 if (temp)
23471 XEXP (SET_DEST (set), 0) = temp;
23474 else
23476 int i;
23478 gcc_assert (GET_CODE (real) == PARALLEL);
23479 for (i = 0; i < XVECLEN (real, 0); i++)
23480 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
23482 rtx set = XVECEXP (real, 0, i);
23484 temp = simplify_rtx (SET_SRC (set));
23485 if (temp)
23486 SET_SRC (set) = temp;
23487 temp = simplify_rtx (SET_DEST (set));
23488 if (temp)
23489 SET_DEST (set) = temp;
23490 if (GET_CODE (SET_DEST (set)) == MEM)
23492 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
23493 if (temp)
23494 XEXP (SET_DEST (set), 0) = temp;
23496 RTX_FRAME_RELATED_P (set) = 1;
23500 RTX_FRAME_RELATED_P (insn) = 1;
23501 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
23503 return insn;
23506 /* Returns an insn that has a vrsave set operation with the
23507 appropriate CLOBBERs. */
23509 static rtx
23510 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
23512 int nclobs, i;
23513 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
23514 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
23516 clobs[0]
23517 = gen_rtx_SET (vrsave,
23518 gen_rtx_UNSPEC_VOLATILE (SImode,
23519 gen_rtvec (2, reg, vrsave),
23520 UNSPECV_SET_VRSAVE));
23522 nclobs = 1;
23524 /* We need to clobber the registers in the mask so the scheduler
23525 does not move sets to VRSAVE before sets of AltiVec registers.
23527 However, if the function receives nonlocal gotos, reload will set
23528 all call saved registers live. We will end up with:
23530 (set (reg 999) (mem))
23531 (parallel [ (set (reg vrsave) (unspec blah))
23532 (clobber (reg 999))])
23534 The clobber will cause the store into reg 999 to be dead, and
23535 flow will attempt to delete an epilogue insn. In this case, we
23536 need an unspec use/set of the register. */
23538 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
23539 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
23541 if (!epiloguep || call_used_regs [i])
23542 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
23543 gen_rtx_REG (V4SImode, i));
23544 else
23546 rtx reg = gen_rtx_REG (V4SImode, i);
23548 clobs[nclobs++]
23549 = gen_rtx_SET (reg,
23550 gen_rtx_UNSPEC (V4SImode,
23551 gen_rtvec (1, reg), 27));
23555 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
23557 for (i = 0; i < nclobs; ++i)
23558 XVECEXP (insn, 0, i) = clobs[i];
23560 return insn;
23563 static rtx
23564 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
23566 rtx addr, mem;
23568 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
23569 mem = gen_frame_mem (GET_MODE (reg), addr);
23570 return gen_rtx_SET (store ? mem : reg, store ? reg : mem);
23573 static rtx
23574 gen_frame_load (rtx reg, rtx frame_reg, int offset)
23576 return gen_frame_set (reg, frame_reg, offset, false);
23579 static rtx
23580 gen_frame_store (rtx reg, rtx frame_reg, int offset)
23582 return gen_frame_set (reg, frame_reg, offset, true);
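/* Example of the RTL these helpers produce on a 64-bit target:
   gen_frame_store (gen_rtx_REG (DImode, 31), stack_pointer_rtx, -8)
   builds
     (set (mem:DI (plus:DI (reg:DI 1) (const_int -8))) (reg:DI 31))
   with the MEM placed in the frame alias set by gen_frame_mem.  */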
23585 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
23586 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
23588 static rtx
23589 emit_frame_save (rtx frame_reg, machine_mode mode,
23590 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
23592 rtx reg, insn;
23594 /* Some cases that need register indexed addressing. */
23595 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
23596 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
23597 || (TARGET_E500_DOUBLE && mode == DFmode)
23598 || (TARGET_SPE_ABI
23599 && SPE_VECTOR_MODE (mode)
23600 && !SPE_CONST_OFFSET_OK (offset))));
23602 reg = gen_rtx_REG (mode, regno);
23603 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
23604 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
23605 NULL_RTX, NULL_RTX);
23608 /* Emit an offset memory reference suitable for a frame store, while
23609 converting to a valid addressing mode. */
23611 static rtx
23612 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
23614 rtx int_rtx, offset_rtx;
23616 int_rtx = GEN_INT (offset);
23618 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
23619 || (TARGET_E500_DOUBLE && mode == DFmode))
23621 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
23622 emit_move_insn (offset_rtx, int_rtx);
23624 else
23625 offset_rtx = int_rtx;
23627 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
23630 #ifndef TARGET_FIX_AND_CONTINUE
23631 #define TARGET_FIX_AND_CONTINUE 0
23632 #endif
23634 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
23635 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
23636 #define LAST_SAVRES_REGISTER 31
23637 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
23639 enum {
23640 SAVRES_LR = 0x1,
23641 SAVRES_SAVE = 0x2,
23642 SAVRES_REG = 0x0c,
23643 SAVRES_GPR = 0,
23644 SAVRES_FPR = 4,
23645 SAVRES_VR = 8
23646 };
23648 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
23650 /* Temporary holding space for an out-of-line register save/restore
23651 routine name. */
23652 static char savres_routine_name[30];
23654 /* Return the name for an out-of-line register save/restore routine.
23655 SEL says which registers we are saving/restoring and how. */
23657 static char *
23658 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
23660 const char *prefix = "";
23661 const char *suffix = "";
23663 /* Different targets are supposed to define
23664 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
23665 routine name could be defined with:
23667 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
23669 This is a nice idea in theory, but in reality, things are
23670 complicated in several ways:
23672 - ELF targets have save/restore routines for GPRs.
23674 - SPE targets use different prefixes for 32/64-bit registers, and
23675 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
23677 - PPC64 ELF targets have routines for save/restore of GPRs that
23678 differ in what they do with the link register, so having a set
23679 prefix doesn't work. (We only use one of the save routines at
23680 the moment, though.)
23682 - PPC32 ELF targets have "exit" versions of the restore routines
23683 that restore the link register and can save some extra space.
23684 These require an extra suffix. (There are also "tail" versions
23685 of the restore routines and "GOT" versions of the save routines,
23686 but we don't generate those at present. Same problems apply,
23687 though.)
23689 We deal with all this by synthesizing our own prefix/suffix and
23690 using that for the simple sprintf call shown above. */
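/* Illustratively (names taken from the cases below): 32-bit SVR4
   composes "_savegpr_29", or "_restfpr_14_x" for an FPR restore that
   also restores the link register; 64-bit ELF and AIX compose
   "_savegpr0_29" / "_savegpr1_29", where the 0/1 distinguishes the
   link-register-handling variant.  The number is the first register
   the routine saves or restores.  */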
23691 if (TARGET_SPE)
23693 /* No floating point saves on the SPE. */
23694 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
23696 if ((sel & SAVRES_SAVE))
23697 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
23698 else
23699 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
23701 if ((sel & SAVRES_LR))
23702 suffix = "_x";
23704 else if (DEFAULT_ABI == ABI_V4)
23706 if (TARGET_64BIT)
23707 goto aix_names;
23709 if ((sel & SAVRES_REG) == SAVRES_GPR)
23710 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
23711 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23712 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
23713 else if ((sel & SAVRES_REG) == SAVRES_VR)
23714 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
23715 else
23716 abort ();
23718 if ((sel & SAVRES_LR))
23719 suffix = "_x";
23721 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23723 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
23724 /* No out-of-line save/restore routines for GPRs on AIX. */
23725 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
23726 #endif
23728 aix_names:
23729 if ((sel & SAVRES_REG) == SAVRES_GPR)
23730 prefix = ((sel & SAVRES_SAVE)
23731 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
23732 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
23733 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23735 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
23736 if ((sel & SAVRES_LR))
23737 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
23738 else
23739 #endif
23741 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
23742 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
23745 else if ((sel & SAVRES_REG) == SAVRES_VR)
23746 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
23747 else
23748 abort ();
23751 if (DEFAULT_ABI == ABI_DARWIN)
23753 /* The Darwin approach is (slightly) different, in order to be
23754 compatible with code generated by the system toolchain. There is a
23755 single symbol for the start of save sequence, and the code here
23756 embeds an offset into that code on the basis of the first register
23757 to be saved. */
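/* A worked example of the sprintf below: saving GPRs starting at r21
   with the link-register variant emits the symbol
     "*saveGPRx+32 ; save r21-r31"
   where +32 is (21 - 13) * 4, the byte offset into the common save
   sequence.  */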
23758 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
23759 if ((sel & SAVRES_REG) == SAVRES_GPR)
23760 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
23761 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
23762 (regno - 13) * 4, prefix, regno);
23763 else if ((sel & SAVRES_REG) == SAVRES_FPR)
23764 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
23765 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
23766 else if ((sel & SAVRES_REG) == SAVRES_VR)
23767 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
23768 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
23769 else
23770 abort ();
23772 else
23773 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
23775 return savres_routine_name;
23778 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
23779    SEL selects the register class and save/restore variant, as for
   rs6000_savres_routine_name.  */
23781 static rtx
23782 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
23784 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
23785 ? info->first_gp_reg_save
23786 : (sel & SAVRES_REG) == SAVRES_FPR
23787 ? info->first_fp_reg_save - 32
23788 : (sel & SAVRES_REG) == SAVRES_VR
23789 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
23790 : -1);
23791 rtx sym;
23792 int select = sel;
23794 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
23795 versions of the gpr routines. */
23796 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
23797 && info->spe_64bit_regs_used)
23798 select ^= SAVRES_FPR ^ SAVRES_GPR;
23800 /* Don't generate bogus routine names. */
23801 gcc_assert (FIRST_SAVRES_REGISTER <= regno
23802 && regno <= LAST_SAVRES_REGISTER
23803 && select >= 0 && select <= 12);
23805 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
23807 if (sym == NULL)
23809 char *name;
23811 name = rs6000_savres_routine_name (info, regno, sel);
23813 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
23814 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
23815 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
23818 return sym;
23821 /* Emit a sequence of insns, including a stack tie if needed, for
23822 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
23823 reset the stack pointer, but move the base of the frame into
23824 reg UPDT_REGNO for use by out-of-line register restore routines. */
23826 static rtx
23827 rs6000_emit_stack_reset (rs6000_stack_t *info,
23828 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
23829 unsigned updt_regno)
23831 rtx updt_reg_rtx;
23833 /* This blockage is needed so that sched doesn't decide to move
23834 the sp change before the register restores. */
23835 if (DEFAULT_ABI == ABI_V4
23836 || (TARGET_SPE_ABI
23837 && info->spe_64bit_regs_used != 0
23838 && info->first_gp_reg_save != 32))
23839 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
23841 /* If we are restoring registers out-of-line, we will be using the
23842 "exit" variants of the restore routines, which will reset the
23843 stack for us. But we do need to point updt_reg into the
23844 right place for those routines. */
23845 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
23847 if (frame_off != 0)
23848 return emit_insn (gen_add3_insn (updt_reg_rtx,
23849 frame_reg_rtx, GEN_INT (frame_off)));
23850 else if (REGNO (frame_reg_rtx) != updt_regno)
23851 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
23853 return NULL_RTX;
23856 /* Return the register number used as a pointer by out-of-line
23857 save/restore functions. */
23859 static inline unsigned
23860 ptr_regno_for_savres (int sel)
23862 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23863 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
23864 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
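/* Summarizing the mapping above: on AIX and ELFv2 the FPR routines and
   the LR-handling GPR routines operate directly off r1, the rest take
   r12; the V.4 routines take r11, as do Darwin's except for its FPR
   routines, which use r1.  Callers must keep that register free.  */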
23867 /* Construct a parallel rtx describing the effect of a call to an
23868 out-of-line register save/restore routine, and emit the insn
23869 or jump_insn as appropriate. */
23871 static rtx
23872 rs6000_emit_savres_rtx (rs6000_stack_t *info,
23873 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
23874 machine_mode reg_mode, int sel)
23876 int i;
23877 int offset, start_reg, end_reg, n_regs, use_reg;
23878 int reg_size = GET_MODE_SIZE (reg_mode);
23879 rtx sym;
23880 rtvec p;
23881 rtx par, insn;
23883 offset = 0;
23884 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23885 ? info->first_gp_reg_save
23886 : (sel & SAVRES_REG) == SAVRES_FPR
23887 ? info->first_fp_reg_save
23888 : (sel & SAVRES_REG) == SAVRES_VR
23889 ? info->first_altivec_reg_save
23890 : -1);
23891 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23892 ? 32
23893 : (sel & SAVRES_REG) == SAVRES_FPR
23894 ? 64
23895 : (sel & SAVRES_REG) == SAVRES_VR
23896 ? LAST_ALTIVEC_REGNO + 1
23897 : -1);
23898 n_regs = end_reg - start_reg;
23899 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
23900 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
23901 + n_regs);
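/* Illustratively, for an out-of-line GPR restore that also returns
   (SAVRES_LR set, SAVRES_SAVE clear) the vector ends up as
     [return, (clobber lr), (use <routine symbol>), (use ptr-reg),
      (set rN (mem)) ... (set r31 (mem))]
   in exactly the order the elements are filled in below.  */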
23903 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23904 RTVEC_ELT (p, offset++) = ret_rtx;
23906 RTVEC_ELT (p, offset++)
23907 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
23909 sym = rs6000_savres_routine_sym (info, sel);
23910 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
23912 use_reg = ptr_regno_for_savres (sel);
23913 if ((sel & SAVRES_REG) == SAVRES_VR)
23915 /* Vector regs are saved/restored using [reg+reg] addressing. */
23916 RTVEC_ELT (p, offset++)
23917 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23918 RTVEC_ELT (p, offset++)
23919 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
23921 else
23922 RTVEC_ELT (p, offset++)
23923 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23925 for (i = 0; i < end_reg - start_reg; i++)
23926 RTVEC_ELT (p, i + offset)
23927 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
23928 frame_reg_rtx, save_area_offset + reg_size * i,
23929 (sel & SAVRES_SAVE) != 0);
23931 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23932 RTVEC_ELT (p, i + offset)
23933 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
23935 par = gen_rtx_PARALLEL (VOIDmode, p);
23937 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23939 insn = emit_jump_insn (par);
23940 JUMP_LABEL (insn) = ret_rtx;
23942 else
23943 insn = emit_insn (par);
23944 return insn;
23947 /* Emit code to store CR fields that need to be saved into REG. */
23949 static void
23950 rs6000_emit_move_from_cr (rtx reg)
23952 /* Only the ELFv2 ABI allows storing only selected fields. */
23953 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
23955 int i, cr_reg[8], count = 0;
23957 /* Collect CR fields that must be saved. */
23958 for (i = 0; i < 8; i++)
23959 if (save_reg_p (CR0_REGNO + i))
23960 cr_reg[count++] = i;
23962 /* If it's just a single one, use mfcrf. */
23963 if (count == 1)
23965 rtvec p = rtvec_alloc (1);
23966 rtvec r = rtvec_alloc (2);
23967 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
23968 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
23969 RTVEC_ELT (p, 0)
23970 = gen_rtx_SET (reg,
23971 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
23973 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23974 return;
23977 /* ??? It might be better to handle the count == 2 and count == 3 cases here
23978 as well, using logical operations to combine the values. */
23981 emit_insn (gen_movesi_from_cr (reg));
23984 /* Determine whether the gp REG is really used. */
23986 static bool
23987 rs6000_reg_live_or_pic_offset_p (int reg)
23989 /* If the function calls eh_return, claim used all the registers that would
23990 be checked for liveness otherwise. This is required for the PIC offset
23991 register with -mminimal-toc on AIX, as it is advertised as "fixed" for
23992 register allocation purposes in this case. */
23994 return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
23995 && (!call_used_regs[reg]
23996 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23997 && !TARGET_SINGLE_PIC_BASE
23998 && TARGET_TOC && TARGET_MINIMAL_TOC)))
23999 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
24000 && !TARGET_SINGLE_PIC_BASE
24001 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
24002 || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
24005 /* Return whether the split-stack arg pointer (r12) is used. */
24007 static bool
24008 split_stack_arg_pointer_used_p (void)
24010 /* If the pseudo holding the arg pointer is no longer a pseudo,
24011 then the arg pointer is used. */
24012 if (cfun->machine->split_stack_arg_pointer != NULL_RTX
24013 && (!REG_P (cfun->machine->split_stack_arg_pointer)
24014 || (REGNO (cfun->machine->split_stack_arg_pointer)
24015 < FIRST_PSEUDO_REGISTER)))
24016 return true;
24018 /* Unfortunately we also need to do some code scanning, since
24019 r12 may have been substituted for the pseudo. */
24020 rtx_insn *insn;
24021 basic_block bb = ENTRY_BLOCK_PTR_FOR_FN (cfun)->next_bb;
24022 FOR_BB_INSNS (bb, insn)
24023 if (NONDEBUG_INSN_P (insn))
24025 /* A call destroys r12. */
24026 if (CALL_P (insn))
24027 return false;
24029 df_ref use;
24030 FOR_EACH_INSN_USE (use, insn)
24032 rtx x = DF_REF_REG (use);
24033 if (REG_P (x) && REGNO (x) == 12)
24034 return true;
24036 df_ref def;
24037 FOR_EACH_INSN_DEF (def, insn)
24039 rtx x = DF_REF_REG (def);
24040 if (REG_P (x) && REGNO (x) == 12)
24041 return false;
24044 return bitmap_bit_p (DF_LR_OUT (bb), 12);
24047 /* Emit function prologue as insns. */
24049 void
24050 rs6000_emit_prologue (void)
24052 rs6000_stack_t *info = rs6000_stack_info ();
24053 machine_mode reg_mode = Pmode;
24054 int reg_size = TARGET_32BIT ? 4 : 8;
24055 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
24056 rtx frame_reg_rtx = sp_reg_rtx;
24057 unsigned int cr_save_regno;
24058 rtx cr_save_rtx = NULL_RTX;
24059 rtx insn;
24060 int strategy;
24061 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
24062 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
24063 && call_used_regs[STATIC_CHAIN_REGNUM]);
24064 int using_split_stack = (flag_split_stack
24065 && (lookup_attribute ("no_split_stack",
24066 DECL_ATTRIBUTES (cfun->decl))
24067 == NULL));
24069 /* Offset to top of frame for frame_reg and sp respectively. */
24070 HOST_WIDE_INT frame_off = 0;
24071 HOST_WIDE_INT sp_off = 0;
24072 /* sp_adjust is the stack adjusting instruction, tracked so that the
24073 insn setting up the split-stack arg pointer can be emitted just
24074 prior to it, when r12 is not used here for other purposes. */
24075 rtx_insn *sp_adjust = 0;
24077 #ifdef ENABLE_CHECKING
24078 /* Track and check usage of r0, r11, r12. */
24079 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
24080 #define START_USE(R) do \
24082 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
24083 reg_inuse |= 1 << (R); \
24084 } while (0)
24085 #define END_USE(R) do \
24087 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
24088 reg_inuse &= ~(1 << (R)); \
24089 } while (0)
24090 #define NOT_INUSE(R) do \
24092 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
24093 } while (0)
24094 #else
24095 #define START_USE(R) do {} while (0)
24096 #define END_USE(R) do {} while (0)
24097 #define NOT_INUSE(R) do {} while (0)
24098 #endif
24100 if (DEFAULT_ABI == ABI_ELFv2)
24102 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
24104 /* With -mminimal-toc we may generate an extra use of r2 below. */
24105 if (!TARGET_SINGLE_PIC_BASE
24106 && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
24107 cfun->machine->r2_setup_needed = true;
24111 if (flag_stack_usage_info)
24112 current_function_static_stack_size = info->total_size;
24114 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
24116 HOST_WIDE_INT size = info->total_size;
24118 if (crtl->is_leaf && !cfun->calls_alloca)
24120 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
24121 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
24122 size - STACK_CHECK_PROTECT);
24124 else if (size > 0)
24125 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
24128 if (TARGET_FIX_AND_CONTINUE)
24130 /* gdb on darwin arranges to forward a function from the old
24131 address by modifying the first 5 instructions of the function
24132 to branch to the overriding function. This is necessary to
24133 permit function pointers that point to the old function to
24134 actually forward to the new function. */
24135 emit_insn (gen_nop ());
24136 emit_insn (gen_nop ());
24137 emit_insn (gen_nop ());
24138 emit_insn (gen_nop ());
24139 emit_insn (gen_nop ());
24142 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24144 reg_mode = V2SImode;
24145 reg_size = 8;
24148 /* Handle world saves specially here. */
24149 if (WORLD_SAVE_P (info))
24151 int i, j, sz;
24152 rtx treg;
24153 rtvec p;
24154 rtx reg0;
24156 /* save_world expects lr in r0. */
24157 reg0 = gen_rtx_REG (Pmode, 0);
24158 if (info->lr_save_p)
24160 insn = emit_move_insn (reg0,
24161 gen_rtx_REG (Pmode, LR_REGNO));
24162 RTX_FRAME_RELATED_P (insn) = 1;
24165 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
24166 assumptions about the offsets of various bits of the stack
24167 frame. */
24168 gcc_assert (info->gp_save_offset == -220
24169 && info->fp_save_offset == -144
24170 && info->lr_save_offset == 8
24171 && info->cr_save_offset == 4
24172 && info->push_p
24173 && info->lr_save_p
24174 && (!crtl->calls_eh_return
24175 || info->ehrd_offset == -432)
24176 && info->vrsave_save_offset == -224
24177 && info->altivec_save_offset == -416);
24179 treg = gen_rtx_REG (SImode, 11);
24180 emit_move_insn (treg, GEN_INT (-info->total_size));
24182 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
24183 in R11. It also clobbers R12, so beware! */
24185 /* Preserve CR2 for save_world prologues */
24186 sz = 5;
24187 sz += 32 - info->first_gp_reg_save;
24188 sz += 64 - info->first_fp_reg_save;
24189 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
24190 p = rtvec_alloc (sz);
24191 j = 0;
24192 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
24193 gen_rtx_REG (SImode,
24194 LR_REGNO));
24195 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
24196 gen_rtx_SYMBOL_REF (Pmode,
24197 "*save_world"));
24198 /* We do floats first so that the instruction pattern matches
24199 properly. */
24200 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
24201 RTVEC_ELT (p, j++)
24202 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24203 ? DFmode : SFmode,
24204 info->first_fp_reg_save + i),
24205 frame_reg_rtx,
24206 info->fp_save_offset + frame_off + 8 * i);
24207 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
24208 RTVEC_ELT (p, j++)
24209 = gen_frame_store (gen_rtx_REG (V4SImode,
24210 info->first_altivec_reg_save + i),
24211 frame_reg_rtx,
24212 info->altivec_save_offset + frame_off + 16 * i);
24213 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24214 RTVEC_ELT (p, j++)
24215 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
24216 frame_reg_rtx,
24217 info->gp_save_offset + frame_off + reg_size * i);
24219 /* CR register traditionally saved as CR2. */
24220 RTVEC_ELT (p, j++)
24221 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
24222 frame_reg_rtx, info->cr_save_offset + frame_off);
24223 /* Explain about use of R0. */
24224 if (info->lr_save_p)
24225 RTVEC_ELT (p, j++)
24226 = gen_frame_store (reg0,
24227 frame_reg_rtx, info->lr_save_offset + frame_off);
24228 /* Explain what happens to the stack pointer. */
24229     {
24230       rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
24231       RTVEC_ELT (p, j++) = gen_rtx_SET (sp_reg_rtx, newval);
24232     }
24234 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24235 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24236 treg, GEN_INT (-info->total_size));
24237 sp_off = frame_off = info->total_size;
24240 strategy = info->savres_strategy;
24242 /* For V.4, update stack before we do any saving and set back pointer. */
24243 if (! WORLD_SAVE_P (info)
24244 && info->push_p
24245 && (DEFAULT_ABI == ABI_V4
24246 || crtl->calls_eh_return))
24248 bool need_r11 = (TARGET_SPE
24249 ? (!(strategy & SAVE_INLINE_GPRS)
24250 && info->spe_64bit_regs_used == 0)
24251 : (!(strategy & SAVE_INLINE_FPRS)
24252 || !(strategy & SAVE_INLINE_GPRS)
24253 || !(strategy & SAVE_INLINE_VRS)));
24254 int ptr_regno = -1;
24255 rtx ptr_reg = NULL_RTX;
24256 int ptr_off = 0;
24258 if (info->total_size < 32767)
24259 frame_off = info->total_size;
24260 else if (need_r11)
24261 ptr_regno = 11;
24262 else if (info->cr_save_p
24263 || info->lr_save_p
24264 || info->first_fp_reg_save < 64
24265 || info->first_gp_reg_save < 32
24266 || info->altivec_size != 0
24267 || info->vrsave_size != 0
24268 || crtl->calls_eh_return)
24269 ptr_regno = 12;
24270 else
24272 /* The prologue won't be saving any regs so there is no need
24273 to set up a frame register to access any frame save area.
24274 We also won't be using frame_off anywhere below, but set
24275 the correct value anyway to protect against future
24276 changes to this function. */
24277 frame_off = info->total_size;
24279 if (ptr_regno != -1)
24281 /* Set up the frame offset to that needed by the first
24282 out-of-line save function. */
24283 START_USE (ptr_regno);
24284 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
24285 frame_reg_rtx = ptr_reg;
24286 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
24287 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
24288 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
24289 ptr_off = info->gp_save_offset + info->gp_size;
24290 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
24291 ptr_off = info->altivec_save_offset + info->altivec_size;
24292 frame_off = -ptr_off;
24294 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
24295 ptr_reg, ptr_off);
24296 if (REGNO (frame_reg_rtx) == 12)
24297 sp_adjust = 0;
24298 sp_off = info->total_size;
24299 if (frame_reg_rtx != sp_reg_rtx)
24300 rs6000_emit_stack_tie (frame_reg_rtx, false);
24303 /* If we use the link register, get it into r0. */
24304 if (!WORLD_SAVE_P (info) && info->lr_save_p)
24306 rtx addr, reg, mem;
24308 reg = gen_rtx_REG (Pmode, 0);
24309 START_USE (0);
24310 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
24311 RTX_FRAME_RELATED_P (insn) = 1;
24313 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
24314 | SAVE_NOINLINE_FPRS_SAVES_LR)))
24316 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
24317 GEN_INT (info->lr_save_offset + frame_off));
24318 mem = gen_rtx_MEM (Pmode, addr);
24319 /* This should not be of rs6000_sr_alias_set, because of
24320 __builtin_return_address. */
24322 insn = emit_move_insn (mem, reg);
24323 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24324 NULL_RTX, NULL_RTX);
24325 END_USE (0);
24329 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
24330    r12 will be needed by the out-of-line gpr save.  */
24331 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24332 && !(strategy & (SAVE_INLINE_GPRS
24333 | SAVE_NOINLINE_GPRS_SAVES_LR))
24334 ? 11 : 12);
24335 if (!WORLD_SAVE_P (info)
24336 && info->cr_save_p
24337 && REGNO (frame_reg_rtx) != cr_save_regno
24338 && !(using_static_chain_p && cr_save_regno == 11)
24339 && !(using_split_stack && cr_save_regno == 12 && sp_adjust))
24341 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
24342 START_USE (cr_save_regno);
24343 rs6000_emit_move_from_cr (cr_save_rtx);
24346 /* Do any required saving of fpr's. If only one or two to save, do
24347 it ourselves. Otherwise, call function. */
24348 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
24350 int i;
24351 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
24352 if (save_reg_p (info->first_fp_reg_save + i))
24353 emit_frame_save (frame_reg_rtx,
24354 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24355 ? DFmode : SFmode),
24356 info->first_fp_reg_save + i,
24357 info->fp_save_offset + frame_off + 8 * i,
24358 sp_off - frame_off);
24360 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
24362 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
24363 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
24364 unsigned ptr_regno = ptr_regno_for_savres (sel);
24365 rtx ptr_reg = frame_reg_rtx;
24367 if (REGNO (frame_reg_rtx) == ptr_regno)
24368 gcc_checking_assert (frame_off == 0);
24369 else
24371 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
24372 NOT_INUSE (ptr_regno);
24373 emit_insn (gen_add3_insn (ptr_reg,
24374 frame_reg_rtx, GEN_INT (frame_off)));
24376 insn = rs6000_emit_savres_rtx (info, ptr_reg,
24377 info->fp_save_offset,
24378 info->lr_save_offset,
24379 DFmode, sel);
24380 rs6000_frame_related (insn, ptr_reg, sp_off,
24381 NULL_RTX, NULL_RTX);
24382 if (lr)
24383 END_USE (0);
24386 /* Save GPRs. This is done as a PARALLEL if we are using
24387 the store-multiple instructions. */
24388 if (!WORLD_SAVE_P (info)
24389 && TARGET_SPE_ABI
24390 && info->spe_64bit_regs_used != 0
24391 && info->first_gp_reg_save != 32)
24393 int i;
24394 rtx spe_save_area_ptr;
24395 HOST_WIDE_INT save_off;
24396 int ool_adjust = 0;
24398 /* Determine whether we can address all of the registers that need
24399 to be saved with an offset from frame_reg_rtx that fits in
24400 the small const field for SPE memory instructions. */
24401 int spe_regs_addressable
24402 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
24403 + reg_size * (32 - info->first_gp_reg_save - 1))
24404 && (strategy & SAVE_INLINE_GPRS));
24406 if (spe_regs_addressable)
24408 spe_save_area_ptr = frame_reg_rtx;
24409 save_off = frame_off;
24411 else
24413 /* Make r11 point to the start of the SPE save area. We need
24414 to be careful here if r11 is holding the static chain. If
24415 it is, then temporarily save it in r0. */
24416 HOST_WIDE_INT offset;
24418 if (!(strategy & SAVE_INLINE_GPRS))
24419 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
24420 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
24421 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
24422 save_off = frame_off - offset;
24424 if (using_static_chain_p)
24426 rtx r0 = gen_rtx_REG (Pmode, 0);
24428 START_USE (0);
24429 gcc_assert (info->first_gp_reg_save > 11);
24431 emit_move_insn (r0, spe_save_area_ptr);
24433 else if (REGNO (frame_reg_rtx) != 11)
24434 START_USE (11);
24436 emit_insn (gen_addsi3 (spe_save_area_ptr,
24437 frame_reg_rtx, GEN_INT (offset)));
24438 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
24439 frame_off = -info->spe_gp_save_offset + ool_adjust;
24442 if ((strategy & SAVE_INLINE_GPRS))
24444 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24445 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
24446 emit_frame_save (spe_save_area_ptr, reg_mode,
24447 info->first_gp_reg_save + i,
24448 (info->spe_gp_save_offset + save_off
24449 + reg_size * i),
24450 sp_off - save_off);
24452 else
24454 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
24455 info->spe_gp_save_offset + save_off,
24456 0, reg_mode,
24457 SAVRES_SAVE | SAVRES_GPR);
24459 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
24460 NULL_RTX, NULL_RTX);
24463 /* Move the static chain pointer back. */
24464 if (!spe_regs_addressable)
24466 if (using_static_chain_p)
24468 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
24469 END_USE (0);
24471 else if (REGNO (frame_reg_rtx) != 11)
24472 END_USE (11);
24475 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
24477 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
24478 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
24479 unsigned ptr_regno = ptr_regno_for_savres (sel);
24480 rtx ptr_reg = frame_reg_rtx;
24481 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
24482 int end_save = info->gp_save_offset + info->gp_size;
24483 int ptr_off;
24485 if (ptr_regno == 12)
24486 sp_adjust = 0;
24487 if (!ptr_set_up)
24488 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
24490 /* Need to adjust r11 (r12) if we saved any FPRs. */
24491 if (end_save + frame_off != 0)
24493 rtx offset = GEN_INT (end_save + frame_off);
24495 if (ptr_set_up)
24496 frame_off = -end_save;
24497 else
24498 NOT_INUSE (ptr_regno);
24499 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24501 else if (!ptr_set_up)
24503 NOT_INUSE (ptr_regno);
24504 emit_move_insn (ptr_reg, frame_reg_rtx);
24506 ptr_off = -end_save;
24507 insn = rs6000_emit_savres_rtx (info, ptr_reg,
24508 info->gp_save_offset + ptr_off,
24509 info->lr_save_offset + ptr_off,
24510 reg_mode, sel);
24511 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
24512 NULL_RTX, NULL_RTX);
24513 if (lr)
24514 END_USE (0);
24516 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
24518 rtvec p;
24519 int i;
24520 p = rtvec_alloc (32 - info->first_gp_reg_save);
24521 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24522 RTVEC_ELT (p, i)
24523 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
24524 frame_reg_rtx,
24525 info->gp_save_offset + frame_off + reg_size * i);
24526 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24527 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24528 NULL_RTX, NULL_RTX);
24530 else if (!WORLD_SAVE_P (info))
24532 int i;
24533 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24534 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
24535 emit_frame_save (frame_reg_rtx, reg_mode,
24536 info->first_gp_reg_save + i,
24537 info->gp_save_offset + frame_off + reg_size * i,
24538 sp_off - frame_off);
24541 if (crtl->calls_eh_return)
24543 unsigned int i;
24544 rtvec p;
24546 for (i = 0; ; ++i)
24548 unsigned int regno = EH_RETURN_DATA_REGNO (i);
24549 if (regno == INVALID_REGNUM)
24550 break;
24553 p = rtvec_alloc (i);
24555 for (i = 0; ; ++i)
24557 unsigned int regno = EH_RETURN_DATA_REGNO (i);
24558 if (regno == INVALID_REGNUM)
24559 break;
24561 insn
24562 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
24563 sp_reg_rtx,
24564 info->ehrd_offset + sp_off + reg_size * (int) i);
24565 RTVEC_ELT (p, i) = insn;
24566 RTX_FRAME_RELATED_P (insn) = 1;
24569 insn = emit_insn (gen_blockage ());
24570 RTX_FRAME_RELATED_P (insn) = 1;
24571 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
24574 /* In AIX ABI we need to make sure r2 is really saved. */
24575 if (TARGET_AIX && crtl->calls_eh_return)
24577 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
24578 rtx save_insn, join_insn, note;
24579 long toc_restore_insn;
24581 tmp_reg = gen_rtx_REG (Pmode, 11);
24582 tmp_reg_si = gen_rtx_REG (SImode, 11);
24583 if (using_static_chain_p)
24585 START_USE (0);
24586 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
24588 else
24589 START_USE (11);
24590 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
24591 /* Peek at instruction to which this function returns. If it's
24592 restoring r2, then we know we've already saved r2. We can't
24593 unconditionally save r2 because the value we have will already
24594 be updated if we arrived at this function via a plt call or
24595 toc adjusting stub. */
24596 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
24597 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
24598 + RS6000_TOC_SAVE_SLOT);
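/* For reference: 0x80410000 encodes "lwz r2,0(r1)" and 0xE8410000
   encodes "ld r2,0(r1)", so after adding RS6000_TOC_SAVE_SLOT the
   constant is the image of the instruction that reloads r2 from its
   save slot after a call.  */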
24599 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
24600 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
24601 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
24602 validate_condition_mode (EQ, CCUNSmode);
24603 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
24604 emit_insn (gen_rtx_SET (compare_result,
24605 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
24606 toc_save_done = gen_label_rtx ();
24607 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
24608 gen_rtx_EQ (VOIDmode, compare_result,
24609 const0_rtx),
24610 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
24611 pc_rtx);
24612 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
24613 JUMP_LABEL (jump) = toc_save_done;
24614 LABEL_NUSES (toc_save_done) += 1;
24616 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
24617 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
24618 sp_off - frame_off);
24620 emit_label (toc_save_done);
24622       /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
24623          have a CFG that has different saves along different paths.
24624          Move the note to a dummy blockage insn, which describes that
24625          R2 is unconditionally saved after the label.  */
24626       /* ??? An alternate representation might be a special insn pattern
24627          containing both the branch and the store.  That might give the
24628          code that minimizes the number of DW_CFA_advance opcodes more
24629          freedom in placing the annotations.  */
24630 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
24631 if (note)
24632 remove_note (save_insn, note);
24633 else
24634 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
24635 copy_rtx (PATTERN (save_insn)), NULL_RTX);
24636 RTX_FRAME_RELATED_P (save_insn) = 0;
24638 join_insn = emit_insn (gen_blockage ());
24639 REG_NOTES (join_insn) = note;
24640 RTX_FRAME_RELATED_P (join_insn) = 1;
24642 if (using_static_chain_p)
24644 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
24645 END_USE (0);
24647 else
24648 END_USE (11);
24651 /* Save CR if we use any that must be preserved. */
24652 if (!WORLD_SAVE_P (info) && info->cr_save_p)
24654 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
24655 GEN_INT (info->cr_save_offset + frame_off));
24656 rtx mem = gen_frame_mem (SImode, addr);
24658 /* If we didn't copy cr before, do so now using r0. */
24659 if (cr_save_rtx == NULL_RTX)
24661 START_USE (0);
24662 cr_save_rtx = gen_rtx_REG (SImode, 0);
24663 rs6000_emit_move_from_cr (cr_save_rtx);
24666 /* Saving CR requires a two-instruction sequence: one instruction
24667 to move the CR to a general-purpose register, and a second
24668 instruction that stores the GPR to memory.
24670 We do not emit any DWARF CFI records for the first of these,
24671 because we cannot properly represent the fact that CR is saved in
24672 a register. One reason is that we cannot express that multiple
24673 CR fields are saved; another reason is that on 64-bit, the size
24674 of the CR register in DWARF (4 bytes) differs from the size of
24675 a general-purpose register.
24677 This means if any intervening instruction were to clobber one of
24678 the call-saved CR fields, we'd have incorrect CFI. To prevent
24679 this from happening, we mark the store to memory as a use of
24680 those CR fields, which prevents any such instruction from being
24681 scheduled in between the two instructions. */
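/* Schematically, the insn emitted below is
     (parallel [(set (mem:SI <cr slot>) (reg:SI <tmp gpr>))
                (use (reg:CC cr2)) (use (reg:CC cr4)) ...])
   with one USE for each call-saved CR field actually saved, which is
   what keeps the scheduler from splitting the two-insn sequence.  */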
24682 rtx crsave_v[9];
24683 int n_crsave = 0;
24684 int i;
24686 crsave_v[n_crsave++] = gen_rtx_SET (mem, cr_save_rtx);
24687 for (i = 0; i < 8; i++)
24688 if (save_reg_p (CR0_REGNO + i))
24689 crsave_v[n_crsave++]
24690 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
24692 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
24693 gen_rtvec_v (n_crsave, crsave_v)));
24694 END_USE (REGNO (cr_save_rtx));
24696 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
24697 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
24698 so we need to construct a frame expression manually. */
24699 RTX_FRAME_RELATED_P (insn) = 1;
24701 /* Update address to be stack-pointer relative, like
24702 rs6000_frame_related would do. */
24703 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
24704 GEN_INT (info->cr_save_offset + sp_off));
24705 mem = gen_frame_mem (SImode, addr);
24707 if (DEFAULT_ABI == ABI_ELFv2)
24709 /* In the ELFv2 ABI we generate separate CFI records for each
24710 CR field that was actually saved. They all point to the
24711 same 32-bit stack slot. */
24712 rtx crframe[8];
24713 int n_crframe = 0;
24715 for (i = 0; i < 8; i++)
24716 if (save_reg_p (CR0_REGNO + i))
24718 crframe[n_crframe]
24719 = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR0_REGNO + i));
24721 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
24722 n_crframe++;
24725 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24726 gen_rtx_PARALLEL (VOIDmode,
24727 gen_rtvec_v (n_crframe, crframe)));
24729 else
24731 /* In other ABIs, by convention, we use a single CR regnum to
24732 represent the fact that all call-saved CR fields are saved.
24733 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
24734 rtx set = gen_rtx_SET (mem, gen_rtx_REG (SImode, CR2_REGNO));
24735 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
24739 /* In the ELFv2 ABI we need to save all call-saved CR fields into
24740 *separate* slots if the routine calls __builtin_eh_return, so
24741 that they can be independently restored by the unwinder. */
24742 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24744 int i, cr_off = info->ehcr_offset;
24745 rtx crsave;
24747 /* ??? We might get better performance by using multiple mfocrf
24748 instructions. */
24749 crsave = gen_rtx_REG (SImode, 0);
24750 emit_insn (gen_movesi_from_cr (crsave));
24752 for (i = 0; i < 8; i++)
24753 if (!call_used_regs[CR0_REGNO + i])
24755 rtvec p = rtvec_alloc (2);
24756 RTVEC_ELT (p, 0)
24757 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
24758 RTVEC_ELT (p, 1)
24759 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
24761 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24763 RTX_FRAME_RELATED_P (insn) = 1;
24764 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
24765 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
24766 sp_reg_rtx, cr_off + sp_off));
24768 cr_off += reg_size;
24772 /* Update stack and set back pointer unless this is V.4,
24773 for which it was done previously. */
24774 if (!WORLD_SAVE_P (info) && info->push_p
24775 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
24777 rtx ptr_reg = NULL;
24778 int ptr_off = 0;
24780 /* If saving altivec regs we need to be able to address all save
24781 locations using a 16-bit offset. */
24782 if ((strategy & SAVE_INLINE_VRS) == 0
24783 || (info->altivec_size != 0
24784 && (info->altivec_save_offset + info->altivec_size - 16
24785 + info->total_size - frame_off) > 32767)
24786 || (info->vrsave_size != 0
24787 && (info->vrsave_save_offset
24788 + info->total_size - frame_off) > 32767))
24790 int sel = SAVRES_SAVE | SAVRES_VR;
24791 unsigned ptr_regno = ptr_regno_for_savres (sel);
24793 if (using_static_chain_p
24794 && ptr_regno == STATIC_CHAIN_REGNUM)
24795 ptr_regno = 12;
24796 if (REGNO (frame_reg_rtx) != ptr_regno)
24797 START_USE (ptr_regno);
24798 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
24799 frame_reg_rtx = ptr_reg;
24800 ptr_off = info->altivec_save_offset + info->altivec_size;
24801 frame_off = -ptr_off;
24803 else if (REGNO (frame_reg_rtx) == 1)
24804 frame_off = info->total_size;
24805 sp_adjust = rs6000_emit_allocate_stack (info->total_size,
24806 ptr_reg, ptr_off);
24807 if (REGNO (frame_reg_rtx) == 12)
24808 sp_adjust = 0;
24809 sp_off = info->total_size;
24810 if (frame_reg_rtx != sp_reg_rtx)
24811 rs6000_emit_stack_tie (frame_reg_rtx, false);
24814 /* Set frame pointer, if needed. */
24815 if (frame_pointer_needed)
24817 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
24818 sp_reg_rtx);
24819 RTX_FRAME_RELATED_P (insn) = 1;
24822 /* Save AltiVec registers if needed. Save here because the red zone does
24823 not always include AltiVec registers. */
24824 if (!WORLD_SAVE_P (info)
24825 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
24827 int end_save = info->altivec_save_offset + info->altivec_size;
24828 int ptr_off;
24829 /* Oddly, the vector save/restore functions point r0 at the end
24830 of the save area, then use r11 or r12 to load offsets for
24831 [reg+reg] addressing. */
24832 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24833 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
24834 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24836 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24837 NOT_INUSE (0);
24838 if (scratch_regno == 12)
24839 sp_adjust = 0;
24840 if (end_save + frame_off != 0)
24842 rtx offset = GEN_INT (end_save + frame_off);
24844 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24846 else
24847 emit_move_insn (ptr_reg, frame_reg_rtx);
24849 ptr_off = -end_save;
24850 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24851 info->altivec_save_offset + ptr_off,
24852 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
24853 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
24854 NULL_RTX, NULL_RTX);
24855 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24857 /* The oddity mentioned above clobbered our frame reg. */
24858 emit_move_insn (frame_reg_rtx, ptr_reg);
24859 frame_off = ptr_off;
24862 else if (!WORLD_SAVE_P (info)
24863 && info->altivec_size != 0)
24865 int i;
24867 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24868 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24870 rtx areg, savereg, mem;
24871 int offset;
24873 offset = (info->altivec_save_offset + frame_off
24874 + 16 * (i - info->first_altivec_reg_save));
24876 savereg = gen_rtx_REG (V4SImode, i);
24878 NOT_INUSE (0);
24879 areg = gen_rtx_REG (Pmode, 0);
24880 emit_move_insn (areg, GEN_INT (offset));
24882 /* AltiVec addressing mode is [reg+reg]. */
24883 mem = gen_frame_mem (V4SImode,
24884 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
24886 /* Rather than emitting a generic move, force use of the stvx
24887 instruction, which we always want. In particular we don't
24888 want xxpermdi/stxvd2x for little endian. */
24889 insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg));
24891 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24892 areg, GEN_INT (offset));
24896 /* VRSAVE is a bit vector representing which AltiVec registers
24897 are used. The OS uses this to determine which vector
24898 registers to save on a context switch. We need to save
24899 VRSAVE on the stack frame, add whatever AltiVec registers we
24900 used in this function, and do the corresponding magic in the
24901 epilogue. */
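/* A sketch of the mask arithmetic, assuming ALTIVEC_REG_BIT maps v0 to
   the most-significant bit: a function using v20 and v21 contributes
   mask bits 0x00000800 | 0x00000400 == 0x00000c00, which the ior
   below merges into the saved VRSAVE value.  */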
24903 if (!WORLD_SAVE_P (info)
24904 && info->vrsave_size != 0)
24906 rtx reg, vrsave;
24907 int offset;
24908 int save_regno;
24910 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
24911 be using r12 as frame_reg_rtx and r11 as the static chain
24912 pointer for nested functions. */
24913 save_regno = 12;
24914 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24915 && !using_static_chain_p)
24916 save_regno = 11;
24917 else if (using_split_stack || REGNO (frame_reg_rtx) == 12)
24919 save_regno = 11;
24920 if (using_static_chain_p)
24921 save_regno = 0;
24924 NOT_INUSE (save_regno);
24925 reg = gen_rtx_REG (SImode, save_regno);
24926 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24927 if (TARGET_MACHO)
24928 emit_insn (gen_get_vrsave_internal (reg));
24929 else
24930 emit_insn (gen_rtx_SET (reg, vrsave));
24932 /* Save VRSAVE. */
24933 offset = info->vrsave_save_offset + frame_off;
24934 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
24936 /* Include the registers in the mask. */
24937 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
24939 insn = emit_insn (generate_set_vrsave (reg, info, 0));
24942 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
24943 if (!TARGET_SINGLE_PIC_BASE
24944 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
24945 || (DEFAULT_ABI == ABI_V4
24946 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
24947 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
24949 /* If emit_load_toc_table will use the link register, we need to save
24950 it. We use R12 for this purpose because emit_load_toc_table
24951 can use register 0. This allows us to use a plain 'blr' to return
24952 from the procedure more often. */
24953 int save_LR_around_toc_setup = (TARGET_ELF
24954 && DEFAULT_ABI == ABI_V4
24955 && flag_pic
24956 && ! info->lr_save_p
24957 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
24958 if (save_LR_around_toc_setup)
24960 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24961 rtx tmp = gen_rtx_REG (Pmode, 12);
24963 sp_adjust = 0;
24964 insn = emit_move_insn (tmp, lr);
24965 RTX_FRAME_RELATED_P (insn) = 1;
24967 rs6000_emit_load_toc_table (TRUE);
24969 insn = emit_move_insn (lr, tmp);
24970 add_reg_note (insn, REG_CFA_RESTORE, lr);
24971 RTX_FRAME_RELATED_P (insn) = 1;
24973 else
24974 rs6000_emit_load_toc_table (TRUE);
24977 #if TARGET_MACHO
24978 if (!TARGET_SINGLE_PIC_BASE
24979 && DEFAULT_ABI == ABI_DARWIN
24980 && flag_pic && crtl->uses_pic_offset_table)
24982 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24983 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
24985 /* Save and restore LR locally around this call (in R0). */
24986 if (!info->lr_save_p)
24987 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
24989 emit_insn (gen_load_macho_picbase (src));
24991 emit_move_insn (gen_rtx_REG (Pmode,
24992 RS6000_PIC_OFFSET_TABLE_REGNUM),
24993 lr);
24995 if (!info->lr_save_p)
24996 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
24998 #endif
25000 /* If we need to, save the TOC register after doing the stack setup.
25001 Do not emit eh frame info for this save. The unwinder wants info,
25002 conceptually attached to instructions in this function, about
25003 register values in the caller of this function. This R2 may have
25004 already been changed from the value in the caller.
25005 We don't attempt to write accurate DWARF EH frame info for R2
25006 because code emitted by gcc for a (non-pointer) function call
25007 doesn't save and restore R2. Instead, R2 is managed out-of-line
25008 by a linker generated plt call stub when the function resides in
25009 a shared library. This behaviour is costly to describe in DWARF,
25010 both in terms of the size of DWARF info and the time taken in the
25011 unwinder to interpret it. R2 changes, apart from the
25012 calls_eh_return case earlier in this function, are handled by
25013 linux-unwind.h frob_update_context. */
25014 if (rs6000_save_toc_in_prologue_p ())
25016 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
25017 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
25020 if (using_split_stack && split_stack_arg_pointer_used_p ())
25022 /* Set up the arg pointer (r12) for -fsplit-stack code. If
25023 __morestack was called, it left the arg pointer to the old
25024 stack in r29. Otherwise, the arg pointer is the top of the
25025 current frame. */
25026 cfun->machine->split_stack_argp_used = true;
25027 if (sp_adjust)
25029 rtx r12 = gen_rtx_REG (Pmode, 12);
25030 rtx set_r12 = gen_rtx_SET (r12, sp_reg_rtx);
25031 emit_insn_before (set_r12, sp_adjust);
25033 else if (frame_off != 0 || REGNO (frame_reg_rtx) != 12)
25035 rtx r12 = gen_rtx_REG (Pmode, 12);
25036 if (frame_off == 0)
25037 emit_move_insn (r12, frame_reg_rtx);
25038 else
25039 emit_insn (gen_add3_insn (r12, frame_reg_rtx, GEN_INT (frame_off)));
25041 if (info->push_p)
25043 rtx r12 = gen_rtx_REG (Pmode, 12);
25044 rtx r29 = gen_rtx_REG (Pmode, 29);
25045 rtx cr7 = gen_rtx_REG (CCUNSmode, CR7_REGNO);
25046 rtx not_more = gen_label_rtx ();
25047 rtx jump;
25049 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
25050 gen_rtx_GEU (VOIDmode, cr7, const0_rtx),
25051 gen_rtx_LABEL_REF (VOIDmode, not_more),
25052 pc_rtx);
25053 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
25054 JUMP_LABEL (jump) = not_more;
25055 LABEL_NUSES (not_more) += 1;
25056 emit_move_insn (r12, r29);
25057 emit_label (not_more);
25062 /* Output .extern statements for the save/restore routines we use. */
25064 static void
25065 rs6000_output_savres_externs (FILE *file)
25067 rs6000_stack_t *info = rs6000_stack_info ();
25069 if (TARGET_DEBUG_STACK)
25070 debug_stack_info (info);
25072 /* Write .extern for any function we will call to save and restore
25073 fp values. */
25074 if (info->first_fp_reg_save < 64
25075 && !TARGET_MACHO
25076 && !TARGET_ELF)
25078 char *name;
25079 int regno = info->first_fp_reg_save - 32;
25081 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
25083 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
25084 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
25085 name = rs6000_savres_routine_name (info, regno, sel);
25086 fprintf (file, "\t.extern %s\n", name);
25088 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
25090 bool lr = (info->savres_strategy
25091 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25092 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25093 name = rs6000_savres_routine_name (info, regno, sel);
25094 fprintf (file, "\t.extern %s\n", name);
25099 /* Write function prologue. */
25101 static void
25102 rs6000_output_function_prologue (FILE *file,
25103 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
25105 if (!cfun->is_thunk)
25106 rs6000_output_savres_externs (file);
25108 /* ELFv2 ABI r2 setup code and local entry point. This must follow
25109 immediately after the global entry point label. */
25110 if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
25112 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25114 fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
25115 fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
25117 fputs ("\t.localentry\t", file);
25118 assemble_name (file, name);
25119 fputs (",.-", file);
25120 assemble_name (file, name);
25121 fputs ("\n", file);
25124 /* Output -mprofile-kernel code. This needs to be done here instead of
25125 in output_function_profile since it must go after the ELFv2 ABI
25126 local entry point. */
25127 if (TARGET_PROFILE_KERNEL && crtl->profile)
25129 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
25130 gcc_assert (!TARGET_32BIT);
25132 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
25134 /* In the ELFv2 ABI we have no compiler stack word. It must be
25135 the responsibility of _mcount to preserve the static chain
25136 register if required. */
25137 if (DEFAULT_ABI != ABI_ELFv2
25138 && cfun->static_chain_decl != NULL)
25140 asm_fprintf (file, "\tstd %s,24(%s)\n",
25141 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
25142 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
25143 asm_fprintf (file, "\tld %s,24(%s)\n",
25144 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
25146 else
25147 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
25150 rs6000_pic_labelno++;
25153 /* Non-zero if vmx regs are restored before the frame pop, zero if
25154 we restore after the pop when possible. */
25155 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
25157 /* Restoring cr is a two-step process: loading a reg from the frame
25158 save, then moving the reg to cr. For ABI_V4 we must let the
25159 unwinder know that the stack location is no longer valid at or
25160 before the stack deallocation, but we can't emit a cfa_restore for
25161 cr at the stack deallocation like we do for other registers.
25162 The trouble is that it is possible for the move to cr to be
25163 scheduled after the stack deallocation. So say exactly where cr
25164 is located on each of the two insns. */
25166 static rtx
25167 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
25169 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
25170 rtx reg = gen_rtx_REG (SImode, regno);
25171 rtx_insn *insn = emit_move_insn (reg, mem);
25173 if (!exit_func && DEFAULT_ABI == ABI_V4)
25175 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
25176 rtx set = gen_rtx_SET (reg, cr);
25178 add_reg_note (insn, REG_CFA_REGISTER, set);
25179 RTX_FRAME_RELATED_P (insn) = 1;
25181 return reg;
25184 /* Reload CR from REG. */
25186 static void
25187 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
25189 int count = 0;
25190 int i;
25192 if (using_mfcr_multiple)
25194 for (i = 0; i < 8; i++)
25195 if (save_reg_p (CR0_REGNO + i))
25196 count++;
25197 gcc_assert (count);
25200 if (using_mfcr_multiple && count > 1)
25202 rtx_insn *insn;
25203 rtvec p;
25204 int ndx;
25206 p = rtvec_alloc (count);
25208 ndx = 0;
25209 for (i = 0; i < 8; i++)
25210 if (save_reg_p (CR0_REGNO + i))
25212 rtvec r = rtvec_alloc (2);
25213 RTVEC_ELT (r, 0) = reg;
25214 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
25215 RTVEC_ELT (p, ndx) =
25216 gen_rtx_SET (gen_rtx_REG (CCmode, CR0_REGNO + i),
25217 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
25218 ndx++;
25220 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25221 gcc_assert (ndx == count);
25223 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
25224 CR field separately. */
25225 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
25227 for (i = 0; i < 8; i++)
25228 if (save_reg_p (CR0_REGNO + i))
25229 add_reg_note (insn, REG_CFA_RESTORE,
25230 gen_rtx_REG (SImode, CR0_REGNO + i));
25232 RTX_FRAME_RELATED_P (insn) = 1;
25235 else
25236 for (i = 0; i < 8; i++)
25237 if (save_reg_p (CR0_REGNO + i))
25239 rtx insn = emit_insn (gen_movsi_to_cr_one
25240 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
25242 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
25243 CR field separately, attached to the insn that in fact
25244 restores this particular CR field. */
25245 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
25247 add_reg_note (insn, REG_CFA_RESTORE,
25248 gen_rtx_REG (SImode, CR0_REGNO + i));
25250 RTX_FRAME_RELATED_P (insn) = 1;
25254 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
25255 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
25256 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
25258 rtx_insn *insn = get_last_insn ();
25259 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
25261 add_reg_note (insn, REG_CFA_RESTORE, cr);
25262 RTX_FRAME_RELATED_P (insn) = 1;
25266 /* Like cr, the move to lr instruction can be scheduled after the
25267 stack deallocation, but unlike cr, its stack frame save is still
25268 valid. So we only need to emit the cfa_restore on the correct
25269 instruction. */
25271 static void
25272 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
25274 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
25275 rtx reg = gen_rtx_REG (Pmode, regno);
25277 emit_move_insn (reg, mem);
25280 static void
25281 restore_saved_lr (int regno, bool exit_func)
25283 rtx reg = gen_rtx_REG (Pmode, regno);
25284 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
25285 rtx_insn *insn = emit_move_insn (lr, reg);
25287 if (!exit_func && flag_shrink_wrap)
25289 add_reg_note (insn, REG_CFA_RESTORE, lr);
25290 RTX_FRAME_RELATED_P (insn) = 1;
25294 static rtx
25295 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
25297 if (DEFAULT_ABI == ABI_ELFv2)
25299 int i;
25300 for (i = 0; i < 8; i++)
25301 if (save_reg_p (CR0_REGNO + i))
25303 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
25304 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
25305 cfa_restores);
25308 else if (info->cr_save_p)
25309 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
25310 gen_rtx_REG (SImode, CR2_REGNO),
25311 cfa_restores);
25313 if (info->lr_save_p)
25314 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
25315 gen_rtx_REG (Pmode, LR_REGNO),
25316 cfa_restores);
25317 return cfa_restores;
25320 /* Return true if OFFSET from the stack pointer can be clobbered by signals.
25321    V.4 doesn't have any stack cushion; the AIX ABIs keep 220 or 288 bytes
25322    below the stack pointer that signals will not clobber.  */
25324 static inline bool
25325 offset_below_red_zone_p (HOST_WIDE_INT offset)
25327   return offset < (DEFAULT_ABI == ABI_V4
25328                    ? 0
25329                    : TARGET_32BIT ? -220 : -288);
25330 }
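/* For example, offset_below_red_zone_p (-250) is true for 32-bit
   (only 220 bytes of cushion) but false for 64-bit (288 bytes), and
   every negative offset qualifies under V.4, which has no cushion.  */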
25332 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
25334 static void
25335 emit_cfa_restores (rtx cfa_restores)
25337 rtx_insn *insn = get_last_insn ();
25338 rtx *loc = &REG_NOTES (insn);
25340 while (*loc)
25341 loc = &XEXP (*loc, 1);
25342 *loc = cfa_restores;
25343 RTX_FRAME_RELATED_P (insn) = 1;
25346 /* Emit function epilogue as insns. */
25348 void
25349 rs6000_emit_epilogue (int sibcall)
25351 rs6000_stack_t *info;
25352 int restoring_GPRs_inline;
25353 int restoring_FPRs_inline;
25354 int using_load_multiple;
25355 int using_mtcr_multiple;
25356 int use_backchain_to_restore_sp;
25357 int restore_lr;
25358 int strategy;
25359 HOST_WIDE_INT frame_off = 0;
25360 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
25361 rtx frame_reg_rtx = sp_reg_rtx;
25362 rtx cfa_restores = NULL_RTX;
25363 rtx insn;
25364 rtx cr_save_reg = NULL_RTX;
25365 machine_mode reg_mode = Pmode;
25366 int reg_size = TARGET_32BIT ? 4 : 8;
25367 int i;
25368 bool exit_func;
25369 unsigned ptr_regno;
25371 info = rs6000_stack_info ();
25373 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
25375 reg_mode = V2SImode;
25376 reg_size = 8;
25379 strategy = info->savres_strategy;
25380 using_load_multiple = strategy & SAVRES_MULTIPLE;
25381 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
25382 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
25383 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
25384 || rs6000_cpu == PROCESSOR_PPC603
25385 || rs6000_cpu == PROCESSOR_PPC750
25386 || optimize_size);
25387 /* Restore via the backchain when we have a large frame, since this
25388 is more efficient than an addis, addi pair. The second condition
25389 here will not trigger at the moment; we don't actually need a
25390 frame pointer for alloca, but the generic parts of the compiler
25391 give us one anyway. */
25392 use_backchain_to_restore_sp = (info->total_size + (info->lr_save_p
25393 ? info->lr_save_offset
25394 : 0) > 32767
25395 || (cfun->calls_alloca
25396 && !frame_pointer_needed));
25397 restore_lr = (info->lr_save_p
25398 && (restoring_FPRs_inline
25399 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
25400 && (restoring_GPRs_inline
25401 || info->first_fp_reg_save < 64));
25403 if (WORLD_SAVE_P (info))
25405 int i, j;
25406 char rname[30];
25407 const char *alloc_rname;
25408 rtvec p;
25410 /* eh_rest_world_r10 will return to the location saved in the LR
25411 stack slot (which is not likely to be our caller).
25412 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
25413 rest_world is similar, except any R10 parameter is ignored.
25414 The exception-handling stuff that was here in 2.95 is no
25415 longer necessary. */
25417 p = rtvec_alloc (9
25419 + 32 - info->first_gp_reg_save
25420 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
25421 + 63 + 1 - info->first_fp_reg_save);
25423 strcpy (rname, ((crtl->calls_eh_return) ?
25424 "*eh_rest_world_r10" : "*rest_world"));
25425 alloc_rname = ggc_strdup (rname);
25427 j = 0;
25428 RTVEC_ELT (p, j++) = ret_rtx;
25429 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
25430 gen_rtx_REG (Pmode,
25431 LR_REGNO));
25432 RTVEC_ELT (p, j++)
25433 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
25434 /* The instruction pattern requires a clobber here;
25435 it is shared with the restVEC helper. */
25436 RTVEC_ELT (p, j++)
25437 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
25440 /* CR register traditionally saved as CR2. */
25441 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
25442 RTVEC_ELT (p, j++)
25443 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
25444 if (flag_shrink_wrap)
25446 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
25447 gen_rtx_REG (Pmode, LR_REGNO),
25448 cfa_restores);
25449 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25453 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25455 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
25456 RTVEC_ELT (p, j++)
25457 = gen_frame_load (reg,
25458 frame_reg_rtx, info->gp_save_offset + reg_size * i);
25459 if (flag_shrink_wrap)
25460 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25462 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
25464 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
25465 RTVEC_ELT (p, j++)
25466 = gen_frame_load (reg,
25467 frame_reg_rtx, info->altivec_save_offset + 16 * i);
25468 if (flag_shrink_wrap)
25469 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25471 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
25473 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25474 ? DFmode : SFmode),
25475 info->first_fp_reg_save + i);
25476 RTVEC_ELT (p, j++)
25477 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
25478 if (flag_shrink_wrap)
25479 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25481 RTVEC_ELT (p, j++)
25482 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
25483 RTVEC_ELT (p, j++)
25484 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
25485 RTVEC_ELT (p, j++)
25486 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
25487 RTVEC_ELT (p, j++)
25488 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
25489 RTVEC_ELT (p, j++)
25490 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
25491 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
25493 if (flag_shrink_wrap)
25495 REG_NOTES (insn) = cfa_restores;
25496 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
25497 RTX_FRAME_RELATED_P (insn) = 1;
25499 return;
25502 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
25503 if (info->push_p)
25504 frame_off = info->total_size;
25506 /* Restore AltiVec registers if we must do so before adjusting the
25507 stack. */
25508 if (info->altivec_size != 0
25509 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25510 || (DEFAULT_ABI != ABI_V4
25511 && offset_below_red_zone_p (info->altivec_save_offset))))
25513 int i;
25514 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
25516 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
25517 if (use_backchain_to_restore_sp)
25519 int frame_regno = 11;
25521 if ((strategy & REST_INLINE_VRS) == 0)
25523 /* Of r11 and r12, select the one not clobbered by an
25524 out-of-line restore function for the frame register. */
25525 frame_regno = 11 + 12 - scratch_regno;
25527 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
25528 emit_move_insn (frame_reg_rtx,
25529 gen_rtx_MEM (Pmode, sp_reg_rtx));
25530 frame_off = 0;
25532 else if (frame_pointer_needed)
25533 frame_reg_rtx = hard_frame_pointer_rtx;
25535 if ((strategy & REST_INLINE_VRS) == 0)
25537 int end_save = info->altivec_save_offset + info->altivec_size;
25538 int ptr_off;
25539 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
25540 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
25542 if (end_save + frame_off != 0)
25544 rtx offset = GEN_INT (end_save + frame_off);
25546 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
25548 else
25549 emit_move_insn (ptr_reg, frame_reg_rtx);
25551 ptr_off = -end_save;
25552 insn = rs6000_emit_savres_rtx (info, scratch_reg,
25553 info->altivec_save_offset + ptr_off,
25554 0, V4SImode, SAVRES_VR);
25556 else
25558 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25559 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25561 rtx addr, areg, mem, reg;
25563 areg = gen_rtx_REG (Pmode, 0);
25564 emit_move_insn
25565 (areg, GEN_INT (info->altivec_save_offset
25566 + frame_off
25567 + 16 * (i - info->first_altivec_reg_save)));
25569 /* AltiVec addressing mode is [reg+reg]. */
25570 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
25571 mem = gen_frame_mem (V4SImode, addr);
25573 reg = gen_rtx_REG (V4SImode, i);
25574 /* Rather than emitting a generic move, force use of the
25575 lvx instruction, which we always want. In particular
25576 we don't want lxvd2x/xxpermdi for little endian. */
25577 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
25581 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25582 if (((strategy & REST_INLINE_VRS) == 0
25583 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
25584 && (flag_shrink_wrap
25585 || (offset_below_red_zone_p
25586 (info->altivec_save_offset
25587 + 16 * (i - info->first_altivec_reg_save)))))
25589 rtx reg = gen_rtx_REG (V4SImode, i);
25590 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25594 /* Restore VRSAVE if we must do so before adjusting the stack. */
25595 if (info->vrsave_size != 0
25596 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25597 || (DEFAULT_ABI != ABI_V4
25598 && offset_below_red_zone_p (info->vrsave_save_offset))))
25600 rtx reg;
25602 if (frame_reg_rtx == sp_reg_rtx)
25604 if (use_backchain_to_restore_sp)
25606 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25607 emit_move_insn (frame_reg_rtx,
25608 gen_rtx_MEM (Pmode, sp_reg_rtx));
25609 frame_off = 0;
25611 else if (frame_pointer_needed)
25612 frame_reg_rtx = hard_frame_pointer_rtx;
25615 reg = gen_rtx_REG (SImode, 12);
25616 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25617 info->vrsave_save_offset + frame_off));
25619 emit_insn (generate_set_vrsave (reg, info, 1));
25622 insn = NULL_RTX;
25623 /* If we have a large stack frame, restore the old stack pointer
25624 using the backchain. */
25625 if (use_backchain_to_restore_sp)
25627 if (frame_reg_rtx == sp_reg_rtx)
25629 /* Under V.4, don't reset the stack pointer until after we're done
25630 loading the saved registers. */
25631 if (DEFAULT_ABI == ABI_V4)
25632 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25634 insn = emit_move_insn (frame_reg_rtx,
25635 gen_rtx_MEM (Pmode, sp_reg_rtx));
25636 frame_off = 0;
25638 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25639 && DEFAULT_ABI == ABI_V4)
25640 /* frame_reg_rtx has been set up by the altivec restore. */
25642 else
25644 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
25645 frame_reg_rtx = sp_reg_rtx;
25648 /* If we have a frame pointer, we can restore the old stack pointer
25649 from it. */
25650 else if (frame_pointer_needed)
25652 frame_reg_rtx = sp_reg_rtx;
25653 if (DEFAULT_ABI == ABI_V4)
25654 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25655 /* Prevent reordering memory accesses against stack pointer restore. */
25656 else if (cfun->calls_alloca
25657 || offset_below_red_zone_p (-info->total_size))
25658 rs6000_emit_stack_tie (frame_reg_rtx, true);
25660 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
25661 GEN_INT (info->total_size)));
25662 frame_off = 0;
25664 else if (info->push_p
25665 && DEFAULT_ABI != ABI_V4
25666 && !crtl->calls_eh_return)
25668 /* Prevent reordering memory accesses against stack pointer restore. */
25669 if (cfun->calls_alloca
25670 || offset_below_red_zone_p (-info->total_size))
25671 rs6000_emit_stack_tie (frame_reg_rtx, false);
25672 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
25673 GEN_INT (info->total_size)));
25674 frame_off = 0;
25676 if (insn && frame_reg_rtx == sp_reg_rtx)
25678 if (cfa_restores)
25680 REG_NOTES (insn) = cfa_restores;
25681 cfa_restores = NULL_RTX;
25683 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
25684 RTX_FRAME_RELATED_P (insn) = 1;
25687 /* Restore AltiVec registers if we have not done so already. */
25688 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25689 && info->altivec_size != 0
25690 && (DEFAULT_ABI == ABI_V4
25691 || !offset_below_red_zone_p (info->altivec_save_offset)))
25693 int i;
25695 if ((strategy & REST_INLINE_VRS) == 0)
25697 int end_save = info->altivec_save_offset + info->altivec_size;
25698 int ptr_off;
25699 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
25700 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
25701 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
25703 if (end_save + frame_off != 0)
25705 rtx offset = GEN_INT (end_save + frame_off);
25707 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
25709 else
25710 emit_move_insn (ptr_reg, frame_reg_rtx);
25712 ptr_off = -end_save;
25713 insn = rs6000_emit_savres_rtx (info, scratch_reg,
25714 info->altivec_save_offset + ptr_off,
25715 0, V4SImode, SAVRES_VR);
25716 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
25718 /* Frame reg was clobbered by out-of-line save. Restore it
25719 from ptr_reg, and if we are calling an out-of-line gpr or
25720 fpr restore, set up the correct pointer and offset. */
25721 unsigned newptr_regno = 1;
25722 if (!restoring_GPRs_inline)
25724 bool lr = info->gp_save_offset + info->gp_size == 0;
25725 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
25726 newptr_regno = ptr_regno_for_savres (sel);
25727 end_save = info->gp_save_offset + info->gp_size;
25729 else if (!restoring_FPRs_inline)
25731 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
25732 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25733 newptr_regno = ptr_regno_for_savres (sel);
25734 end_save = info->fp_save_offset + info->fp_size;
25737 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
25738 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
25740 if (end_save + ptr_off != 0)
25742 rtx offset = GEN_INT (end_save + ptr_off);
25744 frame_off = -end_save;
25745 if (TARGET_32BIT)
25746 emit_insn (gen_addsi3_carry (frame_reg_rtx,
25747 ptr_reg, offset));
25748 else
25749 emit_insn (gen_adddi3_carry (frame_reg_rtx,
25750 ptr_reg, offset));
25752 else
25754 frame_off = ptr_off;
25755 emit_move_insn (frame_reg_rtx, ptr_reg);
25759 else
25761 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25762 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
25764 rtx addr, areg, mem, reg;
25766 areg = gen_rtx_REG (Pmode, 0);
25767 emit_move_insn
25768 (areg, GEN_INT (info->altivec_save_offset
25769 + frame_off
25770 + 16 * (i - info->first_altivec_reg_save)));
25772 /* AltiVec addressing mode is [reg+reg]. */
25773 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
25774 mem = gen_frame_mem (V4SImode, addr);
25776 reg = gen_rtx_REG (V4SImode, i);
25777 /* Rather than emitting a generic move, force use of the
25778 lvx instruction, which we always want. In particular
25779 we don't want lxvd2x/xxpermdi for little endian. */
25780 (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem));
25784 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
25785 if (((strategy & REST_INLINE_VRS) == 0
25786 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
25787 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
25789 rtx reg = gen_rtx_REG (V4SImode, i);
25790 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25794 /* Restore VRSAVE if we have not done so already. */
25795 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
25796 && info->vrsave_size != 0
25797 && (DEFAULT_ABI == ABI_V4
25798 || !offset_below_red_zone_p (info->vrsave_save_offset)))
25800 rtx reg;
25802 reg = gen_rtx_REG (SImode, 12);
25803 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25804 info->vrsave_save_offset + frame_off));
25806 emit_insn (generate_set_vrsave (reg, info, 1));
25809 /* If we exit by an out-of-line restore function on ABI_V4 then that
25810 function will deallocate the stack, so we don't need to worry
25811 about the unwinder restoring cr from an invalid stack frame
25812 location. */
25813 exit_func = (!restoring_FPRs_inline
25814 || (!restoring_GPRs_inline
25815 && info->first_fp_reg_save == 64));
25817 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
25818 *separate* slots if the routine calls __builtin_eh_return, so
25819 that they can be independently restored by the unwinder. */
25820 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
25822 int i, cr_off = info->ehcr_offset;
25824 for (i = 0; i < 8; i++)
25825 if (!call_used_regs[CR0_REGNO + i])
25827 rtx reg = gen_rtx_REG (SImode, 0);
25828 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25829 cr_off + frame_off));
25831 insn = emit_insn (gen_movsi_to_cr_one
25832 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
25834 if (!exit_func && flag_shrink_wrap)
25836 add_reg_note (insn, REG_CFA_RESTORE,
25837 gen_rtx_REG (SImode, CR0_REGNO + i));
25839 RTX_FRAME_RELATED_P (insn) = 1;
25842 cr_off += reg_size;
25846 /* Get the old lr if we saved it. If we are restoring registers
25847 out-of-line, then the out-of-line routines can do this for us. */
25848 if (restore_lr && restoring_GPRs_inline)
25849 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25851 /* Get the old cr if we saved it. */
25852 if (info->cr_save_p)
25854 unsigned cr_save_regno = 12;
25856 if (!restoring_GPRs_inline)
25858 /* Ensure we don't use the register used by the out-of-line
25859 gpr register restore below. */
25860 bool lr = info->gp_save_offset + info->gp_size == 0;
25861 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
25862 int gpr_ptr_regno = ptr_regno_for_savres (sel);
25864 if (gpr_ptr_regno == 12)
25865 cr_save_regno = 11;
25866 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
25868 else if (REGNO (frame_reg_rtx) == 12)
25869 cr_save_regno = 11;
25871 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
25872 info->cr_save_offset + frame_off,
25873 exit_func);
25876 /* Set LR here to try to overlap restores below. */
25877 if (restore_lr && restoring_GPRs_inline)
25878 restore_saved_lr (0, exit_func);
25880 /* Load exception handler data registers, if needed. */
25881 if (crtl->calls_eh_return)
25883 unsigned int i, regno;
25885 if (TARGET_AIX)
25887 rtx reg = gen_rtx_REG (reg_mode, 2);
25888 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25889 frame_off + RS6000_TOC_SAVE_SLOT));
25892 for (i = 0; ; ++i)
25894 rtx mem;
25896 regno = EH_RETURN_DATA_REGNO (i);
25897 if (regno == INVALID_REGNUM)
25898 break;
25900 /* Note: possible use of r0 here to address SPE regs. */
25901 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
25902 info->ehrd_offset + frame_off
25903 + reg_size * (int) i);
25905 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
25909 /* Restore GPRs. This is done as a PARALLEL if we are using
25910 the load-multiple instructions. */
25911 if (TARGET_SPE_ABI
25912 && info->spe_64bit_regs_used
25913 && info->first_gp_reg_save != 32)
25915 /* Determine whether we can address all of the registers that need
25916 to be saved with an offset from frame_reg_rtx that fits in
25917 the small const field for SPE memory instructions. */
25918 int spe_regs_addressable
25919 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25920 + reg_size * (32 - info->first_gp_reg_save - 1))
25921 && restoring_GPRs_inline);
25923 if (!spe_regs_addressable)
25925 int ool_adjust = 0;
25926 rtx old_frame_reg_rtx = frame_reg_rtx;
25927 /* Make r11 point to the start of the SPE save area. We worried about
25928 not clobbering it when we were saving registers in the prologue.
25929 There's no need to worry here because the static chain is passed
25930 anew to every function. */
25932 if (!restoring_GPRs_inline)
25933 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25934 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25935 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
25936 GEN_INT (info->spe_gp_save_offset
25937 + frame_off
25938 - ool_adjust)));
25939 /* Keep the invariant that frame_reg_rtx + frame_off points
25940 at the top of the stack frame. */
25941 frame_off = -info->spe_gp_save_offset + ool_adjust;
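/* Checking the invariant: the new frame_reg_rtx equals the old
   frame_reg_rtx + spe_gp_save_offset + old frame_off - ool_adjust,
   and the new frame_off is -spe_gp_save_offset + ool_adjust, so
   their sum is unchanged, as required.  */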
25944 if (restoring_GPRs_inline)
25946 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
25948 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25949 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25951 rtx offset, addr, mem, reg;
25953 /* We're doing all this to ensure that the immediate offset
25954 fits into the immediate field of 'evldd'. */
25955 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
25957 offset = GEN_INT (spe_offset + reg_size * i);
25958 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
25959 mem = gen_rtx_MEM (V2SImode, addr);
25960 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
25962 emit_move_insn (reg, mem);
25965 else
25966 rs6000_emit_savres_rtx (info, frame_reg_rtx,
25967 info->spe_gp_save_offset + frame_off,
25968 info->lr_save_offset + frame_off,
25969 reg_mode,
25970 SAVRES_GPR | SAVRES_LR);
25972 else if (!restoring_GPRs_inline)
25974 /* We are jumping to an out-of-line function. */
25975 rtx ptr_reg;
25976 int end_save = info->gp_save_offset + info->gp_size;
25977 bool can_use_exit = end_save == 0;
25978 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
25979 int ptr_off;
25981 /* Emit stack reset code if we need it. */
25982 ptr_regno = ptr_regno_for_savres (sel);
25983 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25984 if (can_use_exit)
25985 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25986 else if (end_save + frame_off != 0)
25987 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
25988 GEN_INT (end_save + frame_off)));
25989 else if (REGNO (frame_reg_rtx) != ptr_regno)
25990 emit_move_insn (ptr_reg, frame_reg_rtx);
25991 if (REGNO (frame_reg_rtx) == ptr_regno)
25992 frame_off = -end_save;
25994 if (can_use_exit && info->cr_save_p)
25995 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
25997 ptr_off = -end_save;
25998 rs6000_emit_savres_rtx (info, ptr_reg,
25999 info->gp_save_offset + ptr_off,
26000 info->lr_save_offset + ptr_off,
26001 reg_mode, sel);
26003 else if (using_load_multiple)
26005 rtvec p;
26006 p = rtvec_alloc (32 - info->first_gp_reg_save);
26007 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26008 RTVEC_ELT (p, i)
26009 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26010 frame_reg_rtx,
26011 info->gp_save_offset + frame_off + reg_size * i);
26012 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
26014 else
26016 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
26017 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
26018 emit_insn (gen_frame_load
26019 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
26020 frame_reg_rtx,
26021 info->gp_save_offset + frame_off + reg_size * i));
26024 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
26026 /* If the frame pointer was used then we can't delay emitting
26027 a REG_CFA_DEF_CFA note. This must happen on the insn that
26028 restores the frame pointer, r31. We may have already emitted
26029 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
26030 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
26031 be harmless if emitted. */
26032 if (frame_pointer_needed)
26034 insn = get_last_insn ();
26035 add_reg_note (insn, REG_CFA_DEF_CFA,
26036 plus_constant (Pmode, frame_reg_rtx, frame_off));
26037 RTX_FRAME_RELATED_P (insn) = 1;
26040 /* Set up cfa_restores. We always need these when
26041 shrink-wrapping. If not shrink-wrapping then we only need
26042 the cfa_restore when the stack location is no longer valid.
26043 The cfa_restores must be emitted on or before the insn that
26044 invalidates the stack, and of course must not be emitted
26045 before the insn that actually does the restore. The latter
26046 is why it is a bad idea to emit the cfa_restores as a group
26047 on the last instruction here that actually does a restore:
26048 That insn may be reordered with respect to others doing
26049 restores. */
26050 if (flag_shrink_wrap
26051 && !restoring_GPRs_inline
26052 && info->first_fp_reg_save == 64)
26053 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
26055 for (i = info->first_gp_reg_save; i < 32; i++)
26056 if (!restoring_GPRs_inline
26057 || using_load_multiple
26058 || rs6000_reg_live_or_pic_offset_p (i))
26060 rtx reg = gen_rtx_REG (reg_mode, i);
26062 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26066 if (!restoring_GPRs_inline
26067 && info->first_fp_reg_save == 64)
26069 /* We are jumping to an out-of-line function. */
26070 if (cfa_restores)
26071 emit_cfa_restores (cfa_restores);
26072 return;
26075 if (restore_lr && !restoring_GPRs_inline)
26077 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
26078 restore_saved_lr (0, exit_func);
26081 /* Restore fpr's if we need to do it without calling a function. */
26082 if (restoring_FPRs_inline)
26083 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26084 if (save_reg_p (info->first_fp_reg_save + i))
26086 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
26087 ? DFmode : SFmode),
26088 info->first_fp_reg_save + i);
26089 emit_insn (gen_frame_load (reg, frame_reg_rtx,
26090 info->fp_save_offset + frame_off + 8 * i));
26091 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
26092 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
26095 /* If we saved cr, restore it here. Just those that were used. */
26096 if (info->cr_save_p)
26097 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
26099 /* If this is V.4, unwind the stack pointer after all of the loads
26100 have been done, or set up r11 if we are restoring fp out of line. */
26101 ptr_regno = 1;
26102 if (!restoring_FPRs_inline)
26104 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
26105 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
26106 ptr_regno = ptr_regno_for_savres (sel);
26109 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
26110 if (REGNO (frame_reg_rtx) == ptr_regno)
26111 frame_off = 0;
26113 if (insn && restoring_FPRs_inline)
26115 if (cfa_restores)
26117 REG_NOTES (insn) = cfa_restores;
26118 cfa_restores = NULL_RTX;
26120 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
26121 RTX_FRAME_RELATED_P (insn) = 1;
26124 if (crtl->calls_eh_return)
26126 rtx sa = EH_RETURN_STACKADJ_RTX;
26127 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
26130 if (!sibcall)
26132 rtvec p;
26133 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
26134 if (! restoring_FPRs_inline)
26136 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
26137 RTVEC_ELT (p, 0) = ret_rtx;
26139 else
26141 if (cfa_restores)
26143 /* We can't hang the cfa_restores off a simple return,
26144 since the shrink-wrap code sometimes uses an existing
26145 return. This means there might be a path from
26146 pre-prologue code to this return, and dwarf2cfi code
26147 wants the eh_frame unwinder state to be the same on
26148 all paths to any point. So we need to emit the
26149 cfa_restores before the return. For -m64 we really
26150 don't need epilogue cfa_restores at all, except for
26151 this irritating dwarf2cfi-with-shrink-wrap
26152 requirement; the stack red zone means eh_frame info
26153 from the prologue telling the unwinder to restore
26154 from the stack is perfectly good right to the end of
26155 the function. */
26156 emit_insn (gen_blockage ());
26157 emit_cfa_restores (cfa_restores);
26158 cfa_restores = NULL_RTX;
26160 p = rtvec_alloc (2);
26161 RTVEC_ELT (p, 0) = simple_return_rtx;
26164 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
26165 ? gen_rtx_USE (VOIDmode,
26166 gen_rtx_REG (Pmode, LR_REGNO))
26167 : gen_rtx_CLOBBER (VOIDmode,
26168 gen_rtx_REG (Pmode, LR_REGNO)));
26170 /* If we have to restore more than two FP registers, branch to the
26171 restore function. It will return to our caller. */
26172 if (! restoring_FPRs_inline)
26174 int i;
26175 int reg;
26176 rtx sym;
26178 if (flag_shrink_wrap)
26179 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
26181 sym = rs6000_savres_routine_sym (info,
26182 SAVRES_FPR | (lr ? SAVRES_LR : 0));
26183 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
26184 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2) ? 1 : 11;
26185 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
26187 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
26189 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
26191 RTVEC_ELT (p, i + 4)
26192 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
26193 if (flag_shrink_wrap)
26194 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
26195 cfa_restores);
26199 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
26202 if (cfa_restores)
26204 if (sibcall)
26205 /* Ensure the cfa_restores are hung off an insn that won't
26206 be reordered above other restores. */
26207 emit_insn (gen_blockage ());
26209 emit_cfa_restores (cfa_restores);
26213 /* Write function epilogue. */
26215 static void
26216 rs6000_output_function_epilogue (FILE *file,
26217 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
26219 #if TARGET_MACHO
26220 macho_branch_islands ();
26221 /* Mach-O doesn't support labels at the end of objects, so if
26222 it looks like we might want one, insert a NOP. */
26224 rtx_insn *insn = get_last_insn ();
26225 rtx_insn *deleted_debug_label = NULL;
26226 while (insn
26227 && NOTE_P (insn)
26228 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
26230 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes only, don't insert
26231 a nop; instead set their CODE_LABEL_NUMBER to -1,
26232 otherwise there would be code generation differences
26233 between -g and -g0. */
26234 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
26235 deleted_debug_label = insn;
26236 insn = PREV_INSN (insn);
26238 if (insn
26239 && (LABEL_P (insn)
26240 || (NOTE_P (insn)
26241 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
26242 fputs ("\tnop\n", file);
26243 else if (deleted_debug_label)
26244 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
26245 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
26246 CODE_LABEL_NUMBER (insn) = -1;
26248 #endif
26250 /* Output a traceback table here. See /usr/include/sys/debug.h for info
26251 on its format.
26253 We don't output a traceback table if -finhibit-size-directive was
26254 used. The documentation for -finhibit-size-directive reads
26255 ``don't output a @code{.size} assembler directive, or anything
26256 else that would cause trouble if the function is split in the
26257 middle, and the two halves are placed at locations far apart in
26258 memory.'' The traceback table has this property, since it
26259 includes the offset from the start of the function to the
26260 traceback table itself.
26262 System V.4 PowerPC (and the embedded ABI derived from it) uses a
26263 different traceback table. */
26264 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26265 && ! flag_inhibit_size_directive
26266 && rs6000_traceback != traceback_none && !cfun->is_thunk)
26268 const char *fname = NULL;
26269 const char *language_string = lang_hooks.name;
26270 int fixed_parms = 0, float_parms = 0, parm_info = 0;
26271 int i;
26272 int optional_tbtab;
26273 rs6000_stack_t *info = rs6000_stack_info ();
26275 if (rs6000_traceback == traceback_full)
26276 optional_tbtab = 1;
26277 else if (rs6000_traceback == traceback_part)
26278 optional_tbtab = 0;
26279 else
26280 optional_tbtab = !optimize_size && !TARGET_ELF;
26282 if (optional_tbtab)
26284 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26285 while (*fname == '.') /* V.4 encodes . in the name */
26286 fname++;
26288 /* Need label immediately before tbtab, so we can compute
26289 its offset from the function start. */
26290 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
26291 ASM_OUTPUT_LABEL (file, fname);
26294 /* The .tbtab pseudo-op can only be used for the first eight
26295 expressions, since it can't handle the possibly variable
26296 length fields that follow. However, if you omit the optional
26297 fields, the assembler outputs zeros for all optional fields
26298 anyway, giving each variable length field its minimum length
26299 (as defined in sys/debug.h). Thus we cannot use the .tbtab
26300 pseudo-op at all. */
26302 /* An all-zero word flags the start of the tbtab, for debuggers
26303 that have to find it by searching forward from the entry
26304 point or from the current pc. */
26305 fputs ("\t.long 0\n", file);
26307 /* Tbtab format type. Use format type 0. */
26308 fputs ("\t.byte 0,", file);
26310 /* Language type. Unfortunately, there does not seem to be any
26311 official way to discover the language being compiled, so we
26312 use language_string.
26313 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
26314 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
26315 a number, so for now use 9. LTO, Go and JIT aren't assigned numbers
26316 either, so for now use 0. */
26317 if (lang_GNU_C ()
26318 || ! strcmp (language_string, "GNU GIMPLE")
26319 || ! strcmp (language_string, "GNU Go")
26320 || ! strcmp (language_string, "libgccjit"))
26321 i = 0;
26322 else if (! strcmp (language_string, "GNU F77")
26323 || lang_GNU_Fortran ())
26324 i = 1;
26325 else if (! strcmp (language_string, "GNU Pascal"))
26326 i = 2;
26327 else if (! strcmp (language_string, "GNU Ada"))
26328 i = 3;
26329 else if (lang_GNU_CXX ()
26330 || ! strcmp (language_string, "GNU Objective-C++"))
26331 i = 9;
26332 else if (! strcmp (language_string, "GNU Java"))
26333 i = 13;
26334 else if (! strcmp (language_string, "GNU Objective-C"))
26335 i = 14;
26336 else
26337 gcc_unreachable ();
26338 fprintf (file, "%d,", i);
26340 /* 8 single bit fields: global linkage (not set for C extern linkage,
26341 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
26342 from start of procedure stored in tbtab, internal function, function
26343 has controlled storage, function has no toc, function uses fp,
26344 function logs/aborts fp operations. */
26345 /* Assume that fp operations are used if any fp reg must be saved. */
26346 fprintf (file, "%d,",
26347 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
26349 /* 6 bitfields: function is interrupt handler, name present in
26350 proc table, function calls alloca, on condition directives
26351 (controls stack walks, 3 bits), saves condition reg, saves
26352 link reg. */
26353 /* The `function calls alloca' bit seems to be set whenever reg 31 is
26354 set up as a frame pointer, even when there is no alloca call. */
26355 fprintf (file, "%d,",
26356 ((optional_tbtab << 6)
26357 | ((optional_tbtab & frame_pointer_needed) << 5)
26358 | (info->cr_save_p << 1)
26359 | (info->lr_save_p)));
26361 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
26362 (6 bits). */
26363 fprintf (file, "%d,",
26364 (info->push_p << 7) | (64 - info->first_fp_reg_save));
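/* For example (hypothetical function): a frame that is pushed
   (push_p == 1) and saves f30-f31 (first_fp_reg_save == 62)
   emits (1 << 7) | (64 - 62) == 130 for this byte.  */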
26366 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
26367 fprintf (file, "%d,", (32 - first_reg_to_save ()));
26369 if (optional_tbtab)
26371 /* Compute the parameter info from the function decl argument
26372 list. */
26373 tree decl;
26374 int next_parm_info_bit = 31;
26376 for (decl = DECL_ARGUMENTS (current_function_decl);
26377 decl; decl = DECL_CHAIN (decl))
26379 rtx parameter = DECL_INCOMING_RTL (decl);
26380 machine_mode mode = GET_MODE (parameter);
26382 if (GET_CODE (parameter) == REG)
26384 if (SCALAR_FLOAT_MODE_P (mode))
26386 int bits;
26388 float_parms++;
26390 switch (mode)
26392 case SFmode:
26393 case SDmode:
26394 bits = 0x2;
26395 break;
26397 case DFmode:
26398 case DDmode:
26399 case TFmode:
26400 case TDmode:
26401 case IFmode:
26402 case KFmode:
26403 bits = 0x3;
26404 break;
26406 default:
26407 gcc_unreachable ();
26410 /* If only one bit will fit, don't or in this entry. */
26411 if (next_parm_info_bit > 0)
26412 parm_info |= (bits << (next_parm_info_bit - 1));
26413 next_parm_info_bit -= 2;
26415 else
26417 fixed_parms += ((GET_MODE_SIZE (mode)
26418 + (UNITS_PER_WORD - 1))
26419 / UNITS_PER_WORD);
26420 next_parm_info_bit -= 1;
26426 /* Number of fixed point parameters. */
26427 /* This is actually the number of words of fixed point parameters; thus
26428 an 8 byte struct counts as 2; and thus the maximum value is 8. */
26429 fprintf (file, "%d,", fixed_parms);
26431 /* 2 bitfields: number of floating point parameters (7 bits), parameters
26432 all on stack. */
26433 /* This is actually the number of fp registers that hold parameters;
26434 and thus the maximum value is 13. */
26435 /* Set parameters on stack bit if parameters are not in their original
26436 registers, regardless of whether they are on the stack? Xlc
26437 seems to set the bit when not optimizing. */
26438 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
26440 if (! optional_tbtab)
26441 return;
26443 /* Optional fields follow. Some are variable length. */
26445 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
26446 11 double float. */
26447 /* There is an entry for each parameter in a register, in the order that
26448 they occur in the parameter list. Any intervening arguments on the
26449 stack are ignored. If the list overflows a long (max possible length
26450 34 bits) then completely leave off all elements that don't fit. */
26451 /* Only emit this long if there was at least one parameter. */
26452 if (fixed_parms || float_parms)
26453 fprintf (file, "\t.long %d\n", parm_info);
26455 /* Offset from start of code to tb table. */
26456 fputs ("\t.long ", file);
26457 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
26458 RS6000_OUTPUT_BASENAME (file, fname);
26459 putc ('-', file);
26460 rs6000_output_function_entry (file, fname);
26461 putc ('\n', file);
26463 /* Interrupt handler mask. */
26464 /* Omit this long, since we never set the interrupt handler bit
26465 above. */
26467 /* Number of CTL (controlled storage) anchors. */
26468 /* Omit this long, since the has_ctl bit is never set above. */
26470 /* Displacement into stack of each CTL anchor. */
26471 /* Omit this list of longs, because there are no CTL anchors. */
26473 /* Length of function name. */
26474 if (*fname == '*')
26475 ++fname;
26476 fprintf (file, "\t.short %d\n", (int) strlen (fname));
26478 /* Function name. */
26479 assemble_string (fname, strlen (fname));
26481 /* Register for alloca automatic storage; this is always reg 31.
26482 Only emit this if the alloca bit was set above. */
26483 if (frame_pointer_needed)
26484 fputs ("\t.byte 31\n", file);
26486 fputs ("\t.align 2\n", file);
26490 /* -fsplit-stack support. */
26492 /* A SYMBOL_REF for __morestack. */
26493 static GTY(()) rtx morestack_ref;
26495 static rtx
26496 gen_add3_const (rtx rt, rtx ra, long c)
26498 if (TARGET_64BIT)
26499 return gen_adddi3 (rt, ra, GEN_INT (c));
26500 else
26501 return gen_addsi3 (rt, ra, GEN_INT (c));
26504 /* Emit -fsplit-stack prologue, which goes before the regular function
26505 prologue (at the local entry point in the case of ELFv2). */
26507 void
26508 rs6000_expand_split_stack_prologue (void)
26510 rs6000_stack_t *info = rs6000_stack_info ();
26511 unsigned HOST_WIDE_INT allocate;
26512 long alloc_hi, alloc_lo;
26513 rtx r0, r1, r12, lr, ok_label, compare, jump, call_fusage;
26514 rtx_insn *insn;
26516 gcc_assert (flag_split_stack && reload_completed);
26518 if (!info->push_p)
26519 return;
26521 if (global_regs[29])
26523 error ("-fsplit-stack uses register r29");
26524 inform (DECL_SOURCE_LOCATION (global_regs_decl[29]),
26525 "conflicts with %qD", global_regs_decl[29]);
26528 allocate = info->total_size;
26529 if (allocate > (unsigned HOST_WIDE_INT) 1 << 31)
26531 sorry ("Stack frame larger than 2G is not supported for -fsplit-stack");
26532 return;
26534 if (morestack_ref == NULL_RTX)
26536 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
26537 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
26538 | SYMBOL_FLAG_FUNCTION);
26541 r0 = gen_rtx_REG (Pmode, 0);
26542 r1 = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26543 r12 = gen_rtx_REG (Pmode, 12);
26544 emit_insn (gen_load_split_stack_limit (r0));
26545 /* Always emit two insns here to calculate the requested stack,
26546 so that the linker can edit them when adjusting size for calling
26547 non-split-stack code. */
26548 alloc_hi = (-allocate + 0x8000) & ~0xffffL;
26549 alloc_lo = -allocate - alloc_hi;
26550 if (alloc_hi != 0)
26552 emit_insn (gen_add3_const (r12, r1, alloc_hi));
26553 if (alloc_lo != 0)
26554 emit_insn (gen_add3_const (r12, r12, alloc_lo));
26555 else
26556 emit_insn (gen_nop ());
26558 else
26560 emit_insn (gen_add3_const (r12, r1, alloc_lo));
26561 emit_insn (gen_nop ());
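/* Worked example (illustrative): for allocate == 0x11000, -allocate
   is -0x11000; alloc_hi rounds that to -0x10000 and alloc_lo becomes
   -0x1000, so the pair computes r12 = r1 - 0x10000 - 0x1000.  */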
26564 compare = gen_rtx_REG (CCUNSmode, CR7_REGNO);
26565 emit_insn (gen_rtx_SET (compare, gen_rtx_COMPARE (CCUNSmode, r12, r0)));
26566 ok_label = gen_label_rtx ();
26567 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26568 gen_rtx_GEU (VOIDmode, compare, const0_rtx),
26569 gen_rtx_LABEL_REF (VOIDmode, ok_label),
26570 pc_rtx);
26571 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26572 JUMP_LABEL (jump) = ok_label;
26573 /* Mark the jump as very likely to be taken. */
26574 add_int_reg_note (jump, REG_BR_PROB,
26575 REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100);
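/* (With the usual REG_BR_PROB_BASE of 10000, this records a taken
   probability of 9900/10000, i.e. 99%.)  */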
26577 lr = gen_rtx_REG (Pmode, LR_REGNO);
26578 insn = emit_move_insn (r0, lr);
26579 RTX_FRAME_RELATED_P (insn) = 1;
26580 insn = emit_insn (gen_frame_store (r0, r1, info->lr_save_offset));
26581 RTX_FRAME_RELATED_P (insn) = 1;
26583 insn = emit_call_insn (gen_call (gen_rtx_MEM (SImode, morestack_ref),
26584 const0_rtx, const0_rtx));
26585 call_fusage = NULL_RTX;
26586 use_reg (&call_fusage, r12);
26587 add_function_usage_to (insn, call_fusage);
26588 emit_insn (gen_frame_load (r0, r1, info->lr_save_offset));
26589 insn = emit_move_insn (lr, r0);
26590 add_reg_note (insn, REG_CFA_RESTORE, lr);
26591 RTX_FRAME_RELATED_P (insn) = 1;
26592 emit_insn (gen_split_stack_return ());
26594 emit_label (ok_label);
26595 LABEL_NUSES (ok_label) = 1;
26598 /* Return the internal arg pointer used for function incoming
26599 arguments. With -fsplit-stack, the arg pointer is r12, so we need
26600 to copy it to a pseudo in order for it to be preserved over calls
26601 and suchlike. We'd really like to use a pseudo here for the
26602 internal arg pointer but data-flow analysis is not prepared to
26603 accept pseudos as live at the beginning of a function. */
26605 static rtx
26606 rs6000_internal_arg_pointer (void)
26608 if (flag_split_stack
26609 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
26610 == NULL))
26613 if (cfun->machine->split_stack_arg_pointer == NULL_RTX)
26615 rtx pat;
26617 cfun->machine->split_stack_arg_pointer = gen_reg_rtx (Pmode);
26618 REG_POINTER (cfun->machine->split_stack_arg_pointer) = 1;
26620 /* Put the pseudo initialization right after the note at the
26621 beginning of the function. */
26622 pat = gen_rtx_SET (cfun->machine->split_stack_arg_pointer,
26623 gen_rtx_REG (Pmode, 12));
26624 push_topmost_sequence ();
26625 emit_insn_after (pat, get_insns ());
26626 pop_topmost_sequence ();
26628 return plus_constant (Pmode, cfun->machine->split_stack_arg_pointer,
26629 FIRST_PARM_OFFSET (current_function_decl));
26631 return virtual_incoming_args_rtx;
26634 /* We may have to tell the dataflow pass that the split stack prologue
26635 is initializing a register. */
26637 static void
26638 rs6000_live_on_entry (bitmap regs)
26640 if (flag_split_stack)
26641 bitmap_set_bit (regs, 12);
26644 /* Emit -fsplit-stack dynamic stack allocation space check. */
26646 void
26647 rs6000_split_stack_space_check (rtx size, rtx label)
26649 rtx sp = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
26650 rtx limit = gen_reg_rtx (Pmode);
26651 rtx requested = gen_reg_rtx (Pmode);
26652 rtx cmp = gen_reg_rtx (CCUNSmode);
26653 rtx jump;
26655 emit_insn (gen_load_split_stack_limit (limit));
26656 if (CONST_INT_P (size))
26657 emit_insn (gen_add3_insn (requested, sp, GEN_INT (-INTVAL (size))));
26658 else
26660 size = force_reg (Pmode, size);
26661 emit_move_insn (requested, gen_rtx_MINUS (Pmode, sp, size));
26663 emit_insn (gen_rtx_SET (cmp, gen_rtx_COMPARE (CCUNSmode, requested, limit)));
26664 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
26665 gen_rtx_GEU (VOIDmode, cmp, const0_rtx),
26666 gen_rtx_LABEL_REF (VOIDmode, label),
26667 pc_rtx);
26668 jump = emit_jump_insn (gen_rtx_SET (pc_rtx, jump));
26669 JUMP_LABEL (jump) = label;
26672 /* A C compound statement that outputs the assembler code for a thunk
26673 function, used to implement C++ virtual function calls with
26674 multiple inheritance. The thunk acts as a wrapper around a virtual
26675 function, adjusting the implicit object parameter before handing
26676 control off to the real function.
26678 First, emit code to add the integer DELTA to the location that
26679 contains the incoming first argument. Assume that this argument
26680 contains a pointer, and is the one used to pass the `this' pointer
26681 in C++. This is the incoming argument *before* the function
26682 prologue, e.g. `%o0' on a sparc. The addition must preserve the
26683 values of all other incoming arguments.
26685 After the addition, emit code to jump to FUNCTION, which is a
26686 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
26687 not touch the return address. Hence returning from FUNCTION will
26688 return to whoever called the current `thunk'.
26690 The effect must be as if FUNCTION had been called directly with the
26691 adjusted first argument. This macro is responsible for emitting
26692 all of the code for a thunk function; output_function_prologue()
26693 and output_function_epilogue() are not invoked.
26695 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
26696 been extracted from it.) It might possibly be useful on some
26697 targets, but probably not.
26699 If you do not define this macro, the target-independent code in the
26700 C++ frontend will generate a less efficient heavyweight thunk that
26701 calls FUNCTION instead of jumping to it. The generic approach does
26702 not support varargs. */
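/* An illustrative C++ fragment (not from this file): given

     struct A { virtual void f (); };
     struct B { virtual void g (); };
     struct C : A, B { void g (); };

   a call to g() through a B* that points into a C object enters a
   thunk, which adjusts `this' by DELTA (here, minus the offset of
   the B subobject within C) before tail-jumping to C::g.  */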
26704 static void
26705 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
26706 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
26707 tree function)
26709 rtx this_rtx, funexp;
26710 rtx_insn *insn;
26712 reload_completed = 1;
26713 epilogue_completed = 1;
26715 /* Mark the end of the (empty) prologue. */
26716 emit_note (NOTE_INSN_PROLOGUE_END);
26718 /* Find the "this" pointer. If the function returns a structure,
26719 the structure return pointer is in r3. */
26720 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
26721 this_rtx = gen_rtx_REG (Pmode, 4);
26722 else
26723 this_rtx = gen_rtx_REG (Pmode, 3);
26725 /* Apply the constant offset, if required. */
26726 if (delta)
26727 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
26729 /* Apply the offset from the vtable, if required. */
26730 if (vcall_offset)
26732 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
26733 rtx tmp = gen_rtx_REG (Pmode, 12);
26735 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
26736 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
26738 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
26739 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
26741 else
26743 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
26745 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
26747 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
26750 /* Generate a tail call to the target function. */
26751 if (!TREE_USED (function))
26753 assemble_external (function);
26754 TREE_USED (function) = 1;
26756 funexp = XEXP (DECL_RTL (function), 0);
26757 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26759 #if TARGET_MACHO
26760 if (MACHOPIC_INDIRECT)
26761 funexp = machopic_indirect_call_target (funexp);
26762 #endif
26764 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
26765 generate sibcall RTL explicitly. */
26766 insn = emit_call_insn (
26767 gen_rtx_PARALLEL (VOIDmode,
26768 gen_rtvec (4,
26769 gen_rtx_CALL (VOIDmode,
26770 funexp, const0_rtx),
26771 gen_rtx_USE (VOIDmode, const0_rtx),
26772 gen_rtx_USE (VOIDmode,
26773 gen_rtx_REG (SImode,
26774 LR_REGNO)),
26775 simple_return_rtx)));
26776 SIBLING_CALL_P (insn) = 1;
26777 emit_barrier ();
26779 /* Ensure we have a global entry point for the thunk. ??? We could
26780 avoid that if the target routine doesn't need a global entry point,
26781 but we do not know whether this is the case at this point. */
26782 if (DEFAULT_ABI == ABI_ELFv2)
26783 cfun->machine->r2_setup_needed = true;
26785 /* Run just enough of rest_of_compilation to get the insns emitted.
26786 There's not really enough bulk here to make other passes such as
26787 instruction scheduling worthwhile. Note that use_thunk calls
26788 assemble_start_function and assemble_end_function. */
26789 insn = get_insns ();
26790 shorten_branches (insn);
26791 final_start_function (insn, file, 1);
26792 final (insn, file, 1);
26793 final_end_function ();
26795 reload_completed = 0;
26796 epilogue_completed = 0;
26799 /* A quick summary of the various types of 'constant-pool tables'
26800 under PowerPC:
26802 Target     Flags          Name             One table per
26803 AIX        (none)         AIX TOC          object file
26804 AIX        -mfull-toc     AIX TOC          object file
26805 AIX        -mminimal-toc  AIX minimal TOC  translation unit
26806 SVR4/EABI  (none)         SVR4 SDATA       object file
26807 SVR4/EABI  -fpic          SVR4 pic         object file
26808 SVR4/EABI  -fPIC          SVR4 PIC         translation unit
26809 SVR4/EABI  -mrelocatable  EABI TOC         function
26810 SVR4/EABI  -maix          AIX TOC          object file
26811 SVR4/EABI  -maix -mminimal-toc
26812                           AIX minimal TOC  translation unit
26814 Name             Reg.  Reg. set by  Entries made by  addrs?  fp?      sum?
26817 AIX TOC           2    crt0         as               Y       option   option
26818 AIX minimal TOC  30    prolog       gcc              Y       Y        option
26819 SVR4 SDATA       13    crt0         gcc              N       Y        N
26820 SVR4 pic         30    prolog       ld               Y       not yet  N
26821 SVR4 PIC         30    prolog       gcc              Y       option   option
26822 EABI TOC         30    prolog       gcc              Y       option   option
26826 /* Hash functions for the hash table. */
26828 static unsigned
26829 rs6000_hash_constant (rtx k)
26831 enum rtx_code code = GET_CODE (k);
26832 machine_mode mode = GET_MODE (k);
26833 unsigned result = (code << 3) ^ mode;
26834 const char *format;
26835 int flen, fidx;
26837 format = GET_RTX_FORMAT (code);
26838 flen = strlen (format);
26839 fidx = 0;
26841 switch (code)
26843 case LABEL_REF:
26844 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
26846 case CONST_WIDE_INT:
26848 int i;
26849 flen = CONST_WIDE_INT_NUNITS (k);
26850 for (i = 0; i < flen; i++)
26851 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
26852 return result;
26855 case CONST_DOUBLE:
26856 if (mode != VOIDmode)
26857 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
26858 flen = 2;
26859 break;
26861 case CODE_LABEL:
26862 fidx = 3;
26863 break;
26865 default:
26866 break;
26869 for (; fidx < flen; fidx++)
26870 switch (format[fidx])
26872 case 's':
26874 unsigned i, len;
26875 const char *str = XSTR (k, fidx);
26876 len = strlen (str);
26877 result = result * 613 + len;
26878 for (i = 0; i < len; i++)
26879 result = result * 613 + (unsigned) str[i];
26880 break;
26882 case 'u':
26883 case 'e':
26884 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
26885 break;
26886 case 'i':
26887 case 'n':
26888 result = result * 613 + (unsigned) XINT (k, fidx);
26889 break;
26890 case 'w':
26891 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
26892 result = result * 613 + (unsigned) XWINT (k, fidx);
26893 else
26895 size_t i;
26896 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
26897 result = result * 613 + (unsigned) (XWINT (k, fidx)
26898 >> CHAR_BIT * i);
26900 break;
26901 case '0':
26902 break;
26903 default:
26904 gcc_unreachable ();
26907 return result;
26910 hashval_t
26911 toc_hasher::hash (toc_hash_struct *thc)
26913 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
26916 /* Compare H1 and H2 for equivalence. */
26918 bool
26919 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
26921 rtx r1 = h1->key;
26922 rtx r2 = h2->key;
26924 if (h1->key_mode != h2->key_mode)
26925 return 0;
26927 return rtx_equal_p (r1, r2);
26930 /* These are the names given by the C++ front-end to vtables and
26931 vtable-like objects. Ideally, this logic should not be here;
26932 instead, there should be some programmatic way of inquiring as
26933 to whether or not an object is a vtable. */
26935 #define VTABLE_NAME_P(NAME) \
26936 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
26937 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
26938 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
26939 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
26940 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
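/* Under the Itanium C++ ABI mangling these prefixes denote, for a
   class Foo: _ZTV3Foo (vtable), _ZTT3Foo (VTT), _ZTI3Foo (typeinfo)
   and _ZTC... (construction vtable); "_vt." is the old GNU v2
   mangling for vtables.  */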
26942 #ifdef NO_DOLLAR_IN_LABEL
26943 /* Return a GGC-allocated character string translating dollar signs in
26944 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
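/* For example, "foo$bar" becomes "foo_bar"; a name whose first
   character is '$' is returned unchanged (the q == name test
   below).  */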
26946 const char *
26947 rs6000_xcoff_strip_dollar (const char *name)
26949 char *strip, *p;
26950 const char *q;
26951 size_t len;
26953 q = (const char *) strchr (name, '$');
26955 if (q == 0 || q == name)
26956 return name;
26958 len = strlen (name);
26959 strip = XALLOCAVEC (char, len + 1);
26960 strcpy (strip, name);
26961 p = strip + (q - name);
26962 while (p)
26964 *p = '_';
26965 p = strchr (p + 1, '$');
26968 return ggc_alloc_string (strip, len);
26970 #endif
26972 void
26973 rs6000_output_symbol_ref (FILE *file, rtx x)
26975 /* Currently C++ toc references to vtables can be emitted before it
26976 is decided whether the vtable is public or private. If this is
26977 the case, then the linker will eventually complain that there is
26978 a reference to an unknown section. Thus, for vtables only,
26979 we emit the TOC reference to reference the symbol and not the
26980 section. */
26981 const char *name = XSTR (x, 0);
26983 tree decl = SYMBOL_REF_DECL (x);
26984 if (decl /* sync condition with assemble_external () */
26985 && DECL_P (decl) && DECL_EXTERNAL (decl) && TREE_PUBLIC (decl)
26986 && (TREE_CODE (decl) == VAR_DECL
26987 || TREE_CODE (decl) == FUNCTION_DECL)
26988 && name[strlen (name) - 1] != ']')
26990 name = concat (name,
26991 (TREE_CODE (decl) == FUNCTION_DECL
26992 ? "[DS]" : "[UA]"),
26993 NULL);
26994 XSTR (x, 0) = name;
26997 if (VTABLE_NAME_P (name))
26999 RS6000_OUTPUT_BASENAME (file, name);
27001 else
27002 assemble_name (file, name);
27005 /* Output a TOC entry. We derive the entry name from what is being
27006 written. */
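/* For instance (illustrative output): on 32-bit AIX with a full TOC,
   the DFmode constant 1.0 (target image 0x3ff00000 00000000) is
   emitted as
	.tc FD_3ff00000_0[TC],0x3ff00000,0x0
   while ELF or -mminimal-toc targets emit the raw words after
   ".long " (or DOUBLE_INT_ASM_OP when 64-bit) with no .tc line.  */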
27008 void
27009 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
27011 char buf[256];
27012 const char *name = buf;
27013 rtx base = x;
27014 HOST_WIDE_INT offset = 0;
27016 gcc_assert (!TARGET_NO_TOC);
27018 /* When the linker won't eliminate them, don't output duplicate
27019 TOC entries (this happens on AIX if there is any kind of TOC,
27020 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
27021 CODE_LABELs. */
27022 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
27024 struct toc_hash_struct *h;
27026 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
27027 time because GGC is not initialized at that point. */
27028 if (toc_hash_table == NULL)
27029 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
27031 h = ggc_alloc<toc_hash_struct> ();
27032 h->key = x;
27033 h->key_mode = mode;
27034 h->labelno = labelno;
27036 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
27037 if (*found == NULL)
27038 *found = h;
27039 else /* This is indeed a duplicate.
27040 Set this label equal to that label. */
27042 fputs ("\t.set ", file);
27043 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
27044 fprintf (file, "%d,", labelno);
27045 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
27046 fprintf (file, "%d\n", ((*found)->labelno));
27048 #ifdef HAVE_AS_TLS
27049 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
27050 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
27051 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
27053 fputs ("\t.set ", file);
27054 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
27055 fprintf (file, "%d,", labelno);
27056 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
27057 fprintf (file, "%d\n", ((*found)->labelno));
27059 #endif
27060 return;
27064 /* If we're going to put a double constant in the TOC, make sure it's
27065 aligned properly when strict alignment is on. */
27066 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
27067 && STRICT_ALIGNMENT
27068 && GET_MODE_BITSIZE (mode) >= 64
27069 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
27070 ASM_OUTPUT_ALIGN (file, 3);
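/* ASM_OUTPUT_ALIGN takes a log2 argument, so 3 requests 2**3 == 8-byte
   alignment.  */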
27073 (*targetm.asm_out.internal_label) (file, "LC", labelno);
27075 /* Handle FP constants specially. Note that if we have a minimal
27076 TOC, things we put here aren't actually in the TOC, so we can allow
27077 FP constants. */
27078 if (GET_CODE (x) == CONST_DOUBLE &&
27079 (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode
27080 || GET_MODE (x) == IFmode || GET_MODE (x) == KFmode))
27082 REAL_VALUE_TYPE rv;
27083 long k[4];
27085 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
27086 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
27087 REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
27088 else
27089 REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
27091 if (TARGET_64BIT)
27093 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27094 fputs (DOUBLE_INT_ASM_OP, file);
27095 else
27096 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
27097 k[0] & 0xffffffff, k[1] & 0xffffffff,
27098 k[2] & 0xffffffff, k[3] & 0xffffffff);
27099 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
27100 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
27101 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
27102 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
27103 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
27104 return;
27106 else
27108 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27109 fputs ("\t.long ", file);
27110 else
27111 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
27112 k[0] & 0xffffffff, k[1] & 0xffffffff,
27113 k[2] & 0xffffffff, k[3] & 0xffffffff);
27114 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
27115 k[0] & 0xffffffff, k[1] & 0xffffffff,
27116 k[2] & 0xffffffff, k[3] & 0xffffffff);
27117 return;
27120 else if (GET_CODE (x) == CONST_DOUBLE &&
27121 (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
27123 REAL_VALUE_TYPE rv;
27124 long k[2];
27126 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
27128 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
27129 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
27130 else
27131 REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
27133 if (TARGET_64BIT)
27135 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27136 fputs (DOUBLE_INT_ASM_OP, file);
27137 else
27138 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
27139 k[0] & 0xffffffff, k[1] & 0xffffffff);
27140 fprintf (file, "0x%lx%08lx\n",
27141 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
27142 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
27143 return;
27145 else
27147 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27148 fputs ("\t.long ", file);
27149 else
27150 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
27151 k[0] & 0xffffffff, k[1] & 0xffffffff);
27152 fprintf (file, "0x%lx,0x%lx\n",
27153 k[0] & 0xffffffff, k[1] & 0xffffffff);
27154 return;
27157 else if (GET_CODE (x) == CONST_DOUBLE &&
27158 (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
27160 REAL_VALUE_TYPE rv;
27161 long l;
27163 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
27164 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
27165 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
27166 else
27167 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
27169 if (TARGET_64BIT)
27171 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27172 fputs (DOUBLE_INT_ASM_OP, file);
27173 else
27174 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
27175 if (WORDS_BIG_ENDIAN)
27176 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
27177 else
27178 fprintf (file, "0x%lx\n", l & 0xffffffff);
27179 return;
27181 else
27183 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27184 fputs ("\t.long ", file);
27185 else
27186 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
27187 fprintf (file, "0x%lx\n", l & 0xffffffff);
27188 return;
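/* Worked example (illustrative): the SFmode constant 1.0f has bit
   pattern 0x3f800000, so the 64-bit big-endian path above emits it
   padded into the high half of one doubleword TOC entry, as
   0x3f80000000000000, while the 32-bit path emits just 0x3f800000.  */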
27191 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
27193 unsigned HOST_WIDE_INT low;
27194 HOST_WIDE_INT high;
27196 low = INTVAL (x) & 0xffffffff;
27197 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
27199 /* TOC entries are always Pmode-sized, so when big-endian
27200 smaller integer constants in the TOC need to be padded.
27201 (This is still a win over putting the constants in
27202 a separate constant pool, because then we'd have
27203 to have both a TOC entry _and_ the actual constant.)
27205 For a 32-bit target, CONST_INT values are loaded and shifted
27206 entirely within `low' and can be stored in one TOC entry. */
27208 /* It would be easy to make this work, but it doesn't now. */
27209 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
27211 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
27213 low |= high << 32;
27214 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
27215 high = (HOST_WIDE_INT) low >> 32;
27216 low &= 0xffffffff;
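/* Worked example (illustrative): on a 64-bit big-endian target, an
   SImode constant 0x12345678 (POINTER_SIZE 64, mode size 32) goes
   through
     low  = 0x12345678, high = 0
     low |= high << 32;   ->  0x12345678
     low <<= 64 - 32;     ->  0x1234567800000000
     high = low >> 32;    ->  0x12345678
     low &= 0xffffffff;   ->  0
   so the constant ends up left-justified in the doubleword TOC entry.  */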
27219 if (TARGET_64BIT)
27221 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27222 fputs (DOUBLE_INT_ASM_OP, file);
27223 else
27224 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
27225 (long) high & 0xffffffff, (long) low & 0xffffffff);
27226 fprintf (file, "0x%lx%08lx\n",
27227 (long) high & 0xffffffff, (long) low & 0xffffffff);
27228 return;
27230 else
27232 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
27234 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27235 fputs ("\t.long ", file);
27236 else
27237 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
27238 (long) high & 0xffffffff, (long) low & 0xffffffff);
27239 fprintf (file, "0x%lx,0x%lx\n",
27240 (long) high & 0xffffffff, (long) low & 0xffffffff);
27242 else
27244 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27245 fputs ("\t.long ", file);
27246 else
27247 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
27248 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
27250 return;
27254 if (GET_CODE (x) == CONST)
27256 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
27257 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
27259 base = XEXP (XEXP (x, 0), 0);
27260 offset = INTVAL (XEXP (XEXP (x, 0), 1));
27263 switch (GET_CODE (base))
27265 case SYMBOL_REF:
27266 name = XSTR (base, 0);
27267 break;
27269 case LABEL_REF:
27270 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
27271 CODE_LABEL_NUMBER (XEXP (base, 0)));
27272 break;
27274 case CODE_LABEL:
27275 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
27276 break;
27278 default:
27279 gcc_unreachable ();
27282 if (TARGET_ELF || TARGET_MINIMAL_TOC)
27283 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
27284 else
27286 fputs ("\t.tc ", file);
27287 RS6000_OUTPUT_BASENAME (file, name);
27289 if (offset < 0)
27290 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
27291 else if (offset)
27292 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
27294 /* Mark large TOC symbols on AIX with [TE] so they are mapped
27295 after other TOC symbols, reducing overflow of small TOC access
27296 to [TC] symbols. */
27297 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
27298 ? "[TE]," : "[TC],", file);
27301 /* Currently C++ TOC references to vtables can be emitted before it
27302 is decided whether the vtable is public or private. If this is
27303 the case, then the linker will eventually complain that there is
27304 a TOC reference to an unknown section. Thus, for vtables only,
27305 we emit the TOC reference to reference the symbol and not the
27306 section. */
27307 if (VTABLE_NAME_P (name))
27309 RS6000_OUTPUT_BASENAME (file, name);
27310 if (offset < 0)
27311 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
27312 else if (offset > 0)
27313 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
27315 else
27316 output_addr_const (file, x);
27318 #if HAVE_AS_TLS
27319 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF
27320 && SYMBOL_REF_TLS_MODEL (base) != 0)
27322 if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
27323 fputs ("@le", file);
27324 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_INITIAL_EXEC)
27325 fputs ("@ie", file);
27326 /* Use global-dynamic for local-dynamic. */
27327 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_GLOBAL_DYNAMIC
27328 || SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_DYNAMIC)
27330 putc ('\n', file);
27331 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
27332 fputs ("\t.tc .", file);
27333 RS6000_OUTPUT_BASENAME (file, name);
27334 fputs ("[TC],", file);
27335 output_addr_const (file, x);
27336 fputs ("@m", file);
27339 #endif
27341 putc ('\n', file);
27344 /* Output an assembler pseudo-op to write an ASCII string of N characters
27345 starting at P to FILE.
27347 On the RS/6000, we have to do this using the .byte operation and
27348 write out special characters outside the quoted string.
27349 Also, the assembler is broken; very long strings are truncated,
27350 so we must artificially break them up early. */
27352 void
27353 output_ascii (FILE *file, const char *p, int n)
27355 char c;
27356 int i, count_string;
27357 const char *for_string = "\t.byte \"";
27358 const char *for_decimal = "\t.byte ";
27359 const char *to_close = NULL;
27361 count_string = 0;
27362 for (i = 0; i < n; i++)
27364 c = *p++;
27365 if (c >= ' ' && c < 0177)
27367 if (for_string)
27368 fputs (for_string, file);
27369 putc (c, file);
27371 /* Write two quotes to get one. */
27372 if (c == '"')
27374 putc (c, file);
27375 ++count_string;
27378 for_string = NULL;
27379 for_decimal = "\"\n\t.byte ";
27380 to_close = "\"\n";
27381 ++count_string;
27383 if (count_string >= 512)
27385 fputs (to_close, file);
27387 for_string = "\t.byte \"";
27388 for_decimal = "\t.byte ";
27389 to_close = NULL;
27390 count_string = 0;
27393 else
27395 if (for_decimal)
27396 fputs (for_decimal, file);
27397 fprintf (file, "%d", c);
27399 for_string = "\n\t.byte \"";
27400 for_decimal = ", ";
27401 to_close = "\n";
27402 count_string = 0;
27406 /* Now close the string if we have written one. Then end the line. */
27407 if (to_close)
27408 fputs (to_close, file);
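/* Example (illustrative): output_ascii (file, "Hi\"\n", 4) emits
	.byte "Hi"""
	.byte 10
   The quote character is doubled inside the quoted string, and the
   unprintable newline is emitted as a decimal .byte value on a new
   directive.  */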
27411 /* Generate a unique section name for FILENAME for a section type
27412 represented by SECTION_DESC. Output goes into BUF.
27414 SECTION_DESC can be any string, as long as it is different for each
27415 possible section type.
27417 We name the section in the same manner as xlc. The name begins with an
27418 underscore followed by the filename (after stripping any leading directory
27419 names) with the last period replaced by the string SECTION_DESC. If
27420 FILENAME does not contain a period, SECTION_DESC is appended to the end of
27421 the name. */
27423 void
27424 rs6000_gen_section_name (char **buf, const char *filename,
27425 const char *section_desc)
27427 const char *q, *after_last_slash, *last_period = 0;
27428 char *p;
27429 int len;
27431 after_last_slash = filename;
27432 for (q = filename; *q; q++)
27434 if (*q == '/')
27435 after_last_slash = q + 1;
27436 else if (*q == '.')
27437 last_period = q;
27440 len = strlen (after_last_slash) + strlen (section_desc) + 2;
27441 *buf = (char *) xmalloc (len);
27443 p = *buf;
27444 *p++ = '_';
27446 for (q = after_last_slash; *q; q++)
27448 if (q == last_period)
27450 strcpy (p, section_desc);
27451 p += strlen (section_desc);
27452 break;
27455 else if (ISALNUM (*q))
27456 *p++ = *q;
27459 if (last_period == 0)
27460 strcpy (p, section_desc);
27461 else
27462 *p = '\0';
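/* Usage example (illustrative, with a hypothetical SECTION_DESC):
   rs6000_gen_section_name (&buf, "src/foo.c", "bss") stores "_foobss"
   in BUF: the directory prefix is stripped, alphanumeric characters
   are copied, and the final period is replaced by SECTION_DESC.  */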
27465 /* Emit profile function. */
27467 void
27468 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
27470 /* Non-standard profiling for kernels, which just saves LR then calls
27471 _mcount without worrying about arg saves. The idea is to change
27472 the function prologue as little as possible as it isn't easy to
27473 account for arg save/restore code added just for _mcount. */
27474 if (TARGET_PROFILE_KERNEL)
27475 return;
27477 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
27479 #ifndef NO_PROFILE_COUNTERS
27480 # define NO_PROFILE_COUNTERS 0
27481 #endif
27482 if (NO_PROFILE_COUNTERS)
27483 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
27484 LCT_NORMAL, VOIDmode, 0);
27485 else
27487 char buf[30];
27488 const char *label_name;
27489 rtx fun;
27491 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
27492 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
27493 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
27495 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
27496 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
27499 else if (DEFAULT_ABI == ABI_DARWIN)
27501 const char *mcount_name = RS6000_MCOUNT;
27502 int caller_addr_regno = LR_REGNO;
27504 /* Be conservative and always set this, at least for now. */
27505 crtl->uses_pic_offset_table = 1;
27507 #if TARGET_MACHO
27508 /* For PIC code, set up a stub and collect the caller's address
27509 from r0, which is where the prologue puts it. */
27510 if (MACHOPIC_INDIRECT
27511 && crtl->uses_pic_offset_table)
27512 caller_addr_regno = 0;
27513 #endif
27514 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
27515 LCT_NORMAL, VOIDmode, 1,
27516 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
27520 /* Write function profiler code. */
27522 void
27523 output_function_profiler (FILE *file, int labelno)
27525 char buf[100];
27527 switch (DEFAULT_ABI)
27529 default:
27530 gcc_unreachable ();
27532 case ABI_V4:
27533 if (!TARGET_32BIT)
27535 warning (0, "no profiling of 64-bit code for this ABI");
27536 return;
27538 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
27539 fprintf (file, "\tmflr %s\n", reg_names[0]);
27540 if (NO_PROFILE_COUNTERS)
27542 asm_fprintf (file, "\tstw %s,4(%s)\n",
27543 reg_names[0], reg_names[1]);
27545 else if (TARGET_SECURE_PLT && flag_pic)
27547 if (TARGET_LINK_STACK)
27549 char name[32];
27550 get_ppc476_thunk_name (name);
27551 asm_fprintf (file, "\tbl %s\n", name);
27553 else
27554 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
27555 asm_fprintf (file, "\tstw %s,4(%s)\n",
27556 reg_names[0], reg_names[1]);
27557 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
27558 asm_fprintf (file, "\taddis %s,%s,",
27559 reg_names[12], reg_names[12]);
27560 assemble_name (file, buf);
27561 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
27562 assemble_name (file, buf);
27563 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
27565 else if (flag_pic == 1)
27567 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
27568 asm_fprintf (file, "\tstw %s,4(%s)\n",
27569 reg_names[0], reg_names[1]);
27570 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
27571 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
27572 assemble_name (file, buf);
27573 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
27575 else if (flag_pic > 1)
27577 asm_fprintf (file, "\tstw %s,4(%s)\n",
27578 reg_names[0], reg_names[1]);
27579 /* Now, we need to get the address of the label. */
27580 if (TARGET_LINK_STACK)
27582 char name[32];
27583 get_ppc476_thunk_name (name);
27584 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
27585 assemble_name (file, buf);
27586 fputs ("-.\n1:", file);
27587 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
27588 asm_fprintf (file, "\taddi %s,%s,4\n",
27589 reg_names[11], reg_names[11]);
27591 else
27593 fputs ("\tbcl 20,31,1f\n\t.long ", file);
27594 assemble_name (file, buf);
27595 fputs ("-.\n1:", file);
27596 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
27598 asm_fprintf (file, "\tlwz %s,0(%s)\n",
27599 reg_names[0], reg_names[11]);
27600 asm_fprintf (file, "\tadd %s,%s,%s\n",
27601 reg_names[0], reg_names[0], reg_names[11]);
27603 else
27605 asm_fprintf (file, "\tlis %s,", reg_names[12]);
27606 assemble_name (file, buf);
27607 fputs ("@ha\n", file);
27608 asm_fprintf (file, "\tstw %s,4(%s)\n",
27609 reg_names[0], reg_names[1]);
27610 asm_fprintf (file, "\tla %s,", reg_names[0]);
27611 assemble_name (file, buf);
27612 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
27615 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
27616 fprintf (file, "\tbl %s%s\n",
27617 RS6000_MCOUNT, flag_pic ? "@plt" : "");
27618 break;
27620 case ABI_AIX:
27621 case ABI_ELFv2:
27622 case ABI_DARWIN:
27623 /* Don't do anything, done in output_profile_hook (). */
27624 break;
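/* Example (illustrative): for ABI_V4, non-PIC, with profile counters
   enabled and labelno 0, the ABI_V4 case above emits roughly
	mflr 0
	lis 12,.LP0@ha
	stw 0,4(1)
	la 0,.LP0@l(12)
	bl _mcount
   saving LR into the stack word at 4(r1) before the call and forming
   the counter label's address in r0.  */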
27630 /* The following variable value is the last issued insn. */
27632 static rtx last_scheduled_insn;
27634 /* The following variable helps to balance issuing of load and
27635 store instructions.  */
27637 static int load_store_pendulum;
27639 /* Power4 load update and store update instructions are cracked into a
27640 load or store and an integer insn which are executed in the same cycle.
27641 Branches have their own dispatch slot which does not count against the
27642 GCC issue rate, but it changes the program flow so there are no other
27643 instructions to issue in this cycle. */
27645 static int
27646 rs6000_variable_issue_1 (rtx_insn *insn, int more)
27648 last_scheduled_insn = insn;
27649 if (GET_CODE (PATTERN (insn)) == USE
27650 || GET_CODE (PATTERN (insn)) == CLOBBER)
27652 cached_can_issue_more = more;
27653 return cached_can_issue_more;
27656 if (insn_terminates_group_p (insn, current_group))
27658 cached_can_issue_more = 0;
27659 return cached_can_issue_more;
27662 /* The insn has no reservation but we reached here anyway; leave the issue count unchanged. */
27663 if (recog_memoized (insn) < 0)
27664 return more;
27666 if (rs6000_sched_groups)
27668 if (is_microcoded_insn (insn))
27669 cached_can_issue_more = 0;
27670 else if (is_cracked_insn (insn))
27671 cached_can_issue_more = more > 2 ? more - 2 : 0;
27672 else
27673 cached_can_issue_more = more - 1;
27675 return cached_can_issue_more;
27678 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
27679 return 0;
27681 cached_can_issue_more = more - 1;
27682 return cached_can_issue_more;
27685 static int
27686 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
27688 int r = rs6000_variable_issue_1 (insn, more);
27689 if (verbose)
27690 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
27691 return r;
27694 /* Adjust the cost of a scheduling dependency. Return the new cost of
27695 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
27697 static int
27698 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
27700 enum attr_type attr_type;
27702 if (! recog_memoized (insn))
27703 return 0;
27705 switch (REG_NOTE_KIND (link))
27707 case REG_DEP_TRUE:
27709 /* Data dependency; DEP_INSN writes a register that INSN reads
27710 some cycles later. */
27712 /* Separate a load from a narrower, dependent store. */
27713 if (rs6000_sched_groups
27714 && GET_CODE (PATTERN (insn)) == SET
27715 && GET_CODE (PATTERN (dep_insn)) == SET
27716 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
27717 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
27718 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
27719 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
27720 return cost + 14;
27722 attr_type = get_attr_type (insn);
27724 switch (attr_type)
27726 case TYPE_JMPREG:
27727 /* Tell the first scheduling pass about the latency between
27728 a mtctr and bctr (and mtlr and br/blr). The first
27729 scheduling pass will not know about this latency since
27730 the mtctr instruction, which has the latency associated
27731 to it, will be generated by reload. */
27732 return 4;
27733 case TYPE_BRANCH:
27734 /* Leave some extra cycles between a compare and its
27735 dependent branch, to inhibit expensive mispredicts. */
27736 if ((rs6000_cpu_attr == CPU_PPC603
27737 || rs6000_cpu_attr == CPU_PPC604
27738 || rs6000_cpu_attr == CPU_PPC604E
27739 || rs6000_cpu_attr == CPU_PPC620
27740 || rs6000_cpu_attr == CPU_PPC630
27741 || rs6000_cpu_attr == CPU_PPC750
27742 || rs6000_cpu_attr == CPU_PPC7400
27743 || rs6000_cpu_attr == CPU_PPC7450
27744 || rs6000_cpu_attr == CPU_PPCE5500
27745 || rs6000_cpu_attr == CPU_PPCE6500
27746 || rs6000_cpu_attr == CPU_POWER4
27747 || rs6000_cpu_attr == CPU_POWER5
27748 || rs6000_cpu_attr == CPU_POWER7
27749 || rs6000_cpu_attr == CPU_POWER8
27750 || rs6000_cpu_attr == CPU_CELL)
27751 && recog_memoized (dep_insn)
27752 && (INSN_CODE (dep_insn) >= 0))
27754 switch (get_attr_type (dep_insn))
27756 case TYPE_CMP:
27757 case TYPE_FPCOMPARE:
27758 case TYPE_CR_LOGICAL:
27759 case TYPE_DELAYED_CR:
27760 return cost + 2;
27761 case TYPE_EXTS:
27762 case TYPE_MUL:
27763 if (get_attr_dot (dep_insn) == DOT_YES)
27764 return cost + 2;
27765 else
27766 break;
27767 case TYPE_SHIFT:
27768 if (get_attr_dot (dep_insn) == DOT_YES
27769 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
27770 return cost + 2;
27771 else
27772 break;
27773 default:
27774 break;
27776 break;
27778 case TYPE_STORE:
27779 case TYPE_FPSTORE:
27780 if ((rs6000_cpu == PROCESSOR_POWER6)
27781 && recog_memoized (dep_insn)
27782 && (INSN_CODE (dep_insn) >= 0))
27785 if (GET_CODE (PATTERN (insn)) != SET)
27786 /* If this happens, we have to extend this to schedule
27787 optimally. Return default for now. */
27788 return cost;
27790 /* Adjust the cost for the case where the value written
27791 by a fixed point operation is used as the address
27792 gen value on a store. */
27793 switch (get_attr_type (dep_insn))
27795 case TYPE_LOAD:
27796 case TYPE_CNTLZ:
27798 if (! store_data_bypass_p (dep_insn, insn))
27799 return get_attr_sign_extend (dep_insn)
27800 == SIGN_EXTEND_YES ? 6 : 4;
27801 break;
27803 case TYPE_SHIFT:
27805 if (! store_data_bypass_p (dep_insn, insn))
27806 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
27807 6 : 3;
27808 break;
27810 case TYPE_INTEGER:
27811 case TYPE_ADD:
27812 case TYPE_LOGICAL:
27813 case TYPE_EXTS:
27814 case TYPE_INSERT:
27816 if (! store_data_bypass_p (dep_insn, insn))
27817 return 3;
27818 break;
27820 case TYPE_STORE:
27821 case TYPE_FPLOAD:
27822 case TYPE_FPSTORE:
27824 if (get_attr_update (dep_insn) == UPDATE_YES
27825 && ! store_data_bypass_p (dep_insn, insn))
27826 return 3;
27827 break;
27829 case TYPE_MUL:
27831 if (! store_data_bypass_p (dep_insn, insn))
27832 return 17;
27833 break;
27835 case TYPE_DIV:
27837 if (! store_data_bypass_p (dep_insn, insn))
27838 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
27839 break;
27841 default:
27842 break;
27845 break;
27847 case TYPE_LOAD:
27848 if ((rs6000_cpu == PROCESSOR_POWER6)
27849 && recog_memoized (dep_insn)
27850 && (INSN_CODE (dep_insn) >= 0))
27853 /* Adjust the cost for the case where the value written
27854 by a fixed point instruction is used within the address
27855 gen portion of a subsequent load(u)(x) */
27856 switch (get_attr_type (dep_insn))
27858 case TYPE_LOAD:
27859 case TYPE_CNTLZ:
27861 if (set_to_load_agen (dep_insn, insn))
27862 return get_attr_sign_extend (dep_insn)
27863 == SIGN_EXTEND_YES ? 6 : 4;
27864 break;
27866 case TYPE_SHIFT:
27868 if (set_to_load_agen (dep_insn, insn))
27869 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
27870 6 : 3;
27871 break;
27873 case TYPE_INTEGER:
27874 case TYPE_ADD:
27875 case TYPE_LOGICAL:
27876 case TYPE_EXTS:
27877 case TYPE_INSERT:
27879 if (set_to_load_agen (dep_insn, insn))
27880 return 3;
27881 break;
27883 case TYPE_STORE:
27884 case TYPE_FPLOAD:
27885 case TYPE_FPSTORE:
27887 if (get_attr_update (dep_insn) == UPDATE_YES
27888 && set_to_load_agen (dep_insn, insn))
27889 return 3;
27890 break;
27892 case TYPE_MUL:
27894 if (set_to_load_agen (dep_insn, insn))
27895 return 17;
27896 break;
27898 case TYPE_DIV:
27900 if (set_to_load_agen (dep_insn, insn))
27901 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
27902 break;
27904 default:
27905 break;
27908 break;
27910 case TYPE_FPLOAD:
27911 if ((rs6000_cpu == PROCESSOR_POWER6)
27912 && get_attr_update (insn) == UPDATE_NO
27913 && recog_memoized (dep_insn)
27914 && (INSN_CODE (dep_insn) >= 0)
27915 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
27916 return 2;
27918 default:
27919 break;
27922 /* Fall out to return default cost. */
27924 break;
27926 case REG_DEP_OUTPUT:
27927 /* Output dependency; DEP_INSN writes a register that INSN writes some
27928 cycles later. */
27929 if ((rs6000_cpu == PROCESSOR_POWER6)
27930 && recog_memoized (dep_insn)
27931 && (INSN_CODE (dep_insn) >= 0))
27933 attr_type = get_attr_type (insn);
27935 switch (attr_type)
27937 case TYPE_FP:
27938 if (get_attr_type (dep_insn) == TYPE_FP)
27939 return 1;
27940 break;
27941 case TYPE_FPLOAD:
27942 if (get_attr_update (insn) == UPDATE_NO
27943 && get_attr_type (dep_insn) == TYPE_MFFGPR)
27944 return 2;
27945 break;
27946 default:
27947 break;
27950 case REG_DEP_ANTI:
27951 /* Anti dependency; DEP_INSN reads a register that INSN writes some
27952 cycles later. */
27953 return 0;
27955 default:
27956 gcc_unreachable ();
27959 return cost;
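/* Example (illustrative): on POWER6, when a multiply (TYPE_MUL) feeds
   the address-generation operand of a following store rather than the
   stored data (so store_data_bypass_p is false), the TYPE_STORE case
   above raises the dependence cost to 17 cycles.  */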
27962 /* Debug version of rs6000_adjust_cost. */
27964 static int
27965 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
27966 int cost)
27968 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
27970 if (ret != cost)
27972 const char *dep;
27974 switch (REG_NOTE_KIND (link))
27976 default: dep = "unknown dependency"; break;
27977 case REG_DEP_TRUE: dep = "data dependency"; break;
27978 case REG_DEP_OUTPUT: dep = "output dependency"; break;
27979 case REG_DEP_ANTI: dep = "anti dependency"; break;
27982 fprintf (stderr,
27983 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
27984 "%s, insn:\n", ret, cost, dep);
27986 debug_rtx (insn);
27989 return ret;
27992 /* The function returns true if INSN is microcoded.
27993 Return false otherwise. */
27995 static bool
27996 is_microcoded_insn (rtx_insn *insn)
27998 if (!insn || !NONDEBUG_INSN_P (insn)
27999 || GET_CODE (PATTERN (insn)) == USE
28000 || GET_CODE (PATTERN (insn)) == CLOBBER)
28001 return false;
28003 if (rs6000_cpu_attr == CPU_CELL)
28004 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
28006 if (rs6000_sched_groups
28007 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
28009 enum attr_type type = get_attr_type (insn);
28010 if ((type == TYPE_LOAD
28011 && get_attr_update (insn) == UPDATE_YES
28012 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
28013 || ((type == TYPE_LOAD || type == TYPE_STORE)
28014 && get_attr_update (insn) == UPDATE_YES
28015 && get_attr_indexed (insn) == INDEXED_YES)
28016 || type == TYPE_MFCR)
28017 return true;
28020 return false;
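/* Example (illustrative): on POWER4/POWER5 this treats as microcoded a
   sign-extending load with update (lhau-style), any load or store with
   update in indexed form (lwzux/stwux-style), and mfcr.  */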
28023 /* The function returns true if INSN is cracked into 2 instructions
28024 by the processor (and therefore occupies 2 issue slots). */
28026 static bool
28027 is_cracked_insn (rtx_insn *insn)
28029 if (!insn || !NONDEBUG_INSN_P (insn)
28030 || GET_CODE (PATTERN (insn)) == USE
28031 || GET_CODE (PATTERN (insn)) == CLOBBER)
28032 return false;
28034 if (rs6000_sched_groups
28035 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
28037 enum attr_type type = get_attr_type (insn);
28038 if ((type == TYPE_LOAD
28039 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28040 && get_attr_update (insn) == UPDATE_NO)
28041 || (type == TYPE_LOAD
28042 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
28043 && get_attr_update (insn) == UPDATE_YES
28044 && get_attr_indexed (insn) == INDEXED_NO)
28045 || (type == TYPE_STORE
28046 && get_attr_update (insn) == UPDATE_YES
28047 && get_attr_indexed (insn) == INDEXED_NO)
28048 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
28049 && get_attr_update (insn) == UPDATE_YES)
28050 || type == TYPE_DELAYED_CR
28051 || (type == TYPE_EXTS
28052 && get_attr_dot (insn) == DOT_YES)
28053 || (type == TYPE_SHIFT
28054 && get_attr_dot (insn) == DOT_YES
28055 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
28056 || (type == TYPE_MUL
28057 && get_attr_dot (insn) == DOT_YES)
28058 || type == TYPE_DIV
28059 || (type == TYPE_INSERT
28060 && get_attr_size (insn) == SIZE_32))
28061 return true;
28064 return false;
28067 /* The function returns true if INSN can be issued only from
28068 the branch slot. */
28070 static bool
28071 is_branch_slot_insn (rtx_insn *insn)
28073 if (!insn || !NONDEBUG_INSN_P (insn)
28074 || GET_CODE (PATTERN (insn)) == USE
28075 || GET_CODE (PATTERN (insn)) == CLOBBER)
28076 return false;
28078 if (rs6000_sched_groups)
28080 enum attr_type type = get_attr_type (insn);
28081 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
28082 return true;
28083 return false;
28086 return false;
28089 /* The function returns true if OUT_INSN sets a value that is
28090 used in the address generation computation of IN_INSN.  */
28091 static bool
28092 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
28094 rtx out_set, in_set;
28096 /* For performance reasons, only handle the simple case where
28097 both loads are a single_set. */
28098 out_set = single_set (out_insn);
28099 if (out_set)
28101 in_set = single_set (in_insn);
28102 if (in_set)
28103 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
28106 return false;
28109 /* Try to determine base/offset/size parts of the given MEM.
28110 Return true if successful, false if the values couldn't all
28111 be determined.
28113 This function only looks for REG or REG+CONST address forms.
28114 REG+REG address form will return false. */
28116 static bool
28117 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
28118 HOST_WIDE_INT *size)
28120 rtx addr_rtx;
28121 if (MEM_SIZE_KNOWN_P (mem))
28122 *size = MEM_SIZE (mem);
28123 else
28124 return false;
28126 addr_rtx = (XEXP (mem, 0));
28127 if (GET_CODE (addr_rtx) == PRE_MODIFY)
28128 addr_rtx = XEXP (addr_rtx, 1);
28130 *offset = 0;
28131 while (GET_CODE (addr_rtx) == PLUS
28132 && CONST_INT_P (XEXP (addr_rtx, 1)))
28134 *offset += INTVAL (XEXP (addr_rtx, 1));
28135 addr_rtx = XEXP (addr_rtx, 0);
28137 if (!REG_P (addr_rtx))
28138 return false;
28140 *base = addr_rtx;
28141 return true;
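/* Example (illustrative): for (mem:DI (plus:DI (reg:DI 9) (const_int 16)))
   with a known 8-byte size, this returns base = (reg:DI 9), offset = 16,
   size = 8; a reg+reg (indexed) address or a MEM of unknown size makes
   it return false.  */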
28144 /* Return true if the target storage location of MEM1 is adjacent
28145 to the target storage location of MEM2.  */
28148 static bool
28149 adjacent_mem_locations (rtx mem1, rtx mem2)
28151 rtx reg1, reg2;
28152 HOST_WIDE_INT off1, size1, off2, size2;
28154 if (get_memref_parts (mem1, &reg1, &off1, &size1)
28155 && get_memref_parts (mem2, &reg2, &off2, &size2))
28156 return ((REGNO (reg1) == REGNO (reg2))
28157 && ((off1 + size1 == off2)
28158 || (off2 + size2 == off1)));
28160 return false;
28163 /* This function returns true if it can be determined that the two MEM
28164 locations overlap by at least 1 byte based on base reg/offset/size. */
28166 static bool
28167 mem_locations_overlap (rtx mem1, rtx mem2)
28169 rtx reg1, reg2;
28170 HOST_WIDE_INT off1, size1, off2, size2;
28172 if (get_memref_parts (mem1, &reg1, &off1, &size1)
28173 && get_memref_parts (mem2, &reg2, &off2, &size2))
28174 return ((REGNO (reg1) == REGNO (reg2))
28175 && (((off1 <= off2) && (off1 + size1 > off2))
28176 || ((off2 <= off1) && (off2 + size2 > off1))));
28178 return false;
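/* Worked example (illustrative): with a common base register, an 8-byte
   access at offset 0 and an 8-byte access at offset 8 are adjacent
   (0 + 8 == 8) but do not overlap, while an 8-byte access at offset 0
   and a 4-byte access at offset 4 do overlap, since 0 <= 4 && 0 + 8 > 4.  */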
28181 /* A C statement (sans semicolon) to update the integer scheduling
28182 priority INSN_PRIORITY (INSN). Increase the priority to execute the
28183 INSN earlier, reduce the priority to execute INSN later. Do not
28184 define this macro if you do not need to adjust the scheduling
28185 priorities of insns. */
28187 static int
28188 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
28190 rtx load_mem, str_mem;
28191 /* On machines (like the 750) which have asymmetric integer units,
28192 where one integer unit can do multiply and divides and the other
28193 can't, reduce the priority of multiply/divide so it is scheduled
28194 before other integer operations. */
28196 #if 0
28197 if (! INSN_P (insn))
28198 return priority;
28200 if (GET_CODE (PATTERN (insn)) == USE)
28201 return priority;
28203 switch (rs6000_cpu_attr) {
28204 case CPU_PPC750:
28205 switch (get_attr_type (insn))
28207 default:
28208 break;
28210 case TYPE_MUL:
28211 case TYPE_DIV:
28212 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
28213 priority, priority);
28214 if (priority >= 0 && priority < 0x01000000)
28215 priority >>= 3;
28216 break;
28219 #endif
28221 if (insn_must_be_first_in_group (insn)
28222 && reload_completed
28223 && current_sched_info->sched_max_insns_priority
28224 && rs6000_sched_restricted_insns_priority)
28227 /* Prioritize insns that can be dispatched only in the first
28228 dispatch slot. */
28229 if (rs6000_sched_restricted_insns_priority == 1)
28230 /* Attach highest priority to insn. This means that in
28231 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
28232 precede 'priority' (critical path) considerations. */
28233 return current_sched_info->sched_max_insns_priority;
28234 else if (rs6000_sched_restricted_insns_priority == 2)
28235 /* Increase priority of insn by a minimal amount. This means that in
28236 haifa-sched.c:ready_sort(), only 'priority' (critical path)
28237 considerations precede dispatch-slot restriction considerations. */
28238 return (priority + 1);
28241 if (rs6000_cpu == PROCESSOR_POWER6
28242 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
28243 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
28244 /* Attach highest priority to insn if the scheduler has just issued two
28245 stores and this instruction is a load, or two loads and this instruction
28246 is a store. Power6 wants loads and stores scheduled alternately
28247 when possible */
28248 return current_sched_info->sched_max_insns_priority;
28250 return priority;
28253 /* Return true if the instruction is nonpipelined on the Cell. */
28254 static bool
28255 is_nonpipeline_insn (rtx_insn *insn)
28257 enum attr_type type;
28258 if (!insn || !NONDEBUG_INSN_P (insn)
28259 || GET_CODE (PATTERN (insn)) == USE
28260 || GET_CODE (PATTERN (insn)) == CLOBBER)
28261 return false;
28263 type = get_attr_type (insn);
28264 if (type == TYPE_MUL
28265 || type == TYPE_DIV
28266 || type == TYPE_SDIV
28267 || type == TYPE_DDIV
28268 || type == TYPE_SSQRT
28269 || type == TYPE_DSQRT
28270 || type == TYPE_MFCR
28271 || type == TYPE_MFCRF
28272 || type == TYPE_MFJMPR)
28274 return true;
28276 return false;
28280 /* Return how many instructions the machine can issue per cycle. */
28282 static int
28283 rs6000_issue_rate (void)
28285 /* Unless scheduling for register pressure, use issue rate of 1 for
28286 the first scheduling pass to decrease degradation. */
28287 if (!reload_completed && !flag_sched_pressure)
28288 return 1;
28290 switch (rs6000_cpu_attr) {
28291 case CPU_RS64A:
28292 case CPU_PPC601: /* ? */
28293 case CPU_PPC7450:
28294 return 3;
28295 case CPU_PPC440:
28296 case CPU_PPC603:
28297 case CPU_PPC750:
28298 case CPU_PPC7400:
28299 case CPU_PPC8540:
28300 case CPU_PPC8548:
28301 case CPU_CELL:
28302 case CPU_PPCE300C2:
28303 case CPU_PPCE300C3:
28304 case CPU_PPCE500MC:
28305 case CPU_PPCE500MC64:
28306 case CPU_PPCE5500:
28307 case CPU_PPCE6500:
28308 case CPU_TITAN:
28309 return 2;
28310 case CPU_PPC476:
28311 case CPU_PPC604:
28312 case CPU_PPC604E:
28313 case CPU_PPC620:
28314 case CPU_PPC630:
28315 return 4;
28316 case CPU_POWER4:
28317 case CPU_POWER5:
28318 case CPU_POWER6:
28319 case CPU_POWER7:
28320 return 5;
28321 case CPU_POWER8:
28322 return 7;
28323 default:
28324 return 1;
28328 /* Return how many instructions to look ahead for better insn
28329 scheduling. */
28331 static int
28332 rs6000_use_sched_lookahead (void)
28334 switch (rs6000_cpu_attr)
28336 case CPU_PPC8540:
28337 case CPU_PPC8548:
28338 return 4;
28340 case CPU_CELL:
28341 return (reload_completed ? 8 : 0);
28343 default:
28344 return 0;
28348 /* We are choosing an insn from the ready queue. Return zero if INSN can be
28349 chosen. */
28350 static int
28351 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
28353 if (ready_index == 0)
28354 return 0;
28356 if (rs6000_cpu_attr != CPU_CELL)
28357 return 0;
28359 gcc_assert (insn != NULL_RTX && INSN_P (insn));
28361 if (!reload_completed
28362 || is_nonpipeline_insn (insn)
28363 || is_microcoded_insn (insn))
28364 return 1;
28366 return 0;
28369 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
28370 and return true. */
28372 static bool
28373 find_mem_ref (rtx pat, rtx *mem_ref)
28375 const char * fmt;
28376 int i, j;
28378 /* stack_tie does not produce any real memory traffic. */
28379 if (tie_operand (pat, VOIDmode))
28380 return false;
28382 if (GET_CODE (pat) == MEM)
28384 *mem_ref = pat;
28385 return true;
28388 /* Recursively process the pattern. */
28389 fmt = GET_RTX_FORMAT (GET_CODE (pat));
28391 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
28393 if (fmt[i] == 'e')
28395 if (find_mem_ref (XEXP (pat, i), mem_ref))
28396 return true;
28398 else if (fmt[i] == 'E')
28399 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
28401 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
28402 return true;
28406 return false;
28409 /* Determine if PAT is a PATTERN of a load insn. */
28411 static bool
28412 is_load_insn1 (rtx pat, rtx *load_mem)
28414 if (!pat)
28415 return false;
28417 if (GET_CODE (pat) == SET)
28418 return find_mem_ref (SET_SRC (pat), load_mem);
28420 if (GET_CODE (pat) == PARALLEL)
28422 int i;
28424 for (i = 0; i < XVECLEN (pat, 0); i++)
28425 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
28426 return true;
28429 return false;
28432 /* Determine if INSN loads from memory. */
28434 static bool
28435 is_load_insn (rtx insn, rtx *load_mem)
28437 if (!insn || !INSN_P (insn))
28438 return false;
28440 if (CALL_P (insn))
28441 return false;
28443 return is_load_insn1 (PATTERN (insn), load_mem);
28446 /* Determine if PAT is a PATTERN of a store insn. */
28448 static bool
28449 is_store_insn1 (rtx pat, rtx *str_mem)
28451 if (!pat)
28452 return false;
28454 if (GET_CODE (pat) == SET)
28455 return find_mem_ref (SET_DEST (pat), str_mem);
28457 if (GET_CODE (pat) == PARALLEL)
28459 int i;
28461 for (i = 0; i < XVECLEN (pat, 0); i++)
28462 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
28463 return true;
28466 return false;
28469 /* Determine if INSN stores to memory. */
28471 static bool
28472 is_store_insn (rtx insn, rtx *str_mem)
28474 if (!insn || !INSN_P (insn))
28475 return false;
28477 return is_store_insn1 (PATTERN (insn), str_mem);
28480 /* Returns whether the dependence between INSN and NEXT is considered
28481 costly by the given target. */
28483 static bool
28484 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
28486 rtx insn;
28487 rtx next;
28488 rtx load_mem, str_mem;
28490 /* If the flag is not enabled - no dependence is considered costly;
28491 allow all dependent insns in the same group.
28492 This is the most aggressive option. */
28493 if (rs6000_sched_costly_dep == no_dep_costly)
28494 return false;
28496 /* If the flag is set to 1 - a dependence is always considered costly;
28497 do not allow dependent instructions in the same group.
28498 This is the most conservative option. */
28499 if (rs6000_sched_costly_dep == all_deps_costly)
28500 return true;
28502 insn = DEP_PRO (dep);
28503 next = DEP_CON (dep);
28505 if (rs6000_sched_costly_dep == store_to_load_dep_costly
28506 && is_load_insn (next, &load_mem)
28507 && is_store_insn (insn, &str_mem))
28508 /* Prevent load after store in the same group. */
28509 return true;
28511 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
28512 && is_load_insn (next, &load_mem)
28513 && is_store_insn (insn, &str_mem)
28514 && DEP_TYPE (dep) == REG_DEP_TRUE
28515 && mem_locations_overlap(str_mem, load_mem))
28516 /* Prevent load after store in the same group if it is a true
28517 dependence. */
28518 return true;
28520 /* The flag is set to X; dependences with latency >= X are considered costly,
28521 and will not be scheduled in the same group. */
28522 if (rs6000_sched_costly_dep <= max_dep_latency
28523 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
28524 return true;
28526 return false;
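/* Example (illustrative; the option spelling is an assumption about the
   flag behind rs6000_sched_costly_dep): -msched-costly-dep=store_to_load
   makes every store->load dependence costly, while a numeric value N
   makes a dependence costly only when its remaining latency,
   cost - distance, is at least N.  */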
28529 /* Return the next insn after INSN that is found before TAIL is reached,
28530 skipping any "non-active" insns - insns that will not actually occupy
28531 an issue slot. Return NULL_RTX if such an insn is not found. */
28533 static rtx_insn *
28534 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
28536 if (insn == NULL_RTX || insn == tail)
28537 return NULL;
28539 while (1)
28541 insn = NEXT_INSN (insn);
28542 if (insn == NULL_RTX || insn == tail)
28543 return NULL;
28545 if (CALL_P (insn)
28546 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
28547 || (NONJUMP_INSN_P (insn)
28548 && GET_CODE (PATTERN (insn)) != USE
28549 && GET_CODE (PATTERN (insn)) != CLOBBER
28550 && INSN_CODE (insn) != CODE_FOR_stack_tie))
28551 break;
28553 return insn;
28556 /* We are about to begin issuing insns for this clock cycle. */
28558 static int
28559 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
28560 rtx_insn **ready ATTRIBUTE_UNUSED,
28561 int *pn_ready ATTRIBUTE_UNUSED,
28562 int clock_var ATTRIBUTE_UNUSED)
28564 int n_ready = *pn_ready;
28566 if (sched_verbose)
28567 fprintf (dump, "// rs6000_sched_reorder :\n");
28569 /* Reorder the ready list, if the second to last ready insn
28570 is a non-pipelined insn. */
28571 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
28573 if (is_nonpipeline_insn (ready[n_ready - 1])
28574 && (recog_memoized (ready[n_ready - 2]) > 0))
28575 /* Simply swap first two insns. */
28576 std::swap (ready[n_ready - 1], ready[n_ready - 2]);
28579 if (rs6000_cpu == PROCESSOR_POWER6)
28580 load_store_pendulum = 0;
28582 return rs6000_issue_rate ();
28585 /* Like rs6000_sched_reorder, but called after issuing each insn. */
28587 static int
28588 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
28589 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
28591 if (sched_verbose)
28592 fprintf (dump, "// rs6000_sched_reorder2 :\n");
28594 /* For Power6, we need to handle some special cases to try and keep the
28595 store queue from overflowing and triggering expensive flushes.
28597 This code monitors how load and store instructions are being issued
28598 and skews the ready list one way or the other to increase the likelihood
28599 that a desired instruction is issued at the proper time.
28601 A couple of things are done. First, we maintain a "load_store_pendulum"
28602 to track the current state of load/store issue.
28604 - If the pendulum is at zero, then no loads or stores have been
28605 issued in the current cycle so we do nothing.
28607 - If the pendulum is 1, then a single load has been issued in this
28608 cycle and we attempt to locate another load in the ready list to
28609 issue with it.
28611 - If the pendulum is -2, then two stores have already been
28612 issued in this cycle, so we increase the priority of the first load
28613 in the ready list to increase its likelihood of being chosen first
28614 in the next cycle.
28616 - If the pendulum is -1, then a single store has been issued in this
28617 cycle and we attempt to locate another store in the ready list to
28618 issue with it, preferring a store to an adjacent memory location to
28619 facilitate store pairing in the store queue.
28621 - If the pendulum is 2, then two loads have already been
28622 issued in this cycle, so we increase the priority of the first store
28623 in the ready list to increase its likelihood of being chosen first
28624 in the next cycle.
28626 - If the pendulum < -2 or > 2, then do nothing.
28628 Note: This code covers the most common scenarios. There exist
28629 non-load/store instructions which make use of the LSU and which
28630 would need to be accounted for to strictly model the behavior
28631 of the machine. Those instructions are currently unaccounted
28632 for to help minimize the compile time overhead of this code.  */
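/* Illustrative summary of the pendulum states handled below:
     -2  two stores issued: bump the priority of the first ready load
     -1  one store issued: pull another (preferably adjacent) store to
	 the head of the ready list
      0  balanced: nothing to do
      1  one load issued: pull another load to the head of the ready list
      2  two loads issued: bump the priority of the first ready store  */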
28634 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
28636 int pos;
28637 int i;
28638 rtx_insn *tmp;
28639 rtx load_mem, str_mem;
28641 if (is_store_insn (last_scheduled_insn, &str_mem))
28642 /* Issuing a store, swing the load_store_pendulum to the left */
28643 load_store_pendulum--;
28644 else if (is_load_insn (last_scheduled_insn, &load_mem))
28645 /* Issuing a load, swing the load_store_pendulum to the right */
28646 load_store_pendulum++;
28647 else
28648 return cached_can_issue_more;
28650 /* If the pendulum is balanced, or there is only one instruction on
28651 the ready list, then all is well, so return. */
28652 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
28653 return cached_can_issue_more;
28655 if (load_store_pendulum == 1)
28657 /* A load has been issued in this cycle. Scan the ready list
28658 for another load to issue with it */
28659 pos = *pn_ready-1;
28661 while (pos >= 0)
28663 if (is_load_insn (ready[pos], &load_mem))
28665 /* Found a load. Move it to the head of the ready list,
28666 and adjust its priority so that it is more likely to
28667 stay there */
28668 tmp = ready[pos];
28669 for (i=pos; i<*pn_ready-1; i++)
28670 ready[i] = ready[i + 1];
28671 ready[*pn_ready-1] = tmp;
28673 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
28674 INSN_PRIORITY (tmp)++;
28675 break;
28677 pos--;
28680 else if (load_store_pendulum == -2)
28682 /* Two stores have been issued in this cycle. Increase the
28683 priority of the first load in the ready list to favor it for
28684 issuing in the next cycle. */
28685 pos = *pn_ready-1;
28687 while (pos >= 0)
28689 if (is_load_insn (ready[pos], &load_mem)
28690 && !sel_sched_p ()
28691 && INSN_PRIORITY_KNOWN (ready[pos]))
28693 INSN_PRIORITY (ready[pos])++;
28695 /* Adjust the pendulum to account for the fact that a load
28696 was found and increased in priority. This is to prevent
28697 increasing the priority of multiple loads */
28698 load_store_pendulum--;
28700 break;
28702 pos--;
28705 else if (load_store_pendulum == -1)
28707 /* A store has been issued in this cycle. Scan the ready list for
28708 another store to issue with it, preferring a store to an adjacent
28709 memory location */
28710 int first_store_pos = -1;
28712 pos = *pn_ready-1;
28714 while (pos >= 0)
28716 if (is_store_insn (ready[pos], &str_mem))
28718 rtx str_mem2;
28719 /* Maintain the index of the first store found on the
28720 list */
28721 if (first_store_pos == -1)
28722 first_store_pos = pos;
28724 if (is_store_insn (last_scheduled_insn, &str_mem2)
28725 && adjacent_mem_locations (str_mem, str_mem2))
28727 /* Found an adjacent store. Move it to the head of the
28728 ready list, and adjust its priority so that it is
28729 more likely to stay there */
28730 tmp = ready[pos];
28731 for (i=pos; i<*pn_ready-1; i++)
28732 ready[i] = ready[i + 1];
28733 ready[*pn_ready-1] = tmp;
28735 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
28736 INSN_PRIORITY (tmp)++;
28738 first_store_pos = -1;
28740 break;
28743 pos--;
28746 if (first_store_pos >= 0)
28748 /* An adjacent store wasn't found, but a non-adjacent store was,
28749 so move the non-adjacent store to the front of the ready
28750 list, and adjust its priority so that it is more likely to
28751 stay there. */
28752 tmp = ready[first_store_pos];
28753 for (i=first_store_pos; i<*pn_ready-1; i++)
28754 ready[i] = ready[i + 1];
28755 ready[*pn_ready-1] = tmp;
28756 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
28757 INSN_PRIORITY (tmp)++;
28760 else if (load_store_pendulum == 2)
28762 /* Two loads have been issued in this cycle. Increase the priority
28763 of the first store in the ready list to favor it for issuing in
28764 the next cycle. */
28765 pos = *pn_ready-1;
28767 while (pos >= 0)
28769 if (is_store_insn (ready[pos], &str_mem)
28770 && !sel_sched_p ()
28771 && INSN_PRIORITY_KNOWN (ready[pos]))
28773 INSN_PRIORITY (ready[pos])++;
28775 /* Adjust the pendulum to account for the fact that a store
28776 was found and increased in priority. This is to prevent
28777 increasing the priority of multiple stores */
28778 load_store_pendulum++;
28780 break;
28782 pos--;
28787 return cached_can_issue_more;
28790 /* Return whether the presence of INSN causes a dispatch group termination
28791 of group WHICH_GROUP.
28793 If WHICH_GROUP == current_group, this function will return true if INSN
28794 causes the termination of the current group (i.e., the dispatch group to
28795 which INSN belongs). This means that INSN will be the last insn in the
28796 group it belongs to.
28798 If WHICH_GROUP == previous_group, this function will return true if INSN
28799 causes the termination of the previous group (i.e., the dispatch group that
28800 precedes the group to which INSN belongs). This means that INSN will be
28801 the first insn in the group it belongs to. */
28803 static bool
28804 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
28806 bool first, last;
28808 if (! insn)
28809 return false;
28811 first = insn_must_be_first_in_group (insn);
28812 last = insn_must_be_last_in_group (insn);
28814 if (first && last)
28815 return true;
28817 if (which_group == current_group)
28818 return last;
28819 else if (which_group == previous_group)
28820 return first;
28822 return false;
28826 static bool
28827 insn_must_be_first_in_group (rtx_insn *insn)
28829 enum attr_type type;
28831 if (!insn
28832 || NOTE_P (insn)
28833 || DEBUG_INSN_P (insn)
28834 || GET_CODE (PATTERN (insn)) == USE
28835 || GET_CODE (PATTERN (insn)) == CLOBBER)
28836 return false;
28838 switch (rs6000_cpu)
28840 case PROCESSOR_POWER5:
28841 if (is_cracked_insn (insn))
28842 return true;
28843 case PROCESSOR_POWER4:
28844 if (is_microcoded_insn (insn))
28845 return true;
28847 if (!rs6000_sched_groups)
28848 return false;
28850 type = get_attr_type (insn);
28852 switch (type)
28854 case TYPE_MFCR:
28855 case TYPE_MFCRF:
28856 case TYPE_MTCR:
28857 case TYPE_DELAYED_CR:
28858 case TYPE_CR_LOGICAL:
28859 case TYPE_MTJMPR:
28860 case TYPE_MFJMPR:
28861 case TYPE_DIV:
28862 case TYPE_LOAD_L:
28863 case TYPE_STORE_C:
28864 case TYPE_ISYNC:
28865 case TYPE_SYNC:
28866 return true;
28867 default:
28868 break;
28870 break;
28871 case PROCESSOR_POWER6:
28872 type = get_attr_type (insn);
28874 switch (type)
28876 case TYPE_EXTS:
28877 case TYPE_CNTLZ:
28878 case TYPE_TRAP:
28879 case TYPE_MUL:
28880 case TYPE_INSERT:
28881 case TYPE_FPCOMPARE:
28882 case TYPE_MFCR:
28883 case TYPE_MTCR:
28884 case TYPE_MFJMPR:
28885 case TYPE_MTJMPR:
28886 case TYPE_ISYNC:
28887 case TYPE_SYNC:
28888 case TYPE_LOAD_L:
28889 case TYPE_STORE_C:
28890 return true;
28891 case TYPE_SHIFT:
28892 if (get_attr_dot (insn) == DOT_NO
28893 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
28894 return true;
28895 else
28896 break;
28897 case TYPE_DIV:
28898 if (get_attr_size (insn) == SIZE_32)
28899 return true;
28900 else
28901 break;
28902 case TYPE_LOAD:
28903 case TYPE_STORE:
28904 case TYPE_FPLOAD:
28905 case TYPE_FPSTORE:
28906 if (get_attr_update (insn) == UPDATE_YES)
28907 return true;
28908 else
28909 break;
28910 default:
28911 break;
28913 break;
28914 case PROCESSOR_POWER7:
28915 type = get_attr_type (insn);
28917 switch (type)
28919 case TYPE_CR_LOGICAL:
28920 case TYPE_MFCR:
28921 case TYPE_MFCRF:
28922 case TYPE_MTCR:
28923 case TYPE_DIV:
28924 case TYPE_ISYNC:
28925 case TYPE_LOAD_L:
28926 case TYPE_STORE_C:
28927 case TYPE_MFJMPR:
28928 case TYPE_MTJMPR:
28929 return true;
28930 case TYPE_MUL:
28931 case TYPE_SHIFT:
28932 case TYPE_EXTS:
28933 if (get_attr_dot (insn) == DOT_YES)
28934 return true;
28935 else
28936 break;
28937 case TYPE_LOAD:
28938 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28939 || get_attr_update (insn) == UPDATE_YES)
28940 return true;
28941 else
28942 break;
28943 case TYPE_STORE:
28944 case TYPE_FPLOAD:
28945 case TYPE_FPSTORE:
28946 if (get_attr_update (insn) == UPDATE_YES)
28947 return true;
28948 else
28949 break;
28950 default:
28951 break;
28953 break;
28954 case PROCESSOR_POWER8:
28955 type = get_attr_type (insn);
28957 switch (type)
28959 case TYPE_CR_LOGICAL:
28960 case TYPE_DELAYED_CR:
28961 case TYPE_MFCR:
28962 case TYPE_MFCRF:
28963 case TYPE_MTCR:
28964 case TYPE_SYNC:
28965 case TYPE_ISYNC:
28966 case TYPE_LOAD_L:
28967 case TYPE_STORE_C:
28968 case TYPE_VECSTORE:
28969 case TYPE_MFJMPR:
28970 case TYPE_MTJMPR:
28971 return true;
28972 case TYPE_SHIFT:
28973 case TYPE_EXTS:
28974 case TYPE_MUL:
28975 if (get_attr_dot (insn) == DOT_YES)
28976 return true;
28977 else
28978 break;
28979 case TYPE_LOAD:
28980 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28981 || get_attr_update (insn) == UPDATE_YES)
28982 return true;
28983 else
28984 break;
28985 case TYPE_STORE:
28986 if (get_attr_update (insn) == UPDATE_YES
28987 && get_attr_indexed (insn) == INDEXED_YES)
28988 return true;
28989 else
28990 break;
28991 default:
28992 break;
28994 break;
28995 default:
28996 break;
28999 return false;
29002 static bool
29003 insn_must_be_last_in_group (rtx_insn *insn)
29005 enum attr_type type;
29007 if (!insn
29008 || NOTE_P (insn)
29009 || DEBUG_INSN_P (insn)
29010 || GET_CODE (PATTERN (insn)) == USE
29011 || GET_CODE (PATTERN (insn)) == CLOBBER)
29012 return false;
29014 switch (rs6000_cpu) {
29015 case PROCESSOR_POWER4:
29016 case PROCESSOR_POWER5:
29017 if (is_microcoded_insn (insn))
29018 return true;
29020 if (is_branch_slot_insn (insn))
29021 return true;
29023 break;
29024 case PROCESSOR_POWER6:
29025 type = get_attr_type (insn);
29027 switch (type)
29029 case TYPE_EXTS:
29030 case TYPE_CNTLZ:
29031 case TYPE_TRAP:
29032 case TYPE_MUL:
29033 case TYPE_FPCOMPARE:
29034 case TYPE_MFCR:
29035 case TYPE_MTCR:
29036 case TYPE_MFJMPR:
29037 case TYPE_MTJMPR:
29038 case TYPE_ISYNC:
29039 case TYPE_SYNC:
29040 case TYPE_LOAD_L:
29041 case TYPE_STORE_C:
29042 return true;
29043 case TYPE_SHIFT:
29044 if (get_attr_dot (insn) == DOT_NO
29045 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
29046 return true;
29047 else
29048 break;
29049 case TYPE_DIV:
29050 if (get_attr_size (insn) == SIZE_32)
29051 return true;
29052 else
29053 break;
29054 default:
29055 break;
29057 break;
29058 case PROCESSOR_POWER7:
29059 type = get_attr_type (insn);
29061 switch (type)
29063 case TYPE_ISYNC:
29064 case TYPE_SYNC:
29065 case TYPE_LOAD_L:
29066 case TYPE_STORE_C:
29067 return true;
29068 case TYPE_LOAD:
29069 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
29070 && get_attr_update (insn) == UPDATE_YES)
29071 return true;
29072 else
29073 break;
29074 case TYPE_STORE:
29075 if (get_attr_update (insn) == UPDATE_YES
29076 && get_attr_indexed (insn) == INDEXED_YES)
29077 return true;
29078 else
29079 break;
29080 default:
29081 break;
29083 break;
29084 case PROCESSOR_POWER8:
29085 type = get_attr_type (insn);
29087 switch (type)
29089 case TYPE_MFCR:
29090 case TYPE_MTCR:
29091 case TYPE_ISYNC:
29092 case TYPE_SYNC:
29093 case TYPE_LOAD_L:
29094 case TYPE_STORE_C:
29095 return true;
29096 case TYPE_LOAD:
29097 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
29098 && get_attr_update (insn) == UPDATE_YES)
29099 return true;
29100 else
29101 break;
29102 case TYPE_STORE:
29103 if (get_attr_update (insn) == UPDATE_YES
29104 && get_attr_indexed (insn) == INDEXED_YES)
29105 return true;
29106 else
29107 break;
29108 default:
29109 break;
29111 break;
29112 default:
29113 break;
29116 return false;
29119 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
29120 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
29122 static bool
29123 is_costly_group (rtx *group_insns, rtx next_insn)
29125 int i;
29126 int issue_rate = rs6000_issue_rate ();
29128 for (i = 0; i < issue_rate; i++)
29130 sd_iterator_def sd_it;
29131 dep_t dep;
29132 rtx insn = group_insns[i];
29134 if (!insn)
29135 continue;
29137 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
29139 rtx next = DEP_CON (dep);
29141 if (next == next_insn
29142 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
29143 return true;
29147 return false;
29150 /* Utility of the function redefine_groups.
29151 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
29152 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
29153 to keep it "far" (in a separate group) from GROUP_INSNS, according to
29154 one of the following schemes, depending on the value of the flag
29155 -minsert-sched-nops = X:
29156 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
29157 in order to force NEXT_INSN into a separate group.
29158 (2) X < sched_finish_regroup_exact: insert exactly X nops.
29159 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
29160 insertion (has a group just ended, how many vacant issue slots remain in the
29161 last group, and how many dispatch groups were encountered so far). */
29163 static int
29164 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
29165 rtx_insn *next_insn, bool *group_end, int can_issue_more,
29166 int *group_count)
29168 rtx nop;
29169 bool force;
29170 int issue_rate = rs6000_issue_rate ();
29171 bool end = *group_end;
29172 int i;
29174 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
29175 return can_issue_more;
29177 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
29178 return can_issue_more;
29180 force = is_costly_group (group_insns, next_insn);
29181 if (!force)
29182 return can_issue_more;
29184 if (sched_verbose > 6)
29185 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
29186 *group_count ,can_issue_more);
29188 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
29190 if (*group_end)
29191 can_issue_more = 0;
29193 /* Since only a branch can be issued in the last issue_slot, it is
29194 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
29195 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
29196 in this case the last nop will start a new group and the branch
29197 will be forced to the new group. */
29198 if (can_issue_more && !is_branch_slot_insn (next_insn))
29199 can_issue_more--;
29201 /* Do we have a special group ending nop? */
29202 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
29203 || rs6000_cpu_attr == CPU_POWER8)
29205 nop = gen_group_ending_nop ();
29206 emit_insn_before (nop, next_insn);
29207 can_issue_more = 0;
29209 else
29210 while (can_issue_more > 0)
29212 nop = gen_nop ();
29213 emit_insn_before (nop, next_insn);
29214 can_issue_more--;
29217 *group_end = true;
29218 return 0;
29221 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
29223 int n_nops = rs6000_sched_insert_nops;
29225 /* Nops can't be issued from the branch slot, so the effective
29226 issue_rate for nops is 'issue_rate - 1'. */
29227 if (can_issue_more == 0)
29228 can_issue_more = issue_rate;
29229 can_issue_more--;
29230 if (can_issue_more == 0)
29232 can_issue_more = issue_rate - 1;
29233 (*group_count)++;
29234 end = true;
29235 for (i = 0; i < issue_rate; i++)
29237 group_insns[i] = 0;
29241 while (n_nops > 0)
29243 nop = gen_nop ();
29244 emit_insn_before (nop, next_insn);
29245 if (can_issue_more == issue_rate - 1) /* new group begins */
29246 end = false;
29247 can_issue_more--;
29248 if (can_issue_more == 0)
29250 can_issue_more = issue_rate - 1;
29251 (*group_count)++;
29252 end = true;
29253 for (i = 0; i < issue_rate; i++)
29255 group_insns[i] = 0;
29258 n_nops--;
29261 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
29262 can_issue_more++;
29264 /* Is next_insn going to start a new group? */
29265 *group_end
29266 = (end
29267 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
29268 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
29269 || (can_issue_more < issue_rate &&
29270 insn_terminates_group_p (next_insn, previous_group)));
29271 if (*group_end && end)
29272 (*group_count)--;
29274 if (sched_verbose > 6)
29275 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
29276 *group_count, can_issue_more);
29277 return can_issue_more;
29280 return can_issue_more;
29283 /* This function tries to synchronize the dispatch groups that the compiler "sees"
29284 with the dispatch groups that the processor dispatcher is expected to
29285 form in practice. It tries to achieve this synchronization by forcing the
29286 estimated processor grouping on the compiler (as opposed to the function
29287 'pad_groups' which tries to force the scheduler's grouping on the processor).
29289 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
29290 examines the (estimated) dispatch groups that will be formed by the processor
29291 dispatcher. It marks these group boundaries to reflect the estimated
29292 processor grouping, overriding the grouping that the scheduler had marked.
29293 Depending on the value of the flag '-minsert-sched-nops' this function can
29294 force certain insns into separate groups or force a certain distance between
29295 them by inserting nops, for example, if there exists a "costly dependence"
29296 between the insns.
29298 The function estimates the group boundaries that the processor will form as
29299 follows: It keeps track of how many vacant issue slots are available after
29300 each insn. A subsequent insn will start a new group if one of the following
29301 4 cases applies:
29302 - no more vacant issue slots remain in the current dispatch group.
29303 - only the last issue slot, which is the branch slot, is vacant, but the next
29304 insn is not a branch.
29305 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
29306 which means that a cracked insn (which occupies two issue slots) can't be
29307 issued in this group.
29308 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
29309 start a new group. */
29311 static int
29312 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
29313 rtx_insn *tail)
29315 rtx_insn *insn, *next_insn;
29316 int issue_rate;
29317 int can_issue_more;
29318 int slot, i;
29319 bool group_end;
29320 int group_count = 0;
29321 rtx *group_insns;
29323 /* Initialize. */
29324 issue_rate = rs6000_issue_rate ();
29325 group_insns = XALLOCAVEC (rtx, issue_rate);
29326 for (i = 0; i < issue_rate; i++)
29328 group_insns[i] = 0;
29330 can_issue_more = issue_rate;
29331 slot = 0;
29332 insn = get_next_active_insn (prev_head_insn, tail);
29333 group_end = false;
29335 while (insn != NULL_RTX)
29337 slot = (issue_rate - can_issue_more);
29338 group_insns[slot] = insn;
29339 can_issue_more =
29340 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
29341 if (insn_terminates_group_p (insn, current_group))
29342 can_issue_more = 0;
29344 next_insn = get_next_active_insn (insn, tail);
29345 if (next_insn == NULL_RTX)
29346 return group_count + 1;
29348 /* Is next_insn going to start a new group? */
29349 group_end
29350 = (can_issue_more == 0
29351 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
29352 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
29353 || (can_issue_more < issue_rate
29354 && insn_terminates_group_p (next_insn, previous_group)));
29356 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
29357 next_insn, &group_end, can_issue_more,
29358 &group_count);
29360 if (group_end)
29362 group_count++;
29363 can_issue_more = 0;
29364 for (i = 0; i < issue_rate; i++)
29366 group_insns[i] = 0;
29370 if (GET_MODE (next_insn) == TImode && can_issue_more)
29371 PUT_MODE (next_insn, VOIDmode);
29372 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
29373 PUT_MODE (next_insn, TImode);
29375 insn = next_insn;
29376 if (can_issue_more == 0)
29377 can_issue_more = issue_rate;
29378 } /* while */
29380 return group_count;
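/* Editor's note: a minimal standalone sketch (not GCC code) of the four
   group-boundary tests described in the comment above redefine_groups;
   the boolean parameters stand in for the is_branch_slot_insn,
   is_cracked_insn and insn_terminates_group_p hooks used in this file.  */
#include <stdbool.h>

static bool
dispatch_group_boundary_p (int can_issue_more, int issue_rate,
                           bool next_is_branch, bool next_is_cracked,
                           bool next_terminates_group)
{
  return (can_issue_more == 0                          /* no vacant slots */
          || (can_issue_more == 1 && !next_is_branch)  /* only the branch slot left */
          || (can_issue_more <= 2 && next_is_cracked)  /* cracked insn needs 2 slots */
          || (can_issue_more < issue_rate && next_terminates_group));
}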
29383 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
29384 dispatch group boundaries that the scheduler had marked. Pad with nops
29385 any dispatch groups which have vacant issue slots, in order to force the
29386 scheduler's grouping on the processor dispatcher. The function
29387 returns the number of dispatch groups found. */
29389 static int
29390 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
29391 rtx_insn *tail)
29393 rtx_insn *insn, *next_insn;
29394 rtx nop;
29395 int issue_rate;
29396 int can_issue_more;
29397 int group_end;
29398 int group_count = 0;
29400 /* Initialize issue_rate. */
29401 issue_rate = rs6000_issue_rate ();
29402 can_issue_more = issue_rate;
29404 insn = get_next_active_insn (prev_head_insn, tail);
29405 next_insn = get_next_active_insn (insn, tail);
29407 while (insn != NULL_RTX)
29409 can_issue_more =
29410 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
29412 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
29414 if (next_insn == NULL_RTX)
29415 break;
29417 if (group_end)
29419 /* If the scheduler had marked group termination at this location
29420 (between insn and next_insn), and neither insn nor next_insn will
29421 force group termination, pad the group with nops to force group
29422 termination. */
29423 if (can_issue_more
29424 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
29425 && !insn_terminates_group_p (insn, current_group)
29426 && !insn_terminates_group_p (next_insn, previous_group))
29428 if (!is_branch_slot_insn (next_insn))
29429 can_issue_more--;
29431 while (can_issue_more)
29433 nop = gen_nop ();
29434 emit_insn_before (nop, next_insn);
29435 can_issue_more--;
29439 can_issue_more = issue_rate;
29440 group_count++;
29443 insn = next_insn;
29444 next_insn = get_next_active_insn (insn, tail);
29447 return group_count;
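/* Editor's note: a sketch (assumption) of the padding rule pad_groups
   implements above: nops cannot occupy the branch slot, so that slot is
   left open when the next insn is not a branch; a branch forces every
   vacant slot to be filled so it cannot slip into the current group.  */
#include <stdbool.h>

static int
pad_nop_count (int vacant_slots, bool next_is_branch_slot_insn)
{
  return next_is_branch_slot_insn ? vacant_slots : vacant_slots - 1;
}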
29450 /* We're beginning a new block. Initialize data structures as necessary. */
29452 static void
29453 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
29454 int sched_verbose ATTRIBUTE_UNUSED,
29455 int max_ready ATTRIBUTE_UNUSED)
29457 last_scheduled_insn = NULL_RTX;
29458 load_store_pendulum = 0;
29461 /* The following function is called at the end of scheduling BB.
29462 After reload, it inserts nops to enforce insn group boundaries. */
29464 static void
29465 rs6000_sched_finish (FILE *dump, int sched_verbose)
29467 int n_groups;
29469 if (sched_verbose)
29470 fprintf (dump, "=== Finishing schedule.\n");
29472 if (reload_completed && rs6000_sched_groups)
29474 /* Do not run the sched_finish hook when selective scheduling is enabled. */
29475 if (sel_sched_p ())
29476 return;
29478 if (rs6000_sched_insert_nops == sched_finish_none)
29479 return;
29481 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
29482 n_groups = pad_groups (dump, sched_verbose,
29483 current_sched_info->prev_head,
29484 current_sched_info->next_tail);
29485 else
29486 n_groups = redefine_groups (dump, sched_verbose,
29487 current_sched_info->prev_head,
29488 current_sched_info->next_tail);
29490 if (sched_verbose >= 6)
29492 fprintf (dump, "ngroups = %d\n", n_groups);
29493 print_rtl (dump, current_sched_info->prev_head);
29494 fprintf (dump, "Done finish_sched\n");
29499 struct _rs6000_sched_context
29501 short cached_can_issue_more;
29502 rtx last_scheduled_insn;
29503 int load_store_pendulum;
29506 typedef struct _rs6000_sched_context rs6000_sched_context_def;
29507 typedef rs6000_sched_context_def *rs6000_sched_context_t;
29509 /* Allocate storage for a new scheduling context. */
29510 static void *
29511 rs6000_alloc_sched_context (void)
29513 return xmalloc (sizeof (rs6000_sched_context_def));
29516 /* If CLEAN_P is true, initialize _SC with clean data;
29517 otherwise, initialize it from the global context. */
29518 static void
29519 rs6000_init_sched_context (void *_sc, bool clean_p)
29521 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
29523 if (clean_p)
29525 sc->cached_can_issue_more = 0;
29526 sc->last_scheduled_insn = NULL_RTX;
29527 sc->load_store_pendulum = 0;
29529 else
29531 sc->cached_can_issue_more = cached_can_issue_more;
29532 sc->last_scheduled_insn = last_scheduled_insn;
29533 sc->load_store_pendulum = load_store_pendulum;
29537 /* Sets the global scheduling context to the one pointed to by _SC. */
29538 static void
29539 rs6000_set_sched_context (void *_sc)
29541 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
29543 gcc_assert (sc != NULL);
29545 cached_can_issue_more = sc->cached_can_issue_more;
29546 last_scheduled_insn = sc->last_scheduled_insn;
29547 load_store_pendulum = sc->load_store_pendulum;
29550 /* Free _SC. */
29551 static void
29552 rs6000_free_sched_context (void *_sc)
29554 gcc_assert (_sc != NULL);
29556 free (_sc);
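/* Editor's note: a self-contained sketch of the save/restore contract
   implemented by the four hooks above, with the context simplified (the
   rtx member becomes a plain pointer); all names here are hypothetical.  */
#include <stdbool.h>
#include <stdlib.h>

struct toy_sched_ctx { short cached_can_issue_more; void *last_insn; int pendulum; };
static struct toy_sched_ctx toy_global;  /* stands in for the file-scope state */

static void *toy_alloc_ctx (void) { return malloc (sizeof (struct toy_sched_ctx)); }
static void
toy_init_ctx (void *_sc, bool clean_p)
{
  struct toy_sched_ctx *sc = (struct toy_sched_ctx *) _sc;
  if (clean_p)
    { sc->cached_can_issue_more = 0; sc->last_insn = 0; sc->pendulum = 0; }
  else
    *sc = toy_global;                    /* snapshot the current global state */
}
static void toy_set_ctx (void *_sc) { toy_global = *(struct toy_sched_ctx *) _sc; }
static void toy_free_ctx (void *_sc) { free (_sc); }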
29560 /* Length in units of the trampoline for entering a nested function. */
29562 int
29563 rs6000_trampoline_size (void)
29565 int ret = 0;
29567 switch (DEFAULT_ABI)
29569 default:
29570 gcc_unreachable ();
29572 case ABI_AIX:
29573 ret = (TARGET_32BIT) ? 12 : 24;
29574 break;
29576 case ABI_ELFv2:
29577 gcc_assert (!TARGET_32BIT);
29578 ret = 32;
29579 break;
29581 case ABI_DARWIN:
29582 case ABI_V4:
29583 ret = (TARGET_32BIT) ? 40 : 48;
29584 break;
29587 return ret;
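/* Editor's note: the ABI/word-size table from the switch above as a
   pure function (editorial sketch; the abi encoding here is made up).  */
#include <stdbool.h>

static int
trampoline_bytes (int abi /* 0=AIX, 1=ELFv2, 2=V.4/Darwin */, bool is_32bit)
{
  if (abi == 1)
    return 32;                  /* ELFv2 is 64-bit only */
  if (abi == 0)
    return is_32bit ? 12 : 24;  /* AIX */
  return is_32bit ? 40 : 48;    /* V.4 / eabi / Darwin */
}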
29590 /* Emit RTL insns to initialize the variable parts of a trampoline.
29591 FNADDR is an RTX for the address of the function's pure code.
29592 CXT is an RTX for the static chain value for the function. */
29594 static void
29595 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
29597 int regsize = (TARGET_32BIT) ? 4 : 8;
29598 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
29599 rtx ctx_reg = force_reg (Pmode, cxt);
29600 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
29602 switch (DEFAULT_ABI)
29604 default:
29605 gcc_unreachable ();
29607 /* Under AIX, just build the three-word function descriptor.  */
29608 case ABI_AIX:
29610 rtx fnmem, fn_reg, toc_reg;
29612 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
29613 error ("You cannot take the address of a nested function if you use "
29614 "the -mno-pointers-to-nested-functions option.");
29616 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
29617 fn_reg = gen_reg_rtx (Pmode);
29618 toc_reg = gen_reg_rtx (Pmode);
29620 /* Macro to shorten the code expansions below. */
29621 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
29623 m_tramp = replace_equiv_address (m_tramp, addr);
29625 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
29626 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
29627 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
29628 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
29629 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
29631 # undef MEM_PLUS
29633 break;
29635 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
29636 case ABI_ELFv2:
29637 case ABI_DARWIN:
29638 case ABI_V4:
29639 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
29640 LCT_NORMAL, VOIDmode, 4,
29641 addr, Pmode,
29642 GEN_INT (rs6000_trampoline_size ()), SImode,
29643 fnaddr, Pmode,
29644 ctx_reg, Pmode);
29645 break;
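/* Editor's note: the three-word AIX trampoline laid out above, viewed as
   a struct (sketch; each field is regsize bytes wide): function entry
   address, TOC pointer, then the static chain.  */
struct aix_trampoline_sketch { void *entry; void *toc; void *static_chain; };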
29650 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
29651 identifier as an argument, so the front end shouldn't look it up. */
29653 static bool
29654 rs6000_attribute_takes_identifier_p (const_tree attr_id)
29656 return is_attribute_p ("altivec", attr_id);
29659 /* Handle the "altivec" attribute. The attribute may have
29660 arguments as follows:
29662 __attribute__((altivec(vector__)))
29663 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
29664 __attribute__((altivec(bool__))) (always followed by 'unsigned')
29666 and may appear more than once (e.g., 'vector bool char') in a
29667 given declaration. */
29669 static tree
29670 rs6000_handle_altivec_attribute (tree *node,
29671 tree name ATTRIBUTE_UNUSED,
29672 tree args,
29673 int flags ATTRIBUTE_UNUSED,
29674 bool *no_add_attrs)
29676 tree type = *node, result = NULL_TREE;
29677 machine_mode mode;
29678 int unsigned_p;
29679 char altivec_type
29680 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
29681 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
29682 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
29683 : '?');
29685 while (POINTER_TYPE_P (type)
29686 || TREE_CODE (type) == FUNCTION_TYPE
29687 || TREE_CODE (type) == METHOD_TYPE
29688 || TREE_CODE (type) == ARRAY_TYPE)
29689 type = TREE_TYPE (type);
29691 mode = TYPE_MODE (type);
29693 /* Check for invalid AltiVec type qualifiers. */
29694 if (type == long_double_type_node)
29695 error ("use of %<long double%> in AltiVec types is invalid");
29696 else if (type == boolean_type_node)
29697 error ("use of boolean types in AltiVec types is invalid");
29698 else if (TREE_CODE (type) == COMPLEX_TYPE)
29699 error ("use of %<complex%> in AltiVec types is invalid");
29700 else if (DECIMAL_FLOAT_MODE_P (mode))
29701 error ("use of decimal floating point types in AltiVec types is invalid");
29702 else if (!TARGET_VSX)
29704 if (type == long_unsigned_type_node || type == long_integer_type_node)
29706 if (TARGET_64BIT)
29707 error ("use of %<long%> in AltiVec types is invalid for "
29708 "64-bit code without -mvsx");
29709 else if (rs6000_warn_altivec_long)
29710 warning (0, "use of %<long%> in AltiVec types is deprecated; "
29711 "use %<int%>");
29713 else if (type == long_long_unsigned_type_node
29714 || type == long_long_integer_type_node)
29715 error ("use of %<long long%> in AltiVec types is invalid without "
29716 "-mvsx");
29717 else if (type == double_type_node)
29718 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
29721 switch (altivec_type)
29723 case 'v':
29724 unsigned_p = TYPE_UNSIGNED (type);
29725 switch (mode)
29727 case TImode:
29728 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
29729 break;
29730 case DImode:
29731 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
29732 break;
29733 case SImode:
29734 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
29735 break;
29736 case HImode:
29737 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
29738 break;
29739 case QImode:
29740 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
29741 break;
29742 case SFmode: result = V4SF_type_node; break;
29743 case DFmode: result = V2DF_type_node; break;
29744 /* If the user says 'vector int bool', we may be handed the 'bool'
29745 attribute _before_ the 'vector' attribute, and so select the
29746 proper type in the 'b' case below. */
29747 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
29748 case V2DImode: case V2DFmode:
29749 result = type;
29750 default: break;
29752 break;
29753 case 'b':
29754 switch (mode)
29756 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
29757 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
29758 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
29759 case QImode: case V16QImode: result = bool_V16QI_type_node;
29760 default: break;
29762 break;
29763 case 'p':
29764 switch (mode)
29766 case V8HImode: result = pixel_V8HI_type_node;
29767 default: break;
29769 default: break;
29772 /* Propagate qualifiers attached to the element type
29773 onto the vector type. */
29774 if (result && result != type && TYPE_QUALS (type))
29775 result = build_qualified_type (result, TYPE_QUALS (type));
29777 *no_add_attrs = true; /* No need to hang on to the attribute. */
29779 if (result)
29780 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
29782 return NULL_TREE;
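/* Editor's note: illustrative declarations (hypothetical; they compile
   only for PowerPC with -maltivec) of the forms this handler receives.
   The AltiVec type keywords expand to these attributes, and the 'bool'
   form arrives together with the 'vector' form.  */
__attribute__ ((altivec (vector__))) int vsi;            /* vector int */
__attribute__ ((altivec (vector__))) unsigned char vuc;  /* vector unsigned char */
__attribute__ ((altivec (vector__)))
__attribute__ ((altivec (bool__))) unsigned int vbi;     /* vector bool int */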
29785 /* AltiVec defines four built-in scalar types that serve as vector
29786 elements; we must teach the compiler how to mangle them. */
29788 static const char *
29789 rs6000_mangle_type (const_tree type)
29791 type = TYPE_MAIN_VARIANT (type);
29793 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
29794 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
29795 return NULL;
29797 if (type == bool_char_type_node) return "U6__boolc";
29798 if (type == bool_short_type_node) return "U6__bools";
29799 if (type == pixel_type_node) return "u7__pixel";
29800 if (type == bool_int_type_node) return "U6__booli";
29801 if (type == bool_long_type_node) return "U6__booll";
29803 /* Mangle IBM extended float long double as `g' (__float128) on
29804 powerpc*-linux where long-double-64 previously was the default. */
29805 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
29806 && TARGET_ELF
29807 && TARGET_LONG_DOUBLE_128
29808 && !TARGET_IEEEQUAD)
29809 return "g";
29811 /* For all other types, use normal C++ mangling. */
29812 return NULL;
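/* Editor's note: hypothetical C++ prototypes annotated with the
   manglings the table above yields (PowerPC with AltiVec only).  */
void take_pixel (__pixel);   /* mangles to _Z10take_pixelu7__pixel */
void take_bint (__bool int); /* mangles to _Z9take_bintU6__booli */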
29815 /* Handle a "longcall" or "shortcall" attribute; arguments as in
29816 struct attribute_spec.handler. */
29818 static tree
29819 rs6000_handle_longcall_attribute (tree *node, tree name,
29820 tree args ATTRIBUTE_UNUSED,
29821 int flags ATTRIBUTE_UNUSED,
29822 bool *no_add_attrs)
29824 if (TREE_CODE (*node) != FUNCTION_TYPE
29825 && TREE_CODE (*node) != FIELD_DECL
29826 && TREE_CODE (*node) != TYPE_DECL)
29828 warning (OPT_Wattributes, "%qE attribute only applies to functions",
29829 name);
29830 *no_add_attrs = true;
29833 return NULL_TREE;
29836 /* Set longcall attributes on all functions declared when
29837 rs6000_default_long_calls is true. */
29838 static void
29839 rs6000_set_default_type_attributes (tree type)
29841 if (rs6000_default_long_calls
29842 && (TREE_CODE (type) == FUNCTION_TYPE
29843 || TREE_CODE (type) == METHOD_TYPE))
29844 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
29845 NULL_TREE,
29846 TYPE_ATTRIBUTES (type));
29848 #if TARGET_MACHO
29849 darwin_set_default_type_attributes (type);
29850 #endif
29853 /* Return a reference suitable for calling a function with the
29854 longcall attribute. */
29856 rtx
29857 rs6000_longcall_ref (rtx call_ref)
29859 const char *call_name;
29860 tree node;
29862 if (GET_CODE (call_ref) != SYMBOL_REF)
29863 return call_ref;
29865 /* System V adds '.' to the internal name, so skip them. */
29866 call_name = XSTR (call_ref, 0);
29867 if (*call_name == '.')
29869 while (*call_name == '.')
29870 call_name++;
29872 node = get_identifier (call_name);
29873 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
29876 return force_reg (Pmode, call_ref);
29879 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
29880 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
29881 #endif
29883 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
29884 struct attribute_spec.handler. */
29885 static tree
29886 rs6000_handle_struct_attribute (tree *node, tree name,
29887 tree args ATTRIBUTE_UNUSED,
29888 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
29890 tree *type = NULL;
29891 if (DECL_P (*node))
29893 if (TREE_CODE (*node) == TYPE_DECL)
29894 type = &TREE_TYPE (*node);
29896 else
29897 type = node;
29899 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
29900 || TREE_CODE (*type) == UNION_TYPE)))
29902 warning (OPT_Wattributes, "%qE attribute ignored", name);
29903 *no_add_attrs = true;
29906 else if ((is_attribute_p ("ms_struct", name)
29907 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
29908 || ((is_attribute_p ("gcc_struct", name)
29909 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
29911 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
29912 name);
29913 *no_add_attrs = true;
29916 return NULL_TREE;
29919 static bool
29920 rs6000_ms_bitfield_layout_p (const_tree record_type)
29922 return (TARGET_USE_MS_BITFIELD_LAYOUT
29923 && !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
29924 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
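/* Editor's note: illustrative (hypothetical) uses of the attributes
   handled above; putting both attributes on one type draws the
   "incompatible attribute ignored" warning.  */
struct __attribute__ ((ms_struct)) ms_layout { char c; int bits : 7; };
struct __attribute__ ((gcc_struct)) gcc_layout { char c; int bits : 7; };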
29927 #ifdef USING_ELFOS_H
29929 /* A get_unnamed_section callback, used for switching to toc_section. */
29931 static void
29932 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
29934 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29935 && TARGET_MINIMAL_TOC
29936 && !TARGET_RELOCATABLE)
29938 if (!toc_initialized)
29940 toc_initialized = 1;
29941 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
29942 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
29943 fprintf (asm_out_file, "\t.tc ");
29944 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
29945 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
29946 fprintf (asm_out_file, "\n");
29948 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29949 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
29950 fprintf (asm_out_file, " = .+32768\n");
29952 else
29953 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29955 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29956 && !TARGET_RELOCATABLE)
29957 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
29958 else
29960 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29961 if (!toc_initialized)
29963 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
29964 fprintf (asm_out_file, " = .+32768\n");
29965 toc_initialized = 1;
29970 /* Implement TARGET_ASM_INIT_SECTIONS. */
29972 static void
29973 rs6000_elf_asm_init_sections (void)
29975 toc_section
29976 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
29978 sdata2_section
29979 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
29980 SDATA2_SECTION_ASM_OP);
29983 /* Implement TARGET_SELECT_RTX_SECTION. */
29985 static section *
29986 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
29987 unsigned HOST_WIDE_INT align)
29989 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29990 return toc_section;
29991 else
29992 return default_elf_select_rtx_section (mode, x, align);
29995 /* For a SYMBOL_REF, set generic flags and then perform some
29996 target-specific processing.
29998 When the AIX ABI is requested on a non-AIX system, replace the
29999 function name with the real name (with a leading .) rather than the
30000 function descriptor name. This saves a lot of overriding code to
30001 read the prefixes. */
30003 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
30004 static void
30005 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
30007 default_encode_section_info (decl, rtl, first);
30009 if (first
30010 && TREE_CODE (decl) == FUNCTION_DECL
30011 && !TARGET_AIX
30012 && DEFAULT_ABI == ABI_AIX)
30014 rtx sym_ref = XEXP (rtl, 0);
30015 size_t len = strlen (XSTR (sym_ref, 0));
30016 char *str = XALLOCAVEC (char, len + 2);
30017 str[0] = '.';
30018 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
30019 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
30023 static inline bool
30024 compare_section_name (const char *section, const char *templ)
30026 int len;
30028 len = strlen (templ);
30029 return (strncmp (section, templ, len) == 0
30030 && (section[len] == 0 || section[len] == '.'));
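/* Editor's note: editorial sanity checks (not part of GCC) spelling out
   the rule above: the template must match the section name up to the
   end of the name or a '.' separator.  */
#include <assert.h>

static void
compare_section_name_examples (void)
{
  assert (compare_section_name (".sdata", ".sdata"));      /* exact match */
  assert (compare_section_name (".sdata.foo", ".sdata"));  /* '.' follows */
  assert (!compare_section_name (".sdata2", ".sdata"));    /* '2' follows */
}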
30033 bool
30034 rs6000_elf_in_small_data_p (const_tree decl)
30036 if (rs6000_sdata == SDATA_NONE)
30037 return false;
30039 /* We want to merge strings, so we never consider them small data. */
30040 if (TREE_CODE (decl) == STRING_CST)
30041 return false;
30043 /* Functions are never in the small data area. */
30044 if (TREE_CODE (decl) == FUNCTION_DECL)
30045 return false;
30047 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
30049 const char *section = DECL_SECTION_NAME (decl);
30050 if (compare_section_name (section, ".sdata")
30051 || compare_section_name (section, ".sdata2")
30052 || compare_section_name (section, ".gnu.linkonce.s")
30053 || compare_section_name (section, ".sbss")
30054 || compare_section_name (section, ".sbss2")
30055 || compare_section_name (section, ".gnu.linkonce.sb")
30056 || strcmp (section, ".PPC.EMB.sdata0") == 0
30057 || strcmp (section, ".PPC.EMB.sbss0") == 0)
30058 return true;
30060 else
30062 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
30064 if (size > 0
30065 && size <= g_switch_value
30066 /* If it's not public, and we're not going to reference it there,
30067 there's no need to put it in the small data section. */
30068 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
30069 return true;
30072 return false;
30075 #endif /* USING_ELFOS_H */
30077 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
30079 static bool
30080 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
30082 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
30085 /* Do not place thread-local symbols refs in the object blocks. */
30087 static bool
30088 rs6000_use_blocks_for_decl_p (const_tree decl)
30090 return !DECL_THREAD_LOCAL_P (decl);
30093 /* Return a REG that occurs in ADDR with coefficient 1.
30094 ADDR can be effectively incremented by incrementing REG.
30096 r0 is special and we must not select it as an address
30097 register by this routine since our caller will try to
30098 increment the returned register via an "la" instruction. */
30100 rtx
30101 find_addr_reg (rtx addr)
30103 while (GET_CODE (addr) == PLUS)
30105 if (GET_CODE (XEXP (addr, 0)) == REG
30106 && REGNO (XEXP (addr, 0)) != 0)
30107 addr = XEXP (addr, 0);
30108 else if (GET_CODE (XEXP (addr, 1)) == REG
30109 && REGNO (XEXP (addr, 1)) != 0)
30110 addr = XEXP (addr, 1);
30111 else if (CONSTANT_P (XEXP (addr, 0)))
30112 addr = XEXP (addr, 1);
30113 else if (CONSTANT_P (XEXP (addr, 1)))
30114 addr = XEXP (addr, 0);
30115 else
30116 gcc_unreachable ();
30118 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
30119 return addr;
30122 void
30123 rs6000_fatal_bad_address (rtx op)
30125 fatal_insn ("bad address", op);
30128 #if TARGET_MACHO
30130 typedef struct branch_island_d {
30131 tree function_name;
30132 tree label_name;
30133 int line_number;
30134 } branch_island;
30137 static vec<branch_island, va_gc> *branch_islands;
30139 /* Remember to generate a branch island for far calls to the given
30140 function. */
30142 static void
30143 add_compiler_branch_island (tree label_name, tree function_name,
30144 int line_number)
30146 branch_island bi = {function_name, label_name, line_number};
30147 vec_safe_push (branch_islands, bi);
30150 /* Generate far-jump branch islands for everything recorded in
30151 branch_islands. Invoked immediately after the last instruction of
30152 the epilogue has been emitted; the branch islands must be appended
30153 to, and contiguous with, the function body. Mach-O stubs are
30154 generated in machopic_output_stub(). */
30156 static void
30157 macho_branch_islands (void)
30159 char tmp_buf[512];
30161 while (!vec_safe_is_empty (branch_islands))
30163 branch_island *bi = &branch_islands->last ();
30164 const char *label = IDENTIFIER_POINTER (bi->label_name);
30165 const char *name = IDENTIFIER_POINTER (bi->function_name);
30166 char name_buf[512];
30167 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
30168 if (name[0] == '*' || name[0] == '&')
30169 strcpy (name_buf, name+1);
30170 else
30172 name_buf[0] = '_';
30173 strcpy (name_buf+1, name);
30175 strcpy (tmp_buf, "\n");
30176 strcat (tmp_buf, label);
30177 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
30178 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
30179 dbxout_stabd (N_SLINE, bi->line_number);
30180 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
30181 if (flag_pic)
30183 if (TARGET_LINK_STACK)
30185 char name[32];
30186 get_ppc476_thunk_name (name);
30187 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
30188 strcat (tmp_buf, name);
30189 strcat (tmp_buf, "\n");
30190 strcat (tmp_buf, label);
30191 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
30193 else
30195 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
30196 strcat (tmp_buf, label);
30197 strcat (tmp_buf, "_pic\n");
30198 strcat (tmp_buf, label);
30199 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
30202 strcat (tmp_buf, "\taddis r11,r11,ha16(");
30203 strcat (tmp_buf, name_buf);
30204 strcat (tmp_buf, " - ");
30205 strcat (tmp_buf, label);
30206 strcat (tmp_buf, "_pic)\n");
30208 strcat (tmp_buf, "\tmtlr r0\n");
30210 strcat (tmp_buf, "\taddi r12,r11,lo16(");
30211 strcat (tmp_buf, name_buf);
30212 strcat (tmp_buf, " - ");
30213 strcat (tmp_buf, label);
30214 strcat (tmp_buf, "_pic)\n");
30216 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
30218 else
30220 strcat (tmp_buf, ":\nlis r12,hi16(");
30221 strcat (tmp_buf, name_buf);
30222 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
30223 strcat (tmp_buf, name_buf);
30224 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
30226 output_asm_insn (tmp_buf, 0);
30227 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
30228 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
30229 dbxout_stabd (N_SLINE, bi->line_number);
30230 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
30231 branch_islands->pop ();
30235 /* NO_PREVIOUS_DEF checks in the link list whether the function name is
30236 already there or not. */
30238 static int
30239 no_previous_def (tree function_name)
30241 branch_island *bi;
30242 unsigned ix;
30244 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
30245 if (function_name == bi->function_name)
30246 return 0;
30247 return 1;
30250 /* GET_PREV_LABEL gets the label name from the previous definition of
30251 the function. */
30253 static tree
30254 get_prev_label (tree function_name)
30256 branch_island *bi;
30257 unsigned ix;
30259 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
30260 if (function_name == bi->function_name)
30261 return bi->label_name;
30262 return NULL_TREE;
30265 /* INSN is either a function call or a millicode call. It may have an
30266 unconditional jump in its delay slot.
30268 CALL_DEST is the routine we are calling. */
30270 char *
30271 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
30272 int cookie_operand_number)
30274 static char buf[256];
30275 if (darwin_emit_branch_islands
30276 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
30277 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
30279 tree labelname;
30280 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
30282 if (no_previous_def (funname))
30284 rtx label_rtx = gen_label_rtx ();
30285 char *label_buf, temp_buf[256];
30286 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
30287 CODE_LABEL_NUMBER (label_rtx));
30288 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
30289 labelname = get_identifier (label_buf);
30290 add_compiler_branch_island (labelname, funname, insn_line (insn));
30292 else
30293 labelname = get_prev_label (funname);
30295 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
30296 instruction will reach 'foo', otherwise link as 'bl L42'".
30297 "L42" should be a 'branch island', that will do a far jump to
30298 'foo'. Branch islands are generated in
30299 macho_branch_islands(). */
30300 sprintf (buf, "jbsr %%z%d,%.246s",
30301 dest_operand_number, IDENTIFIER_POINTER (labelname));
30303 else
30304 sprintf (buf, "bl %%z%d", dest_operand_number);
30305 return buf;
30308 /* Generate PIC and indirect symbol stubs. */
30310 void
30311 machopic_output_stub (FILE *file, const char *symb, const char *stub)
30313 unsigned int length;
30314 char *symbol_name, *lazy_ptr_name;
30315 char *local_label_0;
30316 static int label = 0;
30318 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
30319 symb = (*targetm.strip_name_encoding) (symb);
30322 length = strlen (symb);
30323 symbol_name = XALLOCAVEC (char, length + 32);
30324 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
30326 lazy_ptr_name = XALLOCAVEC (char, length + 32);
30327 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
30329 if (flag_pic == 2)
30330 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
30331 else
30332 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
30334 if (flag_pic == 2)
30336 fprintf (file, "\t.align 5\n");
30338 fprintf (file, "%s:\n", stub);
30339 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30341 label++;
30342 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
30343 sprintf (local_label_0, "\"L%011d$spb\"", label);
30345 fprintf (file, "\tmflr r0\n");
30346 if (TARGET_LINK_STACK)
30348 char name[32];
30349 get_ppc476_thunk_name (name);
30350 fprintf (file, "\tbl %s\n", name);
30351 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
30353 else
30355 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
30356 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
30358 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
30359 lazy_ptr_name, local_label_0);
30360 fprintf (file, "\tmtlr r0\n");
30361 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
30362 (TARGET_64BIT ? "ldu" : "lwzu"),
30363 lazy_ptr_name, local_label_0);
30364 fprintf (file, "\tmtctr r12\n");
30365 fprintf (file, "\tbctr\n");
30367 else
30369 fprintf (file, "\t.align 4\n");
30371 fprintf (file, "%s:\n", stub);
30372 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30374 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
30375 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
30376 (TARGET_64BIT ? "ldu" : "lwzu"),
30377 lazy_ptr_name);
30378 fprintf (file, "\tmtctr r12\n");
30379 fprintf (file, "\tbctr\n");
30382 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
30383 fprintf (file, "%s:\n", lazy_ptr_name);
30384 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
30385 fprintf (file, "%sdyld_stub_binding_helper\n",
30386 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
30389 /* Legitimize PIC addresses. If the address is already
30390 position-independent, we return ORIG. Newly generated
30391 position-independent addresses go into a reg. This is REG if
30392 nonzero, otherwise we allocate register(s) as necessary. */
30394 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
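/* Editor's note: SMALL_INT tests whether the constant fits a signed
   16-bit immediate; an equivalent check on a plain integer (sketch):  */
#include <stdbool.h>

static inline bool
small_int_p (long long v)
{
  return v >= -0x8000 && v < 0x8000;
}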
30396 rtx
30397 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
30398 rtx reg)
30400 rtx base, offset;
30402 if (reg == NULL && ! reload_in_progress && ! reload_completed)
30403 reg = gen_reg_rtx (Pmode);
30405 if (GET_CODE (orig) == CONST)
30407 rtx reg_temp;
30409 if (GET_CODE (XEXP (orig, 0)) == PLUS
30410 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
30411 return orig;
30413 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
30415 /* Use a different reg for the intermediate value, as
30416 it will be marked UNCHANGING. */
30417 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
30418 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
30419 Pmode, reg_temp);
30420 offset =
30421 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
30422 Pmode, reg);
30424 if (GET_CODE (offset) == CONST_INT)
30426 if (SMALL_INT (offset))
30427 return plus_constant (Pmode, base, INTVAL (offset));
30428 else if (! reload_in_progress && ! reload_completed)
30429 offset = force_reg (Pmode, offset);
30430 else
30432 rtx mem = force_const_mem (Pmode, orig);
30433 return machopic_legitimize_pic_address (mem, Pmode, reg);
30436 return gen_rtx_PLUS (Pmode, base, offset);
30439 /* Fall back on generic machopic code. */
30440 return machopic_legitimize_pic_address (orig, mode, reg);
30443 /* Output a .machine directive for the Darwin assembler, and call
30444 the generic start_file routine. */
30446 static void
30447 rs6000_darwin_file_start (void)
30449 static const struct
30451 const char *arg;
30452 const char *name;
30453 HOST_WIDE_INT if_set;
30454 } mapping[] = {
30455 { "ppc64", "ppc64", MASK_64BIT },
30456 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
30457 { "power4", "ppc970", 0 },
30458 { "G5", "ppc970", 0 },
30459 { "7450", "ppc7450", 0 },
30460 { "7400", "ppc7400", MASK_ALTIVEC },
30461 { "G4", "ppc7400", 0 },
30462 { "750", "ppc750", 0 },
30463 { "740", "ppc750", 0 },
30464 { "G3", "ppc750", 0 },
30465 { "604e", "ppc604e", 0 },
30466 { "604", "ppc604", 0 },
30467 { "603e", "ppc603", 0 },
30468 { "603", "ppc603", 0 },
30469 { "601", "ppc601", 0 },
30470 { NULL, "ppc", 0 } };
30471 const char *cpu_id = "";
30472 size_t i;
30474 rs6000_file_start ();
30475 darwin_file_start ();
30477 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
30479 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
30480 cpu_id = rs6000_default_cpu;
30482 if (global_options_set.x_rs6000_cpu_index)
30483 cpu_id = processor_target_table[rs6000_cpu_index].name;
30485 /* Look through the mapping array. Pick the first name that either
30486 matches the argument, has a bit set in IF_SET that is also set
30487 in the target flags, or has a NULL name. */
30489 i = 0;
30490 while (mapping[i].arg != NULL
30491 && strcmp (mapping[i].arg, cpu_id) != 0
30492 && (mapping[i].if_set & rs6000_isa_flags) == 0)
30493 i++;
30495 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
30498 #endif /* TARGET_MACHO */
30500 #if TARGET_ELF
30501 static int
30502 rs6000_elf_reloc_rw_mask (void)
30504 if (flag_pic)
30505 return 3;
30506 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
30507 return 2;
30508 else
30509 return 0;
30512 /* Record an element in the table of global constructors. SYMBOL is
30513 a SYMBOL_REF of the function to be called; PRIORITY is a number
30514 between 0 and MAX_INIT_PRIORITY.
30516 This differs from default_named_section_asm_out_constructor in
30517 that we have special handling for -mrelocatable. */
30519 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
30520 static void
30521 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
30523 const char *section = ".ctors";
30524 char buf[16];
30526 if (priority != DEFAULT_INIT_PRIORITY)
30528 sprintf (buf, ".ctors.%.5u",
30529 /* Invert the numbering so the linker puts us in the proper
30530 order; constructors are run from right to left, and the
30531 linker sorts in increasing order. */
30532 MAX_INIT_PRIORITY - priority);
30533 section = buf;
30536 switch_to_section (get_section (section, SECTION_WRITE, NULL));
30537 assemble_align (POINTER_SIZE);
30539 if (TARGET_RELOCATABLE)
30541 fputs ("\t.long (", asm_out_file);
30542 output_addr_const (asm_out_file, symbol);
30543 fputs (")@fixup\n", asm_out_file);
30545 else
30546 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
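#include <stdio.h>

/* Editor's note: worked example of the inverted priority naming above,
   assuming MAX_INIT_PRIORITY is 65535: priority 100 yields the section
   name ".ctors.65435".  The inversion compensates for the linker
   sorting names in increasing order while constructors run from right
   to left.  */
static void
ctor_section_name_example (char buf[16], int priority)
{
  sprintf (buf, ".ctors.%.5u", (unsigned) (65535 - priority));
}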
30549 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
30550 static void
30551 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
30553 const char *section = ".dtors";
30554 char buf[16];
30556 if (priority != DEFAULT_INIT_PRIORITY)
30558 sprintf (buf, ".dtors.%.5u",
30559 /* Invert the numbering so the linker puts us in the proper
30560 order; constructors are run from right to left, and the
30561 linker sorts in increasing order. */
30562 MAX_INIT_PRIORITY - priority);
30563 section = buf;
30566 switch_to_section (get_section (section, SECTION_WRITE, NULL));
30567 assemble_align (POINTER_SIZE);
30569 if (TARGET_RELOCATABLE)
30571 fputs ("\t.long (", asm_out_file);
30572 output_addr_const (asm_out_file, symbol);
30573 fputs (")@fixup\n", asm_out_file);
30575 else
30576 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
30579 void
30580 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
30582 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
30584 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
30585 ASM_OUTPUT_LABEL (file, name);
30586 fputs (DOUBLE_INT_ASM_OP, file);
30587 rs6000_output_function_entry (file, name);
30588 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
30589 if (DOT_SYMBOLS)
30591 fputs ("\t.size\t", file);
30592 assemble_name (file, name);
30593 fputs (",24\n\t.type\t.", file);
30594 assemble_name (file, name);
30595 fputs (",@function\n", file);
30596 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
30598 fputs ("\t.globl\t.", file);
30599 assemble_name (file, name);
30600 putc ('\n', file);
30603 else
30604 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
30605 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
30606 rs6000_output_function_entry (file, name);
30607 fputs (":\n", file);
30608 return;
30611 if (TARGET_RELOCATABLE
30612 && !TARGET_SECURE_PLT
30613 && (get_pool_size () != 0 || crtl->profile)
30614 && uses_TOC ())
30616 char buf[256];
30618 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
30620 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
30621 fprintf (file, "\t.long ");
30622 assemble_name (file, buf);
30623 putc ('-', file);
30624 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
30625 assemble_name (file, buf);
30626 putc ('\n', file);
30629 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
30630 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
30632 if (DEFAULT_ABI == ABI_AIX)
30634 const char *desc_name, *orig_name;
30636 orig_name = (*targetm.strip_name_encoding) (name);
30637 desc_name = orig_name;
30638 while (*desc_name == '.')
30639 desc_name++;
30641 if (TREE_PUBLIC (decl))
30642 fprintf (file, "\t.globl %s\n", desc_name);
30644 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
30645 fprintf (file, "%s:\n", desc_name);
30646 fprintf (file, "\t.long %s\n", orig_name);
30647 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
30648 fputs ("\t.long 0\n", file);
30649 fprintf (file, "\t.previous\n");
30651 ASM_OUTPUT_LABEL (file, name);
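/* Editor's note: the 64-bit ELFv1 ".opd" function descriptor emitted
   above, viewed as a struct (sketch): entry address, TOC base, and an
   unused environment word (the trailing ",0").  */
struct elfv1_opd_sketch { void *entry; void *toc_base; void *env; };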
30654 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
30655 static void
30656 rs6000_elf_file_end (void)
30658 #ifdef HAVE_AS_GNU_ATTRIBUTE
30659 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
30661 if (rs6000_passes_float)
30662 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
30663 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
30664 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
30665 : 2));
30666 if (rs6000_passes_vector)
30667 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
30668 (TARGET_ALTIVEC_ABI ? 2
30669 : TARGET_SPE_ABI ? 3
30670 : 1));
30671 if (rs6000_returns_struct)
30672 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
30673 aix_struct_return ? 2 : 1);
30675 #endif
30676 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
30677 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
30678 file_end_indicate_exec_stack ();
30679 #endif
30681 if (flag_split_stack)
30682 file_end_indicate_split_stack ();
30684 #endif
30686 #if TARGET_XCOFF
30687 static void
30688 rs6000_xcoff_asm_output_anchor (rtx symbol)
30690 char buffer[100];
30692 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
30693 SYMBOL_REF_BLOCK_OFFSET (symbol));
30694 fprintf (asm_out_file, "%s", SET_ASM_OP);
30695 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
30696 fprintf (asm_out_file, ",");
30697 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
30698 fprintf (asm_out_file, "\n");
30701 static void
30702 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
30704 fputs (GLOBAL_ASM_OP, stream);
30705 RS6000_OUTPUT_BASENAME (stream, name);
30706 putc ('\n', stream);
30709 /* A get_unnamed_decl callback, used for read-only sections. PTR
30710 points to the section string variable. */
30712 static void
30713 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
30715 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
30716 *(const char *const *) directive,
30717 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
30720 /* Likewise for read-write sections. */
30722 static void
30723 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
30725 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
30726 *(const char *const *) directive,
30727 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
30730 static void
30731 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
30733 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
30734 *(const char *const *) directive,
30735 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
30738 /* A get_unnamed_section callback, used for switching to toc_section. */
30740 static void
30741 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
30743 if (TARGET_MINIMAL_TOC)
30745 /* toc_section is always selected at least once from
30746 rs6000_xcoff_file_start, so this is guaranteed to be
30747 defined exactly once in each file. */
30748 if (!toc_initialized)
30750 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
30751 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
30752 toc_initialized = 1;
30754 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
30755 (TARGET_32BIT ? "" : ",3"));
30757 else
30758 fputs ("\t.toc\n", asm_out_file);
30761 /* Implement TARGET_ASM_INIT_SECTIONS. */
30763 static void
30764 rs6000_xcoff_asm_init_sections (void)
30766 read_only_data_section
30767 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
30768 &xcoff_read_only_section_name);
30770 private_data_section
30771 = get_unnamed_section (SECTION_WRITE,
30772 rs6000_xcoff_output_readwrite_section_asm_op,
30773 &xcoff_private_data_section_name);
30775 tls_data_section
30776 = get_unnamed_section (SECTION_TLS,
30777 rs6000_xcoff_output_tls_section_asm_op,
30778 &xcoff_tls_data_section_name);
30780 tls_private_data_section
30781 = get_unnamed_section (SECTION_TLS,
30782 rs6000_xcoff_output_tls_section_asm_op,
30783 &xcoff_private_data_section_name);
30785 read_only_private_data_section
30786 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
30787 &xcoff_private_data_section_name);
30789 toc_section
30790 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
30792 readonly_data_section = read_only_data_section;
30795 static int
30796 rs6000_xcoff_reloc_rw_mask (void)
30798 return 3;
30801 static void
30802 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
30803 tree decl ATTRIBUTE_UNUSED)
30805 int smclass;
30806 static const char * const suffix[4] = { "PR", "RO", "RW", "TL" };
30808 if (flags & SECTION_CODE)
30809 smclass = 0;
30810 else if (flags & SECTION_TLS)
30811 smclass = 3;
30812 else if (flags & SECTION_WRITE)
30813 smclass = 2;
30814 else
30815 smclass = 1;
30817 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
30818 (flags & SECTION_CODE) ? "." : "",
30819 name, suffix[smclass], flags & SECTION_ENTSIZE);
30822 #define IN_NAMED_SECTION(DECL) \
30823 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
30824 && DECL_SECTION_NAME (DECL) != NULL)
30826 static section *
30827 rs6000_xcoff_select_section (tree decl, int reloc,
30828 unsigned HOST_WIDE_INT align)
30830 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
30831 a named section. */
30832 if (align > BIGGEST_ALIGNMENT)
30834 resolve_unique_section (decl, reloc, true);
30835 if (IN_NAMED_SECTION (decl))
30836 return get_named_section (decl, NULL, reloc);
30839 if (decl_readonly_section (decl, reloc))
30841 if (TREE_PUBLIC (decl))
30842 return read_only_data_section;
30843 else
30844 return read_only_private_data_section;
30846 else
30848 #if HAVE_AS_TLS
30849 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
30851 if (TREE_PUBLIC (decl))
30852 return tls_data_section;
30853 else if (bss_initializer_p (decl))
30855 /* Convert to COMMON to emit in BSS. */
30856 DECL_COMMON (decl) = 1;
30857 return tls_comm_section;
30859 else
30860 return tls_private_data_section;
30862 else
30863 #endif
30864 if (TREE_PUBLIC (decl))
30865 return data_section;
30866 else
30867 return private_data_section;
30871 static void
30872 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
30874 const char *name;
30876 /* Use select_section for private data and uninitialized data with
30877 alignment <= BIGGEST_ALIGNMENT. */
30878 if (!TREE_PUBLIC (decl)
30879 || DECL_COMMON (decl)
30880 || (DECL_INITIAL (decl) == NULL_TREE
30881 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
30882 || DECL_INITIAL (decl) == error_mark_node
30883 || (flag_zero_initialized_in_bss
30884 && initializer_zerop (DECL_INITIAL (decl))))
30885 return;
30887 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
30888 name = (*targetm.strip_name_encoding) (name);
30889 set_decl_section_name (decl, name);
30892 /* Select section for constant in constant pool.
30894 On RS/6000, all constants are in the private read-only data area.
30895 However, if this is being placed in the TOC, it must be output as a
30896 TOC entry. */
30898 static section *
30899 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
30900 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
30902 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
30903 return toc_section;
30904 else
30905 return read_only_private_data_section;
30908 /* Remove any trailing [DS] or the like from the symbol name. */
30910 static const char *
30911 rs6000_xcoff_strip_name_encoding (const char *name)
30913 size_t len;
30914 if (*name == '*')
30915 name++;
30916 len = strlen (name);
30917 if (name[len - 1] == ']')
30918 return ggc_alloc_string (name, len - 4);
30919 else
30920 return name;
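/* Editor's note: editorial examples (not part of GCC) of the stripping
   rule above: a leading '*' is dropped and a trailing four-character
   mapping-class suffix such as "[DS]" or "[RW]" is removed.  */
#include <assert.h>
#include <string.h>

static void
strip_name_encoding_examples (void)
{
  assert (strcmp (rs6000_xcoff_strip_name_encoding ("foo[DS]"), "foo") == 0);
  assert (strcmp (rs6000_xcoff_strip_name_encoding ("*bar[RW]"), "bar") == 0);
  assert (strcmp (rs6000_xcoff_strip_name_encoding ("plain"), "plain") == 0);
}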
30923 /* Section attributes. AIX is always PIC. */
30925 static unsigned int
30926 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
30928 unsigned int align;
30929 unsigned int flags = default_section_type_flags (decl, name, reloc);
30931 /* Align to at least UNIT size. */
30932 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
30933 align = MIN_UNITS_PER_WORD;
30934 else
30935 /* Increase alignment of large objects if not already stricter. */
30936 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
30937 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
30938 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
30940 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
30943 /* Output at beginning of assembler file.
30945 Initialize the section names for the RS/6000 at this point.
30947 Specify filename, including full path, to assembler.
30949 We want to go into the TOC section so at least one .toc will be emitted.
30950 Also, in order to output proper .bs/.es pairs, we need at least one static
30951 [RW] section emitted.
30953 Finally, declare mcount when profiling to make the assembler happy. */
30955 static void
30956 rs6000_xcoff_file_start (void)
30958 rs6000_gen_section_name (&xcoff_bss_section_name,
30959 main_input_filename, ".bss_");
30960 rs6000_gen_section_name (&xcoff_private_data_section_name,
30961 main_input_filename, ".rw_");
30962 rs6000_gen_section_name (&xcoff_read_only_section_name,
30963 main_input_filename, ".ro_");
30964 rs6000_gen_section_name (&xcoff_tls_data_section_name,
30965 main_input_filename, ".tls_");
30966 rs6000_gen_section_name (&xcoff_tbss_section_name,
30967 main_input_filename, ".tbss_[UL]");
30969 fputs ("\t.file\t", asm_out_file);
30970 output_quoted_string (asm_out_file, main_input_filename);
30971 fputc ('\n', asm_out_file);
30972 if (write_symbols != NO_DEBUG)
30973 switch_to_section (private_data_section);
30974 switch_to_section (text_section);
30975 if (profile_flag)
30976 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
30977 rs6000_file_start ();
30980 /* Output at end of assembler file.
30981 On the RS/6000, referencing data should automatically pull in text. */
30983 static void
30984 rs6000_xcoff_file_end (void)
30986 switch_to_section (text_section);
30987 fputs ("_section_.text:\n", asm_out_file);
30988 switch_to_section (data_section);
30989 fputs (TARGET_32BIT
30990 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
30991 asm_out_file);
30994 struct declare_alias_data
30996 FILE *file;
30997 bool function_descriptor;
31000 /* Declare alias N. A helper function for for_node_and_aliases. */
31002 static bool
31003 rs6000_declare_alias (struct symtab_node *n, void *d)
31005 struct declare_alias_data *data = (struct declare_alias_data *)d;
31006 /* The main symbol is output specially, because the varasm machinery does
31007 part of the job for us; we do not need to declare .globl/.lglobl and such. */
31008 if (!n->alias || n->weakref)
31009 return false;
31011 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
31012 return false;
31014 /* Prevent assemble_alias from trying to use the .set pseudo operation,
31015 which does not behave as expected by the middle end. */
31016 TREE_ASM_WRITTEN (n->decl) = true;
31018 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
31019 char *buffer = (char *) alloca (strlen (name) + 2);
31020 char *p;
31021 int dollar_inside = 0;
31023 strcpy (buffer, name);
31024 p = strchr (buffer, '$');
31025 while (p) {
31026 *p = '_';
31027 dollar_inside++;
31028 p = strchr (p + 1, '$');
31030 if (TREE_PUBLIC (n->decl))
31032 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
31034 if (dollar_inside) {
31035 if (data->function_descriptor)
31036 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
31037 else
31038 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
31040 if (data->function_descriptor)
31041 fputs ("\t.globl .", data->file);
31042 else
31043 fputs ("\t.globl ", data->file);
31044 RS6000_OUTPUT_BASENAME (data->file, buffer);
31045 putc ('\n', data->file);
31047 #ifdef ASM_WEAKEN_DECL
31048 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
31049 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
31050 #endif
31052 else
31054 if (dollar_inside)
31056 if (data->function_descriptor)
31057 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
31058 else
31059 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
31061 if (data->function_descriptor)
31062 fputs ("\t.lglobl .", data->file);
31063 else
31064 fputs ("\t.lglobl ", data->file);
31065 RS6000_OUTPUT_BASENAME (data->file, buffer);
31066 putc ('\n', data->file);
31068 if (data->function_descriptor)
31069 fputs (".", data->file);
31070 RS6000_OUTPUT_BASENAME (data->file, buffer);
31071 fputs (":\n", data->file);
31072 return false;
31075 /* This macro produces the initial definition of a function name.
31076 On the RS/6000, we need to place an extra '.' in the function name and
31077 output the function descriptor.
31078 Dollar signs are converted to underscores.
31080 The csect for the function will have already been created when
31081 text_section was selected. We do have to go back to that csect, however.
31083 The third and fourth parameters to the .function pseudo-op (16 and 044)
31084 are placeholders which no longer have any use.
31086 Because the AIX assembler's .set command has unexpected semantics, we output
31087 all aliases as alternative labels in front of the definition. */
31089 void
31090 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
31092 char *buffer = (char *) alloca (strlen (name) + 1);
31093 char *p;
31094 int dollar_inside = 0;
31095 struct declare_alias_data data = {file, false};
31097 strcpy (buffer, name);
31098 p = strchr (buffer, '$');
31099 while (p) {
31100 *p = '_';
31101 dollar_inside++;
31102 p = strchr (p + 1, '$');
31104 if (TREE_PUBLIC (decl))
31106 if (!RS6000_WEAK || !DECL_WEAK (decl))
31108 if (dollar_inside) {
31109 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
31110 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
31112 fputs ("\t.globl .", file);
31113 RS6000_OUTPUT_BASENAME (file, buffer);
31114 putc ('\n', file);
31117 else
31119 if (dollar_inside) {
31120 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
31121 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
31123 fputs ("\t.lglobl .", file);
31124 RS6000_OUTPUT_BASENAME (file, buffer);
31125 putc ('\n', file);
31127 fputs ("\t.csect ", file);
31128 RS6000_OUTPUT_BASENAME (file, buffer);
31129 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
31130 RS6000_OUTPUT_BASENAME (file, buffer);
31131 fputs (":\n", file);
31132 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
31133 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
31134 RS6000_OUTPUT_BASENAME (file, buffer);
31135 fputs (", TOC[tc0], 0\n", file);
31136 in_section = NULL;
31137 switch_to_section (function_section (decl));
31138 putc ('.', file);
31139 RS6000_OUTPUT_BASENAME (file, buffer);
31140 fputs (":\n", file);
31141 data.function_descriptor = true;
31142 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
31143 if (write_symbols != NO_DEBUG && !DECL_IGNORED_P (decl))
31144 xcoffout_declare_function (file, decl, buffer);
31145 return;
31148 /* This macro produces the initial definition of an object (variable) name.
31149 Because the AIX assembler's .set command has unexpected semantics, we output
31150 all aliases as alternative labels in front of the definition. */
31152 void
31153 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
31155 struct declare_alias_data data = {file, false};
31156 RS6000_OUTPUT_BASENAME (file, name);
31157 fputs (":\n", file);
31158 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
31161 /* Override the default 'SYMBOL-.' syntax with the AIX-compatible 'SYMBOL-$'. */
31163 void
31164 rs6000_asm_output_dwarf_pcrel (FILE *file, int size, const char *label)
31166 fputs (integer_asm_op (size, FALSE), file);
31167 assemble_name (file, label);
31168 fputs ("-$", file);
31171 /* Output a symbol offset relative to the dbase for the current object.
31172 We use __gcc_unwind_dbase as an arbitrary base for dbase and assume
31173 signed offsets.
31175 __gcc_unwind_dbase is embedded in all executables/libraries through
31176 libgcc/config/rs6000/crtdbase.S. */
31178 void
31179 rs6000_asm_output_dwarf_datarel (FILE *file, int size, const char *label)
31181 fputs (integer_asm_op (size, FALSE), file);
31182 assemble_name (file, label);
31183 fputs("-__gcc_unwind_dbase", file);
31186 #ifdef HAVE_AS_TLS
31187 static void
31188 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
31190 rtx symbol;
31191 int flags;
31193 default_encode_section_info (decl, rtl, first);
31195 /* Careful not to prod global register variables. */
31196 if (!MEM_P (rtl))
31197 return;
31198 symbol = XEXP (rtl, 0);
31199 if (GET_CODE (symbol) != SYMBOL_REF)
31200 return;
31202 flags = SYMBOL_REF_FLAGS (symbol);
31204 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
31205 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
31207 SYMBOL_REF_FLAGS (symbol) = flags;
31209 #endif /* HAVE_AS_TLS */
31210 #endif /* TARGET_XCOFF */
31212 /* Compute a (partial) cost for rtx X. Return true if the complete
31213 cost has been computed, and false if subexpressions should be
31214 scanned. In either case, *TOTAL contains the cost result. */
31216 static bool
31217 rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
31218 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
31220 int code = GET_CODE (x);
31222 switch (code)
31224 /* On the RS/6000, if it is valid in the insn, it is free. */
31225 case CONST_INT:
31226 if (((outer_code == SET
31227 || outer_code == PLUS
31228 || outer_code == MINUS)
31229 && (satisfies_constraint_I (x)
31230 || satisfies_constraint_L (x)))
31231 || (outer_code == AND
31232 && (satisfies_constraint_K (x)
31233 || (mode == SImode
31234 ? satisfies_constraint_L (x)
31235 : satisfies_constraint_J (x))))
31236 || ((outer_code == IOR || outer_code == XOR)
31237 && (satisfies_constraint_K (x)
31238 || (mode == SImode
31239 ? satisfies_constraint_L (x)
31240 : satisfies_constraint_J (x))))
31241 || outer_code == ASHIFT
31242 || outer_code == ASHIFTRT
31243 || outer_code == LSHIFTRT
31244 || outer_code == ROTATE
31245 || outer_code == ROTATERT
31246 || outer_code == ZERO_EXTRACT
31247 || (outer_code == MULT
31248 && satisfies_constraint_I (x))
31249 || ((outer_code == DIV || outer_code == UDIV
31250 || outer_code == MOD || outer_code == UMOD)
31251 && exact_log2 (INTVAL (x)) >= 0)
31252 || (outer_code == COMPARE
31253 && (satisfies_constraint_I (x)
31254 || satisfies_constraint_K (x)))
31255 || ((outer_code == EQ || outer_code == NE)
31256 && (satisfies_constraint_I (x)
31257 || satisfies_constraint_K (x)
31258 || (mode == SImode
31259 ? satisfies_constraint_L (x)
31260 : satisfies_constraint_J (x))))
31261 || (outer_code == GTU
31262 && satisfies_constraint_I (x))
31263 || (outer_code == LTU
31264 && satisfies_constraint_P (x)))
31266 *total = 0;
31267 return true;
31269 else if ((outer_code == PLUS
31270 && reg_or_add_cint_operand (x, VOIDmode))
31271 || (outer_code == MINUS
31272 && reg_or_sub_cint_operand (x, VOIDmode))
31273 || ((outer_code == SET
31274 || outer_code == IOR
31275 || outer_code == XOR)
31276 && (INTVAL (x)
31277 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
31279 *total = COSTS_N_INSNS (1);
31280 return true;
31282 /* FALLTHRU */
31284 case CONST_DOUBLE:
31285 case CONST_WIDE_INT:
31286 case CONST:
31287 case HIGH:
31288 case SYMBOL_REF:
31289 case MEM:
31290 /* When optimizing for size, MEM should be slightly more expensive
31291 than generating the address, e.g., (plus (reg) (const)).
31292 L1 cache latency is about two instructions. */
31293 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
31294 return true;
31296 case LABEL_REF:
31297 *total = 0;
31298 return true;
31300 case PLUS:
31301 case MINUS:
31302 if (FLOAT_MODE_P (mode))
31303 *total = rs6000_cost->fp;
31304 else
31305 *total = COSTS_N_INSNS (1);
31306 return false;
31308 case MULT:
31309 if (GET_CODE (XEXP (x, 1)) == CONST_INT
31310 && satisfies_constraint_I (XEXP (x, 1)))
31312 if (INTVAL (XEXP (x, 1)) >= -256
31313 && INTVAL (XEXP (x, 1)) <= 255)
31314 *total = rs6000_cost->mulsi_const9;
31315 else
31316 *total = rs6000_cost->mulsi_const;
31318 else if (mode == SFmode)
31319 *total = rs6000_cost->fp;
31320 else if (FLOAT_MODE_P (mode))
31321 *total = rs6000_cost->dmul;
31322 else if (mode == DImode)
31323 *total = rs6000_cost->muldi;
31324 else
31325 *total = rs6000_cost->mulsi;
31326 return false;
31328 case FMA:
31329 if (mode == SFmode)
31330 *total = rs6000_cost->fp;
31331 else
31332 *total = rs6000_cost->dmul;
31333 break;
31335 case DIV:
31336 case MOD:
31337 if (FLOAT_MODE_P (mode))
31339 *total = mode == DFmode ? rs6000_cost->ddiv
31340 : rs6000_cost->sdiv;
31341 return false;
31343 /* FALLTHRU */
31345 case UDIV:
31346 case UMOD:
31347 if (GET_CODE (XEXP (x, 1)) == CONST_INT
31348 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
31350 if (code == DIV || code == MOD)
31351 /* Shift, addze */
31352 *total = COSTS_N_INSNS (2);
31353 else
31354 /* Shift */
31355 *total = COSTS_N_INSNS (1);
31357 else
31359 if (GET_MODE (XEXP (x, 1)) == DImode)
31360 *total = rs6000_cost->divdi;
31361 else
31362 *total = rs6000_cost->divsi;
31364 /* Add in shift and subtract for MOD. */
31365 if (code == MOD || code == UMOD)
31366 *total += COSTS_N_INSNS (2);
31367 return false;
31369 case CTZ:
31370 case FFS:
31371 *total = COSTS_N_INSNS (4);
31372 return false;
31374 case POPCOUNT:
31375 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
31376 return false;
31378 case PARITY:
31379 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
31380 return false;
31382 case NOT:
31383 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
31384 *total = 0;
31385 else
31386 *total = COSTS_N_INSNS (1);
31387 return false;
31389 case AND:
31390 if (CONST_INT_P (XEXP (x, 1)))
31392 rtx left = XEXP (x, 0);
31393 rtx_code left_code = GET_CODE (left);
31395 /* rotate-and-mask: 1 insn. */
31396 if ((left_code == ROTATE
31397 || left_code == ASHIFT
31398 || left_code == LSHIFTRT)
31399 && rs6000_is_valid_shift_mask (XEXP (x, 1), left, mode))
31401 *total = rtx_cost (XEXP (left, 0), mode, left_code, 0, speed);
31402 if (!CONST_INT_P (XEXP (left, 1)))
31403 *total += rtx_cost (XEXP (left, 1), SImode, left_code, 1, speed);
31404 *total += COSTS_N_INSNS (1);
31405 return true;
31408 /* rotate-and-mask (no rotate), andi., andis.: 1 insn. */
31409 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
31410 if (rs6000_is_valid_and_mask (XEXP (x, 1), mode)
31411 || (val & 0xffff) == val
31412 || (val & 0xffff0000) == val
31413 || ((val & 0xffff) == 0 && mode == SImode))
31415 *total = rtx_cost (left, mode, AND, 0, speed);
31416 *total += COSTS_N_INSNS (1);
31417 return true;
31420 /* 2 insns. */
31421 if (rs6000_is_valid_2insn_and (XEXP (x, 1), mode))
31423 *total = rtx_cost (left, mode, AND, 0, speed);
31424 *total += COSTS_N_INSNS (2);
31425 return true;
31429 *total = COSTS_N_INSNS (1);
31430 return false;
31432 case IOR:
31433 /* FIXME */
31434 *total = COSTS_N_INSNS (1);
31435 return true;
31437 case CLZ:
31438 case XOR:
31439 case ZERO_EXTRACT:
31440 *total = COSTS_N_INSNS (1);
31441 return false;
31443 case ASHIFT:
31444 case ASHIFTRT:
31445 case LSHIFTRT:
31446 case ROTATE:
31447 case ROTATERT:
31448 /* Handle mul_highpart. */
31449 if (outer_code == TRUNCATE
31450 && GET_CODE (XEXP (x, 0)) == MULT)
31452 if (mode == DImode)
31453 *total = rs6000_cost->muldi;
31454 else
31455 *total = rs6000_cost->mulsi;
31456 return true;
31458 else if (outer_code == AND)
31459 *total = 0;
31460 else
31461 *total = COSTS_N_INSNS (1);
31462 return false;
31464 case SIGN_EXTEND:
31465 case ZERO_EXTEND:
31466 if (GET_CODE (XEXP (x, 0)) == MEM)
31467 *total = 0;
31468 else
31469 *total = COSTS_N_INSNS (1);
31470 return false;
31472 case COMPARE:
31473 case NEG:
31474 case ABS:
31475 if (!FLOAT_MODE_P (mode))
31477 *total = COSTS_N_INSNS (1);
31478 return false;
31480 /* FALLTHRU */
31482 case FLOAT:
31483 case UNSIGNED_FLOAT:
31484 case FIX:
31485 case UNSIGNED_FIX:
31486 case FLOAT_TRUNCATE:
31487 *total = rs6000_cost->fp;
31488 return false;
31490 case FLOAT_EXTEND:
31491 if (mode == DFmode)
31492 *total = rs6000_cost->sfdf_convert;
31493 else
31494 *total = rs6000_cost->fp;
31495 return false;
31497 case UNSPEC:
31498 switch (XINT (x, 1))
31500 case UNSPEC_FRSP:
31501 *total = rs6000_cost->fp;
31502 return true;
31504 default:
31505 break;
31507 break;
31509 case CALL:
31510 case IF_THEN_ELSE:
31511 if (!speed)
31513 *total = COSTS_N_INSNS (1);
31514 return true;
31516 else if (FLOAT_MODE_P (mode)
31517 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
31519 *total = rs6000_cost->fp;
31520 return false;
31522 break;
31524 case NE:
31525 case EQ:
31526 case GTU:
31527 case LTU:
31528 /* Carry bit requires mode == Pmode.
31529 NEG or PLUS already counted so only add one. */
31530 if (mode == Pmode
31531 && (outer_code == NEG || outer_code == PLUS))
31533 *total = COSTS_N_INSNS (1);
31534 return true;
31536 if (outer_code == SET)
31538 if (XEXP (x, 1) == const0_rtx)
31540 if (TARGET_ISEL && !TARGET_MFCRF)
31541 *total = COSTS_N_INSNS (8);
31542 else
31543 *total = COSTS_N_INSNS (2);
31544 return true;
31546 else
31548 *total = COSTS_N_INSNS (3);
31549 return false;
31552 /* FALLTHRU */
31554 case GT:
31555 case LT:
31556 case UNORDERED:
31557 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
31559 if (TARGET_ISEL && !TARGET_MFCRF)
31560 *total = COSTS_N_INSNS (8);
31561 else
31562 *total = COSTS_N_INSNS (2);
31563 return true;
31565 /* CC COMPARE. */
31566 if (outer_code == COMPARE)
31568 *total = 0;
31569 return true;
31571 break;
31573 default:
31574 break;
31577 return false;
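/* Aside (added for exposition; not in the original source): the *total
   values above are expressed in COSTS_N_INSNS units.  At the time of
   writing, rtl.h defines the macro roughly as sketched below, which is
   why expressions like COSTS_N_INSNS (1) + 1 ("slightly more than one
   insn") are meaningful.  */
#if 0
#define COSTS_N_INSNS(N) ((N) * 4)
/* So the MEM case above yields 5 when optimizing for size (a bit more
   than one insn) and 8 (two insns) when optimizing for speed.  */
#endif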
31580 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
31582 static bool
31583 rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
31584 int opno, int *total, bool speed)
31586 bool ret = rs6000_rtx_costs (x, mode, outer_code, opno, total, speed);
31588 fprintf (stderr,
31589 "\nrs6000_rtx_costs, return = %s, mode = %s, outer_code = %s, "
31590 "opno = %d, total = %d, speed = %s, x:\n",
31591 ret ? "complete" : "scan inner",
31592 GET_MODE_NAME (mode),
31593 GET_RTX_NAME (outer_code),
31594 opno,
31595 *total,
31596 speed ? "true" : "false");
31598 debug_rtx (x);
31600 return ret;
31603 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
31605 static int
31606 rs6000_debug_address_cost (rtx x, machine_mode mode,
31607 addr_space_t as, bool speed)
31609 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
31611 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
31612 ret, speed ? "true" : "false");
31613 debug_rtx (x);
31615 return ret;
31619 /* A C expression returning the cost of moving data from a register of class
31620 CLASS1 to one of CLASS2. */
31622 static int
31623 rs6000_register_move_cost (machine_mode mode,
31624 reg_class_t from, reg_class_t to)
31626 int ret;
31628 if (TARGET_DEBUG_COST)
31629 dbg_cost_ctrl++;
31631 /* Moves from/to GENERAL_REGS. */
31632 if (reg_classes_intersect_p (to, GENERAL_REGS)
31633 || reg_classes_intersect_p (from, GENERAL_REGS))
31635 reg_class_t rclass = from;
31637 if (! reg_classes_intersect_p (to, GENERAL_REGS))
31638 rclass = to;
31640 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
31641 ret = (rs6000_memory_move_cost (mode, rclass, false)
31642 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
31644 /* It's more expensive to move CR_REGS than CR0_REGS because of the
31645 shift. */
31646 else if (rclass == CR_REGS)
31647 ret = 4;
31649 /* For those processors that have slow LR/CTR moves, make them more
31650 expensive than memory in order to bias spills to memory. */
31651 else if ((rs6000_cpu == PROCESSOR_POWER6
31652 || rs6000_cpu == PROCESSOR_POWER7
31653 || rs6000_cpu == PROCESSOR_POWER8)
31654 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
31655 ret = 6 * hard_regno_nregs[0][mode];
31657 else
31658 /* A move will cost one instruction per GPR moved. */
31659 ret = 2 * hard_regno_nregs[0][mode];
31662 /* If we have VSX, we can easily move between FPR or Altivec registers. */
31663 else if (VECTOR_MEM_VSX_P (mode)
31664 && reg_classes_intersect_p (to, VSX_REGS)
31665 && reg_classes_intersect_p (from, VSX_REGS))
31666 ret = 2 * hard_regno_nregs[32][mode];
31668 /* Moving between two similar registers is just one instruction. */
31669 else if (reg_classes_intersect_p (to, from))
31670 ret = (FLOAT128_2REG_P (mode)) ? 4 : 2;
31672 /* Everything else has to go through GENERAL_REGS. */
31673 else
31674 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
31675 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
31677 if (TARGET_DEBUG_COST)
31679 if (dbg_cost_ctrl == 1)
31680 fprintf (stderr,
31681 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
31682 ret, GET_MODE_NAME (mode), reg_class_names[from],
31683 reg_class_names[to]);
31684 dbg_cost_ctrl--;
31687 return ret;
31690 /* A C expression returning the cost of moving data of MODE from a register to
31691 or from memory. */
31693 static int
31694 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
31695 bool in ATTRIBUTE_UNUSED)
31697 int ret;
31699 if (TARGET_DEBUG_COST)
31700 dbg_cost_ctrl++;
31702 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
31703 ret = 4 * hard_regno_nregs[0][mode];
31704 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
31705 || reg_classes_intersect_p (rclass, VSX_REGS)))
31706 ret = 4 * hard_regno_nregs[32][mode];
31707 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
31708 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
31709 else
31710 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
31712 if (TARGET_DEBUG_COST)
31714 if (dbg_cost_ctrl == 1)
31715 fprintf (stderr,
31716 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
31717 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
31718 dbg_cost_ctrl--;
31721 return ret;
31724 /* Returns a code for a target-specific builtin that implements the
31725 reciprocal of the function, or NULL_TREE if not available. */
31727 static tree
31728 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
31729 bool sqrt ATTRIBUTE_UNUSED)
31731 if (optimize_insn_for_size_p ())
31732 return NULL_TREE;
31734 if (md_fn)
31735 switch (fn)
31737 case VSX_BUILTIN_XVSQRTDP:
31738 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
31739 return NULL_TREE;
31741 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
31743 case VSX_BUILTIN_XVSQRTSP:
31744 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
31745 return NULL_TREE;
31747 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
31749 default:
31750 return NULL_TREE;
31753 else
31754 switch (fn)
31756 case BUILT_IN_SQRT:
31757 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
31758 return NULL_TREE;
31760 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
31762 case BUILT_IN_SQRTF:
31763 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
31764 return NULL_TREE;
31766 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
31768 default:
31769 return NULL_TREE;
31773 /* Load up a constant. If the mode is a vector mode, splat the value across
31774 all of the vector elements. */
31776 static rtx
31777 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
31779 rtx reg;
31781 if (mode == SFmode || mode == DFmode)
31783 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
31784 reg = force_reg (mode, d);
31786 else if (mode == V4SFmode)
31788 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
31789 rtvec v = gen_rtvec (4, d, d, d, d);
31790 reg = gen_reg_rtx (mode);
31791 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
31793 else if (mode == V2DFmode)
31795 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
31796 rtvec v = gen_rtvec (2, d, d);
31797 reg = gen_reg_rtx (mode);
31798 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
31800 else
31801 gcc_unreachable ();
31803 return reg;
31806 /* Generate an FMA instruction. */
31808 static void
31809 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
31811 machine_mode mode = GET_MODE (target);
31812 rtx dst;
31814 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
31815 gcc_assert (dst != NULL);
31817 if (dst != target)
31818 emit_move_insn (target, dst);
31821 /* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
31823 static void
31824 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
31826 machine_mode mode = GET_MODE (target);
31827 rtx dst;
31829 /* Altivec does not support fms directly;
31830 generate in terms of fma in that case. */
31831 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
31832 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
31833 else
31835 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
31836 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
31838 gcc_assert (dst != NULL);
31840 if (dst != target)
31841 emit_move_insn (target, dst);
31844 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
31846 static void
31847 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
31849 machine_mode mode = GET_MODE (dst);
31850 rtx r;
31852 /* This is a tad more complicated, since the fnma_optab is for
31853 a different expression: fma(-m1, m2, a), which is the same
31854 thing except in the case of signed zeros.
31856 Fortunately we know that if FMA is supported, then FNMSUB is
31857 also supported in the ISA. Just expand it directly. */
31859 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
31861 r = gen_rtx_NEG (mode, a);
31862 r = gen_rtx_FMA (mode, m1, m2, r);
31863 r = gen_rtx_NEG (mode, r);
31864 emit_insn (gen_rtx_SET (dst, r));
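/* Illustrative sketch (added; not part of the original file): the three
   emit helpers above implement these scalar identities, shown here with
   fma from <math.h>.  The example_* names are hypothetical.  */
#if 0
#include <math.h>

static double example_madd (double m1, double m2, double a)
{ return fma (m1, m2, a); }             /* fmadd:  m1*m2 + a */

static double example_msub (double m1, double m2, double a)
{ return fma (m1, m2, -a); }            /* fmsub:  m1*m2 - a */

static double example_nmsub (double m1, double m2, double a)
{ return -fma (m1, m2, -a); }           /* fnmsub: -(m1*m2 - a) = a - m1*m2 */
#endif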
31867 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
31868 add a reg_note saying that this was a division. Support both scalar and
31869 vector divide. Assumes no trapping math and finite arguments. */
31871 void
31872 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
31874 machine_mode mode = GET_MODE (dst);
31875 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
31876 int i;
31878 /* Low precision estimates guarantee 5 bits of accuracy. High
31879 precision estimates guarantee 14 bits of accuracy. SFmode
31880 requires 23 bits of accuracy. DFmode requires 52 bits of
31881 accuracy. Each pass at least doubles the accuracy, leading
31882 to the following. */
31883 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
31884 if (mode == DFmode || mode == V2DFmode)
31885 passes++;
31887 enum insn_code code = optab_handler (smul_optab, mode);
31888 insn_gen_fn gen_mul = GEN_FCN (code);
31890 gcc_assert (code != CODE_FOR_nothing);
31892 one = rs6000_load_constant_and_splat (mode, dconst1);
31894 /* x0 = 1./d estimate */
31895 x0 = gen_reg_rtx (mode);
31896 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
31897 UNSPEC_FRES)));
31899 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
31900 if (passes > 1) {
31902 /* e0 = 1. - d * x0 */
31903 e0 = gen_reg_rtx (mode);
31904 rs6000_emit_nmsub (e0, d, x0, one);
31906 /* x1 = x0 + e0 * x0 */
31907 x1 = gen_reg_rtx (mode);
31908 rs6000_emit_madd (x1, e0, x0, x0);
31910 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
31911 ++i, xprev = xnext, eprev = enext) {
31913 /* enext = eprev * eprev */
31914 enext = gen_reg_rtx (mode);
31915 emit_insn (gen_mul (enext, eprev, eprev));
31917 /* xnext = xprev + enext * xprev */
31918 xnext = gen_reg_rtx (mode);
31919 rs6000_emit_madd (xnext, enext, xprev, xprev);
31922 } else
31923 xprev = x0;
31925 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
31927 /* u = n * xprev */
31928 u = gen_reg_rtx (mode);
31929 emit_insn (gen_mul (u, n, xprev));
31931 /* v = n - (d * u) */
31932 v = gen_reg_rtx (mode);
31933 rs6000_emit_nmsub (v, d, u, n);
31935 /* dst = (v * xprev) + u */
31936 rs6000_emit_madd (dst, v, xprev, u);
31938 if (note_p)
31939 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
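/* Illustrative scalar model of the expansion above (added; not part of
   the original file).  The initial 1.0 / d stands in for the low-precision
   fres/fre hardware estimate; PASSES mirrors the accuracy reasoning in
   the comment at the top of the function.  */
#if 0
static double
example_swdiv (double n, double d, int passes)
{
  double x = 1.0 / d;          /* stand-in for the fres estimate */
  double e = 1.0 - d * x;      /* residual of the estimate */
  for (int i = 0; i < passes - 1; i++)
    {
      x = x + e * x;           /* x_(i+1) = x_i * (2 - d * x_i) */
      e = e * e;               /* the residual squares on each pass */
    }
  /* The last pass folds in the numerator: n * x * (2 - d * x).  */
  double u = n * x;
  double v = n - d * u;        /* fnmsub */
  return v * x + u;            /* fmadd */
}
#endif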
31942 /* Newton-Raphson approximation of single/double-precision floating point
31943 rsqrt. Assumes no trapping math and finite arguments. */
31945 void
31946 rs6000_emit_swrsqrt (rtx dst, rtx src)
31948 machine_mode mode = GET_MODE (src);
31949 rtx x0 = gen_reg_rtx (mode);
31950 rtx y = gen_reg_rtx (mode);
31952 /* Low precision estimates guarantee 5 bits of accuracy. High
31953 precision estimates guarantee 14 bits of accuracy. SFmode
31954 requires 23 bits of accuracy. DFmode requires 52 bits of
31955 accuracy. Each pass at least doubles the accuracy, leading
31956 to the following. */
31957 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
31958 if (mode == DFmode || mode == V2DFmode)
31959 passes++;
31961 REAL_VALUE_TYPE dconst3_2;
31962 int i;
31963 rtx halfthree;
31964 enum insn_code code = optab_handler (smul_optab, mode);
31965 insn_gen_fn gen_mul = GEN_FCN (code);
31967 gcc_assert (code != CODE_FOR_nothing);
31969 /* Load up the constant 1.5 either as a scalar, or as a vector. */
31970 real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
31971 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
31973 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
31975 /* x0 = rsqrt estimate */
31976 emit_insn (gen_rtx_SET (x0, gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
31977 UNSPEC_RSQRT)));
31979 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
31980 rs6000_emit_msub (y, src, halfthree, src);
31982 for (i = 0; i < passes; i++)
31984 rtx x1 = gen_reg_rtx (mode);
31985 rtx u = gen_reg_rtx (mode);
31986 rtx v = gen_reg_rtx (mode);
31988 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
31989 emit_insn (gen_mul (u, x0, x0));
31990 rs6000_emit_nmsub (v, y, u, halfthree);
31991 emit_insn (gen_mul (x1, x0, v));
31992 x0 = x1;
31995 emit_move_insn (dst, x0);
31996 return;
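/* Illustrative scalar model of the expansion above (added; not part of
   the original file).  1.0 / sqrt (src) stands in for the frsqrte
   hardware estimate; y = 0.5 * src is formed as 1.5*src - src above so
   the 1.5 constant can be reused.  */
#if 0
#include <math.h>

static double
example_swrsqrt (double src, int passes)
{
  double x = 1.0 / sqrt (src);   /* stand-in for the frsqrte estimate */
  double y = 0.5 * src;
  /* Each pass: x_(i+1) = x_i * (1.5 - y * x_i * x_i).  */
  for (int i = 0; i < passes; i++)
    x = x * (1.5 - y * (x * x));
  return x;
}
#endif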
31999 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
32000 (Power7) targets. DST is the target, and SRC is the argument operand. */
32002 void
32003 rs6000_emit_popcount (rtx dst, rtx src)
32005 machine_mode mode = GET_MODE (dst);
32006 rtx tmp1, tmp2;
32008 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
32009 if (TARGET_POPCNTD)
32011 if (mode == SImode)
32012 emit_insn (gen_popcntdsi2 (dst, src));
32013 else
32014 emit_insn (gen_popcntddi2 (dst, src));
32015 return;
32018 tmp1 = gen_reg_rtx (mode);
32020 if (mode == SImode)
32022 emit_insn (gen_popcntbsi2 (tmp1, src));
32023 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
32024 NULL_RTX, 0);
32025 tmp2 = force_reg (SImode, tmp2);
32026 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
32028 else
32030 emit_insn (gen_popcntbdi2 (tmp1, src));
32031 tmp2 = expand_mult (DImode, tmp1,
32032 GEN_INT ((HOST_WIDE_INT)
32033 0x01010101 << 32 | 0x01010101),
32034 NULL_RTX, 0);
32035 tmp2 = force_reg (DImode, tmp2);
32036 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
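/* Illustrative sketch of the multiply trick used above (added; not part
   of the original file).  bytewise_popcount models what popcntb
   produces: a population count in each byte.  Multiplying by
   0x01010101 accumulates all byte counts into the top byte, which the
   final shift extracts.  */
#if 0
#include <stdint.h>

static uint32_t
bytewise_popcount (uint32_t x)    /* software model of popcntb */
{
  x = x - ((x >> 1) & 0x55555555u);
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);
  return (x + (x >> 4)) & 0x0f0f0f0fu;
}

static uint32_t
example_popcount32 (uint32_t src)
{
  uint32_t bytes = bytewise_popcount (src);
  return (bytes * 0x01010101u) >> 24;   /* sum of byte counts in top byte */
}
#endif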
32041 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
32042 target, and SRC is the argument operand. */
32044 void
32045 rs6000_emit_parity (rtx dst, rtx src)
32047 machine_mode mode = GET_MODE (dst);
32048 rtx tmp;
32050 tmp = gen_reg_rtx (mode);
32052 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
32053 if (TARGET_CMPB)
32055 if (mode == SImode)
32057 emit_insn (gen_popcntbsi2 (tmp, src));
32058 emit_insn (gen_paritysi2_cmpb (dst, tmp));
32060 else
32062 emit_insn (gen_popcntbdi2 (tmp, src));
32063 emit_insn (gen_paritydi2_cmpb (dst, tmp));
32065 return;
32068 if (mode == SImode)
32070 /* Is mult+shift >= shift+xor+shift+xor? */
32071 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
32073 rtx tmp1, tmp2, tmp3, tmp4;
32075 tmp1 = gen_reg_rtx (SImode);
32076 emit_insn (gen_popcntbsi2 (tmp1, src));
32078 tmp2 = gen_reg_rtx (SImode);
32079 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
32080 tmp3 = gen_reg_rtx (SImode);
32081 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
32083 tmp4 = gen_reg_rtx (SImode);
32084 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
32085 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
32087 else
32088 rs6000_emit_popcount (tmp, src);
32089 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
32091 else
32093 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
32094 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
32096 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
32098 tmp1 = gen_reg_rtx (DImode);
32099 emit_insn (gen_popcntbdi2 (tmp1, src));
32101 tmp2 = gen_reg_rtx (DImode);
32102 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
32103 tmp3 = gen_reg_rtx (DImode);
32104 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
32106 tmp4 = gen_reg_rtx (DImode);
32107 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
32108 tmp5 = gen_reg_rtx (DImode);
32109 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
32111 tmp6 = gen_reg_rtx (DImode);
32112 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
32113 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
32115 else
32116 rs6000_emit_popcount (tmp, src);
32117 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
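/* Illustrative sketch of the shift+xor fallback above (added; not part
   of the original file).  Starting from per-byte counts (as popcntb
   would give), each xor fold adds the counts modulo 2, so the low bit
   of the final value is the parity of the whole word.  */
#if 0
#include <stdint.h>

static uint32_t
example_parity32 (uint32_t src)
{
  uint32_t t = src;
  /* Per-byte population counts, modeling popcntb.  */
  t = t - ((t >> 1) & 0x55555555u);
  t = (t & 0x33333333u) + ((t >> 2) & 0x33333333u);
  t = (t + (t >> 4)) & 0x0f0f0f0fu;
  /* Fold the byte counts together; only the low bit matters.  */
  t ^= t >> 16;
  t ^= t >> 8;
  return t & 1;
}
#endif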
32121 /* Expand an Altivec constant permutation for little endian mode.
32122 There are two issues: First, the two input operands must be
32123 swapped so that together they form a double-wide array in LE
32124 order. Second, the vperm instruction has surprising behavior
32125 in LE mode: it interprets the elements of the source vectors
32126 in BE mode ("left to right") and interprets the elements of
32127 the destination vector in LE mode ("right to left"). To
32128 correct for this, we must subtract each element of the permute
32129 control vector from 31.
32131 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
32132 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
32133 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
32134 serve as the permute control vector. Then, in BE mode,
32136 vperm 9,10,11,12
32138 places the desired result in vr9. However, in LE mode the
32139 vector contents will be
32141 vr10 = 00000003 00000002 00000001 00000000
32142 vr11 = 00000007 00000006 00000005 00000004
32144 The result of the vperm using the same permute control vector is
32146 vr9 = 05000000 07000000 01000000 03000000
32148 That is, the leftmost 4 bytes of vr10 are interpreted as the
32149 source for the rightmost 4 bytes of vr9, and so on.
32151 If we change the permute control vector to
32153 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
32155 and issue
32157 vperm 9,11,10,12
32159 we get the desired
32161 vr9 = 00000006 00000004 00000002 00000000. */
32163 void
32164 altivec_expand_vec_perm_const_le (rtx operands[4])
32166 unsigned int i;
32167 rtx perm[16];
32168 rtx constv, unspec;
32169 rtx target = operands[0];
32170 rtx op0 = operands[1];
32171 rtx op1 = operands[2];
32172 rtx sel = operands[3];
32174 /* Unpack and adjust the constant selector. */
32175 for (i = 0; i < 16; ++i)
32177 rtx e = XVECEXP (sel, 0, i);
32178 unsigned int elt = 31 - (INTVAL (e) & 31);
32179 perm[i] = GEN_INT (elt);
32182 /* Expand to a permute, swapping the inputs and using the
32183 adjusted selector. */
32184 if (!REG_P (op0))
32185 op0 = force_reg (V16QImode, op0);
32186 if (!REG_P (op1))
32187 op1 = force_reg (V16QImode, op1);
32189 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
32190 constv = force_reg (V16QImode, constv);
32191 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
32192 UNSPEC_VPERM);
32193 if (!REG_P (target))
32195 rtx tmp = gen_reg_rtx (V16QImode);
32196 emit_move_insn (tmp, unspec);
32197 unspec = tmp;
32200 emit_move_insn (target, unspec);
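/* Illustrative sketch of the selector adjustment above (added; not part
   of the original file): each BE-numbered element is subtracted from 31
   and the two input operands are swapped, per the derivation in the
   comment before the function.  */
#if 0
static void
example_adjust_selector (const unsigned char sel[16], unsigned char out[16])
{
  for (int i = 0; i < 16; i++)
    out[i] = 31 - (sel[i] & 31);   /* mirror within the 32-byte concat */
}
#endif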
32203 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
32204 permute control vector. But here it's not a constant, so we must
32205 generate a vector NAND or NOR to do the adjustment. */
32207 void
32208 altivec_expand_vec_perm_le (rtx operands[4])
32210 rtx notx, iorx, unspec;
32211 rtx target = operands[0];
32212 rtx op0 = operands[1];
32213 rtx op1 = operands[2];
32214 rtx sel = operands[3];
32215 rtx tmp = target;
32216 rtx norreg = gen_reg_rtx (V16QImode);
32217 machine_mode mode = GET_MODE (target);
32219 /* Get everything in regs so the pattern matches. */
32220 if (!REG_P (op0))
32221 op0 = force_reg (mode, op0);
32222 if (!REG_P (op1))
32223 op1 = force_reg (mode, op1);
32224 if (!REG_P (sel))
32225 sel = force_reg (V16QImode, sel);
32226 if (!REG_P (target))
32227 tmp = gen_reg_rtx (mode);
32229 /* Invert the selector with a VNAND if available, else a VNOR.
32230 The VNAND is preferred for future fusion opportunities. */
32231 notx = gen_rtx_NOT (V16QImode, sel);
32232 iorx = (TARGET_P8_VECTOR
32233 ? gen_rtx_IOR (V16QImode, notx, notx)
32234 : gen_rtx_AND (V16QImode, notx, notx));
32235 emit_insn (gen_rtx_SET (norreg, iorx));
32237 /* Permute with operands reversed and adjusted selector. */
32238 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
32239 UNSPEC_VPERM);
32241 /* Copy into target, possibly by way of a register. */
32242 if (!REG_P (target))
32244 emit_move_insn (tmp, unspec);
32245 unspec = tmp;
32248 emit_move_insn (target, unspec);
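/* Note (added for exposition; not in the original file): a single
   vector NOT suffices here because vperm reads only the low five bits
   of each selector byte, and over five bits 31 - e and ~e coincide.  */
#if 0
#include <assert.h>

static void
example_not_equals_31_minus (unsigned char e)
{
  assert (((31 - (e & 31)) & 31) == (~e & 31));
}
#endif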
32251 /* Expand an Altivec constant permutation. Return true if we match
32252 an efficient implementation; false to fall back to VPERM. */
32254 bool
32255 altivec_expand_vec_perm_const (rtx operands[4])
32257 struct altivec_perm_insn {
32258 HOST_WIDE_INT mask;
32259 enum insn_code impl;
32260 unsigned char perm[16];
32262 static const struct altivec_perm_insn patterns[] = {
32263 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
32264 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
32265 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
32266 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
32267 { OPTION_MASK_ALTIVEC,
32268 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
32269 : CODE_FOR_altivec_vmrglb_direct),
32270 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
32271 { OPTION_MASK_ALTIVEC,
32272 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
32273 : CODE_FOR_altivec_vmrglh_direct),
32274 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
32275 { OPTION_MASK_ALTIVEC,
32276 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
32277 : CODE_FOR_altivec_vmrglw_direct),
32278 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
32279 { OPTION_MASK_ALTIVEC,
32280 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
32281 : CODE_FOR_altivec_vmrghb_direct),
32282 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
32283 { OPTION_MASK_ALTIVEC,
32284 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
32285 : CODE_FOR_altivec_vmrghh_direct),
32286 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
32287 { OPTION_MASK_ALTIVEC,
32288 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
32289 : CODE_FOR_altivec_vmrghw_direct),
32290 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
32291 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
32292 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
32293 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
32294 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
32297 unsigned int i, j, elt, which;
32298 unsigned char perm[16];
32299 rtx target, op0, op1, sel, x;
32300 bool one_vec;
32302 target = operands[0];
32303 op0 = operands[1];
32304 op1 = operands[2];
32305 sel = operands[3];
32307 /* Unpack the constant selector. */
32308 for (i = which = 0; i < 16; ++i)
32310 rtx e = XVECEXP (sel, 0, i);
32311 elt = INTVAL (e) & 31;
32312 which |= (elt < 16 ? 1 : 2);
32313 perm[i] = elt;
32316 /* Simplify the constant selector based on operands. */
32317 switch (which)
32319 default:
32320 gcc_unreachable ();
32322 case 3:
32323 one_vec = false;
32324 if (!rtx_equal_p (op0, op1))
32325 break;
32326 /* FALLTHRU */
32328 case 2:
32329 for (i = 0; i < 16; ++i)
32330 perm[i] &= 15;
32331 op0 = op1;
32332 one_vec = true;
32333 break;
32335 case 1:
32336 op1 = op0;
32337 one_vec = true;
32338 break;
32341 /* Look for splat patterns. */
32342 if (one_vec)
32344 elt = perm[0];
32346 for (i = 0; i < 16; ++i)
32347 if (perm[i] != elt)
32348 break;
32349 if (i == 16)
32351 if (!BYTES_BIG_ENDIAN)
32352 elt = 15 - elt;
32353 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
32354 return true;
32357 if (elt % 2 == 0)
32359 for (i = 0; i < 16; i += 2)
32360 if (perm[i] != elt || perm[i + 1] != elt + 1)
32361 break;
32362 if (i == 16)
32364 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
32365 x = gen_reg_rtx (V8HImode);
32366 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
32367 GEN_INT (field)));
32368 emit_move_insn (target, gen_lowpart (V16QImode, x));
32369 return true;
32373 if (elt % 4 == 0)
32375 for (i = 0; i < 16; i += 4)
32376 if (perm[i] != elt
32377 || perm[i + 1] != elt + 1
32378 || perm[i + 2] != elt + 2
32379 || perm[i + 3] != elt + 3)
32380 break;
32381 if (i == 16)
32383 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
32384 x = gen_reg_rtx (V4SImode);
32385 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
32386 GEN_INT (field)));
32387 emit_move_insn (target, gen_lowpart (V16QImode, x));
32388 return true;
32393 /* Look for merge and pack patterns. */
32394 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
32396 bool swapped;
32398 if ((patterns[j].mask & rs6000_isa_flags) == 0)
32399 continue;
32401 elt = patterns[j].perm[0];
32402 if (perm[0] == elt)
32403 swapped = false;
32404 else if (perm[0] == elt + 16)
32405 swapped = true;
32406 else
32407 continue;
32408 for (i = 1; i < 16; ++i)
32410 elt = patterns[j].perm[i];
32411 if (swapped)
32412 elt = (elt >= 16 ? elt - 16 : elt + 16);
32413 else if (one_vec && elt >= 16)
32414 elt -= 16;
32415 if (perm[i] != elt)
32416 break;
32418 if (i == 16)
32420 enum insn_code icode = patterns[j].impl;
32421 machine_mode omode = insn_data[icode].operand[0].mode;
32422 machine_mode imode = insn_data[icode].operand[1].mode;
32424 /* For little-endian, don't use vpkuwum and vpkuhum if the
32425 underlying vector type is not V4SI and V8HI, respectively.
32426 For example, using vpkuwum with a V8HI picks up the even
32427 halfwords (BE numbering) when the even halfwords (LE
32428 numbering) are what we need. */
32429 if (!BYTES_BIG_ENDIAN
32430 && icode == CODE_FOR_altivec_vpkuwum_direct
32431 && ((GET_CODE (op0) == REG
32432 && GET_MODE (op0) != V4SImode)
32433 || (GET_CODE (op0) == SUBREG
32434 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
32435 continue;
32436 if (!BYTES_BIG_ENDIAN
32437 && icode == CODE_FOR_altivec_vpkuhum_direct
32438 && ((GET_CODE (op0) == REG
32439 && GET_MODE (op0) != V8HImode)
32440 || (GET_CODE (op0) == SUBREG
32441 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
32442 continue;
32444 /* For little-endian, the two input operands must be swapped
32445 (or swapped back) to ensure proper right-to-left numbering
32446 from 0 to 2N-1. */
32447 if (swapped ^ !BYTES_BIG_ENDIAN)
32448 std::swap (op0, op1);
32449 if (imode != V16QImode)
32451 op0 = gen_lowpart (imode, op0);
32452 op1 = gen_lowpart (imode, op1);
32454 if (omode == V16QImode)
32455 x = target;
32456 else
32457 x = gen_reg_rtx (omode);
32458 emit_insn (GEN_FCN (icode) (x, op0, op1));
32459 if (omode != V16QImode)
32460 emit_move_insn (target, gen_lowpart (V16QImode, x));
32461 return true;
32465 if (!BYTES_BIG_ENDIAN)
32467 altivec_expand_vec_perm_const_le (operands);
32468 return true;
32471 return false;
32474 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
32475 Return true if we match an efficient implementation. */
32477 static bool
32478 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
32479 unsigned char perm0, unsigned char perm1)
32481 rtx x;
32483 /* If both selectors come from the same operand, fold to single op. */
32484 if ((perm0 & 2) == (perm1 & 2))
32486 if (perm0 & 2)
32487 op0 = op1;
32488 else
32489 op1 = op0;
32491 /* If both operands are equal, fold to simpler permutation. */
32492 if (rtx_equal_p (op0, op1))
32494 perm0 = perm0 & 1;
32495 perm1 = (perm1 & 1) + 2;
32497 /* If the first selector comes from the second operand, swap. */
32498 else if (perm0 & 2)
32500 if (perm1 & 2)
32501 return false;
32502 perm0 -= 2;
32503 perm1 += 2;
32504 std::swap (op0, op1);
32506 /* If the second selector does not come from the second operand, fail. */
32507 else if ((perm1 & 2) == 0)
32508 return false;
32510 /* Success! */
32511 if (target != NULL)
32513 machine_mode vmode, dmode;
32514 rtvec v;
32516 vmode = GET_MODE (target);
32517 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
32518 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
32519 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
32520 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
32521 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
32522 emit_insn (gen_rtx_SET (target, x));
32524 return true;
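/* Illustrative model of the two-element permute being canonicalized
   above (added; not part of the original file): PERM0 and PERM1 index
   the four-element concatenation of OP0 and OP1.  */
#if 0
static void
example_perm2 (const double op0[2], const double op1[2],
               unsigned perm0, unsigned perm1, double out[2])
{
  const double cat[4] = { op0[0], op0[1], op1[0], op1[1] };
  out[0] = cat[perm0 & 3];
  out[1] = cat[perm1 & 3];
}
#endif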
32527 bool
32528 rs6000_expand_vec_perm_const (rtx operands[4])
32530 rtx target, op0, op1, sel;
32531 unsigned char perm0, perm1;
32533 target = operands[0];
32534 op0 = operands[1];
32535 op1 = operands[2];
32536 sel = operands[3];
32538 /* Unpack the constant selector. */
32539 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
32540 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
32542 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
32545 /* Test whether a constant permutation is supported. */
32547 static bool
32548 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
32549 const unsigned char *sel)
32551 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
32552 if (TARGET_ALTIVEC)
32553 return true;
32555 /* Check for ps_merge* or evmerge* insns. */
32556 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
32557 || (TARGET_SPE && vmode == V2SImode))
32559 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
32560 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
32561 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
32564 return false;
32567 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
32569 static void
32570 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
32571 machine_mode vmode, unsigned nelt, rtx perm[])
32573 machine_mode imode;
32574 rtx x;
32576 imode = vmode;
32577 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
32579 imode = mode_for_size (GET_MODE_UNIT_BITSIZE (vmode), MODE_INT, 0);
32580 imode = mode_for_vector (imode, nelt);
32583 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
32584 x = expand_vec_perm (vmode, op0, op1, x, target);
32585 if (x != target)
32586 emit_move_insn (target, x);
32589 /* Expand an extract even operation. */
32591 void
32592 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
32594 machine_mode vmode = GET_MODE (target);
32595 unsigned i, nelt = GET_MODE_NUNITS (vmode);
32596 rtx perm[16];
32598 for (i = 0; i < nelt; i++)
32599 perm[i] = GEN_INT (i * 2);
32601 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
32604 /* Expand a vector interleave operation. */
32606 void
32607 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
32609 machine_mode vmode = GET_MODE (target);
32610 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
32611 rtx perm[16];
32613 high = (highp ? 0 : nelt / 2);
32614 for (i = 0; i < nelt / 2; i++)
32616 perm[i * 2] = GEN_INT (i + high);
32617 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
32620 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
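/* Illustrative sketch of the selectors built by the two expanders above
   (added; not part of the original file), indexing the concatenation of
   OP0 and OP1 as elements 0 .. 2*NELT-1.  */
#if 0
#include <stdbool.h>

static void
example_selectors (unsigned nelt, bool highp,
                   unsigned even[], unsigned inter[])
{
  /* extract_even: lanes 0, 2, 4, ... of the concatenation.  */
  for (unsigned i = 0; i < nelt; i++)
    even[i] = i * 2;
  /* interleave: pair lane i of OP0 with lane i of OP1, taken from
     either the high half (highp) or the low half.  */
  unsigned high = highp ? 0 : nelt / 2;
  for (unsigned i = 0; i < nelt / 2; i++)
    {
      inter[i * 2] = i + high;
      inter[i * 2 + 1] = i + nelt + high;
    }
}
#endif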
32623 /* Scale a V2DF vector SRC by two to the power SCALE and place the result in TGT. */
32624 void
32625 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
32627 HOST_WIDE_INT hwi_scale (scale);
32628 REAL_VALUE_TYPE r_pow;
32629 rtvec v = rtvec_alloc (2);
32630 rtx elt;
32631 rtx scale_vec = gen_reg_rtx (V2DFmode);
32632 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
32633 elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
32634 RTVEC_ELT (v, 0) = elt;
32635 RTVEC_ELT (v, 1) = elt;
32636 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
32637 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
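/* Scalar equivalent of the V2DF scaling above (added; not part of the
   original file): each lane is multiplied by 2**SCALE, with ldexp
   standing in for the real_powi splat constant.  */
#if 0
#include <math.h>

static void
example_scale_v2df (double tgt[2], const double src[2], int scale)
{
  double factor = ldexp (1.0, scale);   /* 2.0 ** scale */
  tgt[0] = src[0] * factor;
  tgt[1] = src[1] * factor;
}
#endif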
32640 /* Return an RTX representing where to find the function value of a
32641 function returning MODE. */
32642 static rtx
32643 rs6000_complex_function_value (machine_mode mode)
32645 unsigned int regno;
32646 rtx r1, r2;
32647 machine_mode inner = GET_MODE_INNER (mode);
32648 unsigned int inner_bytes = GET_MODE_UNIT_SIZE (mode);
32650 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
32651 regno = FP_ARG_RETURN;
32652 else
32654 regno = GP_ARG_RETURN;
32656 /* 32-bit is OK since it'll go in r3/r4. */
32657 if (TARGET_32BIT && inner_bytes >= 4)
32658 return gen_rtx_REG (mode, regno);
32661 if (inner_bytes >= 8)
32662 return gen_rtx_REG (mode, regno);
32664 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
32665 const0_rtx);
32666 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
32667 GEN_INT (inner_bytes));
32668 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
32671 /* Return an rtx describing a return value of MODE as a PARALLEL
32672 in N_ELTS registers, each of mode ELT_MODE, starting at REGNO,
32673 stride REG_STRIDE. */
32675 static rtx
32676 rs6000_parallel_return (machine_mode mode,
32677 int n_elts, machine_mode elt_mode,
32678 unsigned int regno, unsigned int reg_stride)
32680 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
32682 int i;
32683 for (i = 0; i < n_elts; i++)
32685 rtx r = gen_rtx_REG (elt_mode, regno);
32686 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
32687 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
32688 regno += reg_stride;
32691 return par;
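/* Shape of the PARALLEL built above (added for exposition; not in the
   original file).  For example, splitting a DImode value into two
   SImode GPRs starting at r3 with stride 1 yields, schematically:

     (parallel:DI [(expr_list (reg:SI 3) (const_int 0))
                   (expr_list (reg:SI 4) (const_int 4))])  */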
32694 /* Target hook for TARGET_FUNCTION_VALUE.
32696 On the SPE, both FPs and vectors are returned in r3.
32698 On RS/6000 an integer value is in r3 and a floating-point value is in
32699 fp1, unless -msoft-float. */
32701 static rtx
32702 rs6000_function_value (const_tree valtype,
32703 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
32704 bool outgoing ATTRIBUTE_UNUSED)
32706 machine_mode mode;
32707 unsigned int regno;
32708 machine_mode elt_mode;
32709 int n_elts;
32711 /* Special handling for structs in darwin64. */
32712 if (TARGET_MACHO
32713 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
32715 CUMULATIVE_ARGS valcum;
32716 rtx valret;
32718 valcum.words = 0;
32719 valcum.fregno = FP_ARG_MIN_REG;
32720 valcum.vregno = ALTIVEC_ARG_MIN_REG;
32721 /* Do a trial code generation as if this were going to be passed as
32722 an argument; if any part goes in memory, we return NULL. */
32723 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
32724 if (valret)
32725 return valret;
32726 /* Otherwise fall through to standard ABI rules. */
32729 mode = TYPE_MODE (valtype);
32731 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
32732 if (rs6000_discover_homogeneous_aggregate (mode, valtype, &elt_mode, &n_elts))
32734 int first_reg, n_regs;
32736 if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (elt_mode))
32738 /* _Decimal128 must use even/odd register pairs. */
32739 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
32740 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
32742 else
32744 first_reg = ALTIVEC_ARG_RETURN;
32745 n_regs = 1;
32748 return rs6000_parallel_return (mode, n_elts, elt_mode, first_reg, n_regs);
32751 /* Some return value types need to be split in the -mpowerpc64, 32-bit ABI. */
32752 if (TARGET_32BIT && TARGET_POWERPC64)
32753 switch (mode)
32755 default:
32756 break;
32757 case DImode:
32758 case SCmode:
32759 case DCmode:
32760 case TCmode:
32761 int count = GET_MODE_SIZE (mode) / 4;
32762 return rs6000_parallel_return (mode, count, SImode, GP_ARG_RETURN, 1);
32765 if ((INTEGRAL_TYPE_P (valtype)
32766 && GET_MODE_BITSIZE (mode) < (TARGET_32BIT ? 32 : 64))
32767 || POINTER_TYPE_P (valtype))
32768 mode = TARGET_32BIT ? SImode : DImode;
32770 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
32771 /* _Decimal128 must use an even/odd register pair. */
32772 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
32773 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS
32774 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
32775 regno = FP_ARG_RETURN;
32776 else if (TREE_CODE (valtype) == COMPLEX_TYPE
32777 && targetm.calls.split_complex_arg)
32778 return rs6000_complex_function_value (mode);
32779 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
32780 return register is used in both cases, and we won't see V2DImode/V2DFmode
32781 for pure altivec, combine the two cases. */
32782 else if ((TREE_CODE (valtype) == VECTOR_TYPE || FLOAT128_VECTOR_P (mode))
32783 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
32784 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
32785 regno = ALTIVEC_ARG_RETURN;
32786 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
32787 && (mode == DFmode || mode == DCmode
32788 || FLOAT128_IBM_P (mode) || mode == TCmode))
32789 return spe_build_register_parallel (mode, GP_ARG_RETURN);
32790 else
32791 regno = GP_ARG_RETURN;
32793 return gen_rtx_REG (mode, regno);
32796 /* Define how to find the value returned by a library function
32797 assuming the value has mode MODE. */
32798 static rtx
32799 rs6000_libcall_value (machine_mode mode)
32801 unsigned int regno;
32803 /* A long long return value needs to be split in the -mpowerpc64, 32-bit ABI. */
32804 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
32805 return rs6000_parallel_return (mode, 2, SImode, GP_ARG_RETURN, 1);
32807 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
32808 /* _Decimal128 must use an even/odd register pair. */
32809 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
32810 else if (SCALAR_FLOAT_MODE_NOT_VECTOR_P (mode)
32811 && TARGET_HARD_FLOAT && TARGET_FPRS
32812 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
32813 regno = FP_ARG_RETURN;
32814 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
32815 return register is used in both cases, and we won't see V2DImode/V2DFmode
32816 for pure altivec, combine the two cases. */
32817 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
32818 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
32819 regno = ALTIVEC_ARG_RETURN;
32820 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
32821 return rs6000_complex_function_value (mode);
32822 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
32823 && (mode == DFmode || mode == DCmode
32824 || FLOAT128_IBM_P (mode) || mode == TCmode))
32825 return spe_build_register_parallel (mode, GP_ARG_RETURN);
32826 else
32827 regno = GP_ARG_RETURN;
32829 return gen_rtx_REG (mode, regno);
32833 /* Return true if we use LRA instead of reload pass. */
32834 static bool
32835 rs6000_lra_p (void)
32837 return rs6000_lra_flag;
32840 /* Given FROM and TO register numbers, say whether this elimination is allowed.
32841 Frame pointer elimination is automatically handled.
32843 For the RS/6000, if frame pointer elimination is being done, we would like
32844 to convert ap into fp, not sp.
32846 We need r30 if -mminimal-toc was specified, and there are constant pool
32847 references. */
32849 static bool
32850 rs6000_can_eliminate (const int from, const int to)
32852 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
32853 ? ! frame_pointer_needed
32854 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
32855 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
32856 : true);
32859 /* Define the offset between two registers, FROM to be eliminated and its
32860 replacement TO, at the start of a routine. */
32861 HOST_WIDE_INT
32862 rs6000_initial_elimination_offset (int from, int to)
32864 rs6000_stack_t *info = rs6000_stack_info ();
32865 HOST_WIDE_INT offset;
32867 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
32868 offset = info->push_p ? 0 : -info->total_size;
32869 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
32871 offset = info->push_p ? 0 : -info->total_size;
32872 if (FRAME_GROWS_DOWNWARD)
32873 offset += info->fixed_size + info->vars_size + info->parm_size;
32875 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
32876 offset = FRAME_GROWS_DOWNWARD
32877 ? info->fixed_size + info->vars_size + info->parm_size
32878 : 0;
32879 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
32880 offset = info->total_size;
32881 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
32882 offset = info->push_p ? info->total_size : 0;
32883 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
32884 offset = 0;
32885 else
32886 gcc_unreachable ();
32888 return offset;
32891 static rtx
32892 rs6000_dwarf_register_span (rtx reg)
32894 rtx parts[8];
32895 int i, words;
32896 unsigned regno = REGNO (reg);
32897 machine_mode mode = GET_MODE (reg);
32899 if (TARGET_SPE
32900 && regno < 32
32901 && (SPE_VECTOR_MODE (GET_MODE (reg))
32902 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
32903 && mode != SFmode && mode != SDmode && mode != SCmode)))
32904 ;
32905 else
32906 return NULL_RTX;
32908 regno = REGNO (reg);
32910 /* The duality of the SPE register size wreaks all kinds of havoc.
32911 This is a way of distinguishing r0 in 32-bits from r0 in
32912 64-bits. */
32913 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
32914 gcc_assert (words <= 4);
32915 for (i = 0; i < words; i++, regno++)
32917 if (BYTES_BIG_ENDIAN)
32919 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
32920 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
32922 else
32924 parts[2 * i] = gen_rtx_REG (SImode, regno);
32925 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
32929 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
32932 /* Fill in sizes for SPE register high parts in table used by unwinder. */
32934 static void
32935 rs6000_init_dwarf_reg_sizes_extra (tree address)
32937 if (TARGET_SPE)
32939 int i;
32940 machine_mode mode = TYPE_MODE (char_type_node);
32941 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
32942 rtx mem = gen_rtx_MEM (BLKmode, addr);
32943 rtx value = gen_int_mode (4, mode);
32945 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
32947 int column = DWARF_REG_TO_UNWIND_COLUMN
32948 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
32949 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
32951 emit_move_insn (adjust_address (mem, mode, offset), value);
32955 if (TARGET_MACHO && ! TARGET_ALTIVEC)
32957 int i;
32958 machine_mode mode = TYPE_MODE (char_type_node);
32959 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
32960 rtx mem = gen_rtx_MEM (BLKmode, addr);
32961 rtx value = gen_int_mode (16, mode);
32963 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
32964 The unwinder still needs to know the size of Altivec registers. */
32966 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
32968 int column = DWARF_REG_TO_UNWIND_COLUMN
32969 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
32970 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
32972 emit_move_insn (adjust_address (mem, mode, offset), value);
32977 /* Map internal gcc register numbers to debug format register numbers.
32978 FORMAT specifies the type of debug register number to use:
32979 0 -- debug information, except for frame-related sections
32980 1 -- DWARF .debug_frame section
32981 2 -- DWARF .eh_frame section */
32983 unsigned int
32984 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
32986 /* We never use the GCC internal number for SPE high registers.
32987 Those are mapped to the 1200..1231 range for all debug formats. */
32988 if (SPE_HIGH_REGNO_P (regno))
32989 return regno - FIRST_SPE_HIGH_REGNO + 1200;
32991 /* Except for the above, we use the internal number for non-DWARF
32992 debug information, and also for .eh_frame. */
32993 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
32994 return regno;
32996 /* On some platforms, we use the standard DWARF register
32997 numbering for .debug_info and .debug_frame. */
32998 #ifdef RS6000_USE_DWARF_NUMBERING
32999 if (regno <= 63)
33000 return regno;
33001 if (regno == LR_REGNO)
33002 return 108;
33003 if (regno == CTR_REGNO)
33004 return 109;
33005 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
33006 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
33007 The actual code emitted saves the whole of CR, so we map CR2_REGNO
33008 to the DWARF reg for CR. */
33009 if (format == 1 && regno == CR2_REGNO)
33010 return 64;
33011 if (CR_REGNO_P (regno))
33012 return regno - CR0_REGNO + 86;
33013 if (regno == CA_REGNO)
33014 return 101; /* XER */
33015 if (ALTIVEC_REGNO_P (regno))
33016 return regno - FIRST_ALTIVEC_REGNO + 1124;
33017 if (regno == VRSAVE_REGNO)
33018 return 356;
33019 if (regno == VSCR_REGNO)
33020 return 67;
33021 if (regno == SPE_ACC_REGNO)
33022 return 99;
33023 if (regno == SPEFSCR_REGNO)
33024 return 612;
33025 #endif
33026 return regno;
33029 /* target hook eh_return_filter_mode */
33030 static machine_mode
33031 rs6000_eh_return_filter_mode (void)
33033 return TARGET_32BIT ? SImode : word_mode;
33036 /* Target hook for scalar_mode_supported_p. */
33037 static bool
33038 rs6000_scalar_mode_supported_p (machine_mode mode)
33040 /* -m32 does not support TImode. This is the default, from
33041 default_scalar_mode_supported_p. For -m32 -mpowerpc64 we want the
33042 same ABI as for -m32. But default_scalar_mode_supported_p allows
33043 integer modes of precision 2 * BITS_PER_WORD, which matches TImode
33044 for -mpowerpc64. */
33045 if (TARGET_32BIT && mode == TImode)
33046 return false;
33048 if (DECIMAL_FLOAT_MODE_P (mode))
33049 return default_decimal_float_supported_p ();
33050 else if (mode == KFmode)
33051 return TARGET_FLOAT128;
33052 else
33053 return default_scalar_mode_supported_p (mode);
33056 /* Target hook for vector_mode_supported_p. */
33057 static bool
33058 rs6000_vector_mode_supported_p (machine_mode mode)
33061 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
33062 return true;
33064 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
33065 return true;
33067 /* There is no vector form for IEEE 128-bit. If we return true for IEEE
33068 128-bit, the compiler might try to widen IEEE 128-bit to IBM
33069 double-double. */
33070 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode) && !FLOAT128_IEEE_P (mode))
33071 return true;
33073 else
33074 return false;
33077 /* Target hook for invalid_arg_for_unprototyped_fn. */
33078 static const char *
33079 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
33081 return (!rs6000_darwin64_abi
33082 && typelist == 0
33083 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
33084 && (funcdecl == NULL_TREE
33085 || (TREE_CODE (funcdecl) == FUNCTION_DECL
33086 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
33087 ? N_("AltiVec argument passed to unprototyped function")
33088 : NULL;
33091 /* For TARGET_SECURE_PLT 32-bit PIC code we can save PIC register
33092 setup by using __stack_chk_fail_local hidden function instead of
33093 calling __stack_chk_fail directly. Otherwise it is better to call
33094 __stack_chk_fail directly. */
33096 static tree ATTRIBUTE_UNUSED
33097 rs6000_stack_protect_fail (void)
33099 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
33100 ? default_hidden_stack_protect_fail ()
33101 : default_external_stack_protect_fail ();
33104 void
33105 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
33106 int num_operands ATTRIBUTE_UNUSED)
33108 if (rs6000_warn_cell_microcode)
33110 const char *temp;
33111 int insn_code_number = recog_memoized (insn);
33112 location_t location = INSN_LOCATION (insn);
33114 /* Punt on insns we cannot recognize. */
33115 if (insn_code_number < 0)
33116 return;
33118 temp = get_insn_template (insn_code_number, insn);
33120 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
33121 warning_at (location, OPT_mwarn_cell_microcode,
33122 "emitting microcode insn %s\t[%s] #%d",
33123 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
33124 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
33125 warning_at (location, OPT_mwarn_cell_microcode,
33126 "emitting conditional microcode insn %s\t[%s] #%d",
33127 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
33131 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
33133 #if TARGET_ELF
33134 static unsigned HOST_WIDE_INT
33135 rs6000_asan_shadow_offset (void)
33137 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
33139 #endif
33141 /* Mask options that we want to support inside of attribute((target)) and
33142 #pragma GCC target operations. Note, we do not include things like
33143 64/32-bit, endianness, hard/soft floating point, etc. that would have
33144 different calling sequences. */
33146 struct rs6000_opt_mask {
33147 const char *name; /* option name */
33148 HOST_WIDE_INT mask; /* mask to set */
33149 bool invert; /* invert sense of mask */
33150 bool valid_target; /* option is a target option */
33153 static struct rs6000_opt_mask const rs6000_opt_masks[] =
33155 { "altivec", OPTION_MASK_ALTIVEC, false, true },
33156 { "cmpb", OPTION_MASK_CMPB, false, true },
33157 { "crypto", OPTION_MASK_CRYPTO, false, true },
33158 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
33159 { "dlmzb", OPTION_MASK_DLMZB, false, true },
33160 { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
33161 false, true },
33162 { "fprnd", OPTION_MASK_FPRND, false, true },
33163 { "hard-dfp", OPTION_MASK_DFP, false, true },
33164 { "htm", OPTION_MASK_HTM, false, true },
33165 { "isel", OPTION_MASK_ISEL, false, true },
33166 { "mfcrf", OPTION_MASK_MFCRF, false, true },
33167 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
33168 { "mulhw", OPTION_MASK_MULHW, false, true },
33169 { "multiple", OPTION_MASK_MULTIPLE, false, true },
33170 { "popcntb", OPTION_MASK_POPCNTB, false, true },
33171 { "popcntd", OPTION_MASK_POPCNTD, false, true },
33172 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
33173 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
33174 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
33175 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
33176 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
33177 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
33178 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
33179 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
33180 { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true },
33181 { "string", OPTION_MASK_STRING, false, true },
33182 { "update", OPTION_MASK_NO_UPDATE, true , true },
33183 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true },
33184 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true },
33185 { "vsx", OPTION_MASK_VSX, false, true },
33186 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
33187 #ifdef OPTION_MASK_64BIT
33188 #if TARGET_AIX_OS
33189 { "aix64", OPTION_MASK_64BIT, false, false },
33190 { "aix32", OPTION_MASK_64BIT, true, false },
33191 #else
33192 { "64", OPTION_MASK_64BIT, false, false },
33193 { "32", OPTION_MASK_64BIT, true, false },
33194 #endif
33195 #endif
33196 #ifdef OPTION_MASK_EABI
33197 { "eabi", OPTION_MASK_EABI, false, false },
33198 #endif
33199 #ifdef OPTION_MASK_LITTLE_ENDIAN
33200 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
33201 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
33202 #endif
33203 #ifdef OPTION_MASK_RELOCATABLE
33204 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
33205 #endif
33206 #ifdef OPTION_MASK_STRICT_ALIGN
33207 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
33208 #endif
33209 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
33210 { "string", OPTION_MASK_STRING, false, false },
33213 /* Builtin mask mapping for printing the flags. */
33214 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
33216 { "altivec", RS6000_BTM_ALTIVEC, false, false },
33217 { "vsx", RS6000_BTM_VSX, false, false },
33218 { "spe", RS6000_BTM_SPE, false, false },
33219 { "paired", RS6000_BTM_PAIRED, false, false },
33220 { "fre", RS6000_BTM_FRE, false, false },
33221 { "fres", RS6000_BTM_FRES, false, false },
33222 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
33223 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
33224 { "popcntd", RS6000_BTM_POPCNTD, false, false },
33225 { "cell", RS6000_BTM_CELL, false, false },
33226 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
33227 { "crypto", RS6000_BTM_CRYPTO, false, false },
33228 { "htm", RS6000_BTM_HTM, false, false },
33229 { "hard-dfp", RS6000_BTM_DFP, false, false },
33230 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
33231 { "long-double-128", RS6000_BTM_LDBL128, false, false },
33234 /* Option variables that we want to support inside attribute((target)) and
33235 #pragma GCC target operations. */
33237 struct rs6000_opt_var {
33238 const char *name; /* option name */
33239 size_t global_offset; /* offset of the option in global_options. */
33240 size_t target_offset; /* offset of the option in target options. */
33243 static struct rs6000_opt_var const rs6000_opt_vars[] =
33245 { "friz",
33246 offsetof (struct gcc_options, x_TARGET_FRIZ),
33247 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
33248 { "avoid-indexed-addresses",
33249 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
33250 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
33251 { "paired",
33252 offsetof (struct gcc_options, x_rs6000_paired_float),
33253 offsetof (struct cl_target_option, x_rs6000_paired_float), },
33254 { "longcall",
33255 offsetof (struct gcc_options, x_rs6000_default_long_calls),
33256 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
33257 { "optimize-swaps",
33258 offsetof (struct gcc_options, x_rs6000_optimize_swaps),
33259 offsetof (struct cl_target_option, x_rs6000_optimize_swaps), },
33260 { "allow-movmisalign",
33261 offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN),
33262 offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), },
33263 { "allow-df-permute",
33264 offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE),
33265 offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), },
33266 { "sched-groups",
33267 offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS),
33268 offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), },
33269 { "always-hint",
33270 offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT),
33271 offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), },
33272 { "align-branch-targets",
33273 offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS),
33274 offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), },
33275 { "vectorize-builtins",
33276 offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS),
33277 offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), },
33278 { "tls-markers",
33279 offsetof (struct gcc_options, x_tls_markers),
33280 offsetof (struct cl_target_option, x_tls_markers), },
33281 { "sched-prolog",
33282 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
33283 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
33284 { "sched-epilog",
33285 offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG),
33286 offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), },
33287 { "gen-cell-microcode",
33288 offsetof (struct gcc_options, x_rs6000_gen_cell_microcode),
33289 offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), },
33290 { "warn-cell-microcode",
33291 offsetof (struct gcc_options, x_rs6000_warn_cell_microcode),
33292 offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), },
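
/* Example of the user-level syntax these tables back (a sketch; the
   option names come from the rs6000_opt_masks and rs6000_opt_vars
   tables above, and "foo" here is a hypothetical function):

       #pragma GCC target ("cpu=power8,vsx")

       __attribute__((__target__("no-vsx,avoid-indexed-addresses")))
       void foo (void);
*/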
33295 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
33296 parsing. Return true if there were no errors. */
33298 static bool
33299 rs6000_inner_target_options (tree args, bool attr_p)
33301 bool ret = true;
33303 if (args == NULL_TREE)
33306 else if (TREE_CODE (args) == STRING_CST)
33308 char *p = ASTRDUP (TREE_STRING_POINTER (args));
33309 char *q;
33311 while ((q = strtok (p, ",")) != NULL)
33313 bool error_p = false;
33314 bool not_valid_p = false;
33315 const char *cpu_opt = NULL;
33317 p = NULL;
33318 if (strncmp (q, "cpu=", 4) == 0)
33320 int cpu_index = rs6000_cpu_name_lookup (q+4);
33321 if (cpu_index >= 0)
33322 rs6000_cpu_index = cpu_index;
33323 else
33325 error_p = true;
33326 cpu_opt = q+4;
33329 else if (strncmp (q, "tune=", 5) == 0)
33331 int tune_index = rs6000_cpu_name_lookup (q+5);
33332 if (tune_index >= 0)
33333 rs6000_tune_index = tune_index;
33334 else
33336 error_p = true;
33337 cpu_opt = q+5;
33340 else
33342 size_t i;
33343 bool invert = false;
33344 char *r = q;
33346 error_p = true;
33347 if (strncmp (r, "no-", 3) == 0)
33349 invert = true;
33350 r += 3;
33353 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
33354 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
33356 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
33358 if (!rs6000_opt_masks[i].valid_target)
33359 not_valid_p = true;
33360 else
33362 error_p = false;
33363 rs6000_isa_flags_explicit |= mask;
33365 /* VSX needs altivec, so -mvsx automagically sets
33366 altivec and disables -mavoid-indexed-addresses. */
33367 if (!invert)
33369 if (mask == OPTION_MASK_VSX)
33371 mask |= OPTION_MASK_ALTIVEC;
33372 TARGET_AVOID_XFORM = 0;
33376 if (rs6000_opt_masks[i].invert)
33377 invert = !invert;
33379 if (invert)
33380 rs6000_isa_flags &= ~mask;
33381 else
33382 rs6000_isa_flags |= mask;
33384 break;
33387 if (error_p && !not_valid_p)
33389 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
33390 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
33392 size_t j = rs6000_opt_vars[i].global_offset;
33393 *((int *) ((char *)&global_options + j)) = !invert;
33394 error_p = false;
33395 not_valid_p = false;
33396 break;
33401 if (error_p)
33403 const char *eprefix, *esuffix;
33405 ret = false;
33406 if (attr_p)
33408 eprefix = "__attribute__((__target__(";
33409 esuffix = ")))";
33411 else
33413 eprefix = "#pragma GCC target ";
33414 esuffix = "";
33417 if (cpu_opt)
33418 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
33419 q, esuffix);
33420 else if (not_valid_p)
33421 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
33422 else
33423 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
33428 else if (TREE_CODE (args) == TREE_LIST)
33432 tree value = TREE_VALUE (args);
33433 if (value)
33435 bool ret2 = rs6000_inner_target_options (value, attr_p);
33436 if (!ret2)
33437 ret = false;
33439 args = TREE_CHAIN (args);
33441 while (args != NULL_TREE);
33444 else
33445 gcc_unreachable ();
33447 return ret;
33450 /* Print out the target options as a list for -mdebug=target. */
33452 static void
33453 rs6000_debug_target_options (tree args, const char *prefix)
33455 if (args == NULL_TREE)
33456 fprintf (stderr, "%s<NULL>", prefix);
33458 else if (TREE_CODE (args) == STRING_CST)
33460 char *p = ASTRDUP (TREE_STRING_POINTER (args));
33461 char *q;
33463 while ((q = strtok (p, ",")) != NULL)
33465 p = NULL;
33466 fprintf (stderr, "%s\"%s\"", prefix, q);
33467 prefix = ", ";
33471 else if (TREE_CODE (args) == TREE_LIST)
33475 tree value = TREE_VALUE (args);
33476 if (value)
33478 rs6000_debug_target_options (value, prefix);
33479 prefix = ", ";
33481 args = TREE_CHAIN (args);
33483 while (args != NULL_TREE);
33486 else
33487 gcc_unreachable ();
33489 return;
33493 /* Hook to validate attribute((target("..."))). */
33495 static bool
33496 rs6000_valid_attribute_p (tree fndecl,
33497 tree ARG_UNUSED (name),
33498 tree args,
33499 int flags)
33501 struct cl_target_option cur_target;
33502 bool ret;
33503 tree old_optimize = build_optimization_node (&global_options);
33504 tree new_target, new_optimize;
33505 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33507 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
33509 if (TARGET_DEBUG_TARGET)
33511 tree tname = DECL_NAME (fndecl);
33512 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
33513 if (tname)
33514 fprintf (stderr, "function: %.*s\n",
33515 (int) IDENTIFIER_LENGTH (tname),
33516 IDENTIFIER_POINTER (tname));
33517 else
33518 fprintf (stderr, "function: unknown\n");
33520 fprintf (stderr, "args:");
33521 rs6000_debug_target_options (args, " ");
33522 fprintf (stderr, "\n");
33524 if (flags)
33525 fprintf (stderr, "flags: 0x%x\n", flags);
33527 fprintf (stderr, "--------------------\n");
33530 old_optimize = build_optimization_node (&global_options);
33531 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
33533 /* If the function changed the optimization levels as well as setting target
33534 options, start with the optimizations specified. */
33535 if (func_optimize && func_optimize != old_optimize)
33536 cl_optimization_restore (&global_options,
33537 TREE_OPTIMIZATION (func_optimize));
33539 /* The target attributes may also change some optimization flags, so update
33540 the optimization options if necessary. */
33541 cl_target_option_save (&cur_target, &global_options);
33542 rs6000_cpu_index = rs6000_tune_index = -1;
33543 ret = rs6000_inner_target_options (args, true);
33545 /* Set up any additional state. */
33546 if (ret)
33548 ret = rs6000_option_override_internal (false);
33549 new_target = build_target_option_node (&global_options);
33551 else
33552 new_target = NULL;
33554 new_optimize = build_optimization_node (&global_options);
33556 if (!new_target)
33557 ret = false;
33559 else if (fndecl)
33561 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
33563 if (old_optimize != new_optimize)
33564 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
33567 cl_target_option_restore (&global_options, &cur_target);
33569 if (old_optimize != new_optimize)
33570 cl_optimization_restore (&global_options,
33571 TREE_OPTIMIZATION (old_optimize));
33573 return ret;
33577 /* Hook to validate the current #pragma GCC target and set the state, and
33578 update the macros based on what was changed. If ARGS is NULL, then
33579 POP_TARGET is used to reset the options. */
33581 bool
33582 rs6000_pragma_target_parse (tree args, tree pop_target)
33584 tree prev_tree = build_target_option_node (&global_options);
33585 tree cur_tree;
33586 struct cl_target_option *prev_opt, *cur_opt;
33587 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
33588 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
33590 if (TARGET_DEBUG_TARGET)
33592 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
33593 fprintf (stderr, "args:");
33594 rs6000_debug_target_options (args, " ");
33595 fprintf (stderr, "\n");
33597 if (pop_target)
33599 fprintf (stderr, "pop_target:\n");
33600 debug_tree (pop_target);
33602 else
33603 fprintf (stderr, "pop_target: <NULL>\n");
33605 fprintf (stderr, "--------------------\n");
33608 if (! args)
33610 cur_tree = ((pop_target)
33611 ? pop_target
33612 : target_option_default_node);
33613 cl_target_option_restore (&global_options,
33614 TREE_TARGET_OPTION (cur_tree));
33616 else
33618 rs6000_cpu_index = rs6000_tune_index = -1;
33619 if (!rs6000_inner_target_options (args, false)
33620 || !rs6000_option_override_internal (false)
33621 || (cur_tree = build_target_option_node (&global_options))
33622 == NULL_TREE)
33624 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
33625 fprintf (stderr, "invalid pragma\n");
33627 return false;
33631 target_option_current_node = cur_tree;
33633 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
33634 change the macros that are defined. */
33635 if (rs6000_target_modify_macros_ptr)
33637 prev_opt = TREE_TARGET_OPTION (prev_tree);
33638 prev_bumask = prev_opt->x_rs6000_builtin_mask;
33639 prev_flags = prev_opt->x_rs6000_isa_flags;
33641 cur_opt = TREE_TARGET_OPTION (cur_tree);
33642 cur_flags = cur_opt->x_rs6000_isa_flags;
33643 cur_bumask = cur_opt->x_rs6000_builtin_mask;
33645 diff_bumask = (prev_bumask ^ cur_bumask);
33646 diff_flags = (prev_flags ^ cur_flags);
33648 if ((diff_flags != 0) || (diff_bumask != 0))
33650 /* Delete old macros. */
33651 rs6000_target_modify_macros_ptr (false,
33652 prev_flags & diff_flags,
33653 prev_bumask & diff_bumask);
33655 /* Define new macros. */
33656 rs6000_target_modify_macros_ptr (true,
33657 cur_flags & diff_flags,
33658 cur_bumask & diff_bumask);
33662 return true;
33666 /* Remember the last target of rs6000_set_current_function. */
33667 static GTY(()) tree rs6000_previous_fndecl;
33669 /* Establish appropriate back-end context for processing the function
33670 FNDECL. The argument might be NULL to indicate processing at top
33671 level, outside of any function scope. */
33672 static void
33673 rs6000_set_current_function (tree fndecl)
33675 tree old_tree = (rs6000_previous_fndecl
33676 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
33677 : NULL_TREE);
33679 tree new_tree = (fndecl
33680 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
33681 : NULL_TREE);
33683 if (TARGET_DEBUG_TARGET)
33685 bool print_final = false;
33686 fprintf (stderr, "\n==================== rs6000_set_current_function");
33688 if (fndecl)
33689 fprintf (stderr, ", fndecl %s (%p)",
33690 (DECL_NAME (fndecl)
33691 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
33692 : "<unknown>"), (void *)fndecl);
33694 if (rs6000_previous_fndecl)
33695 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
33697 fprintf (stderr, "\n");
33698 if (new_tree)
33700 fprintf (stderr, "\nnew fndecl target specific options:\n");
33701 debug_tree (new_tree);
33702 print_final = true;
33705 if (old_tree)
33707 fprintf (stderr, "\nold fndecl target specific options:\n");
33708 debug_tree (old_tree);
33709 print_final = true;
33712 if (print_final)
33713 fprintf (stderr, "--------------------\n");
33716 /* Only change the context if the function changes. This hook is called
33717 several times in the course of compiling a function, and we don't want to
33718 slow things down too much or call target_reinit when it isn't safe. */
33719 if (fndecl && fndecl != rs6000_previous_fndecl)
33721 rs6000_previous_fndecl = fndecl;
33722 if (old_tree == new_tree)
33725 else if (new_tree && new_tree != target_option_default_node)
33727 cl_target_option_restore (&global_options,
33728 TREE_TARGET_OPTION (new_tree));
33729 if (TREE_TARGET_GLOBALS (new_tree))
33730 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33731 else
33732 TREE_TARGET_GLOBALS (new_tree)
33733 = save_target_globals_default_opts ();
33736 else if (old_tree && old_tree != target_option_default_node)
33738 new_tree = target_option_current_node;
33739 cl_target_option_restore (&global_options,
33740 TREE_TARGET_OPTION (new_tree));
33741 if (TREE_TARGET_GLOBALS (new_tree))
33742 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
33743 else if (new_tree == target_option_default_node)
33744 restore_target_globals (&default_target_globals);
33745 else
33746 TREE_TARGET_GLOBALS (new_tree)
33747 = save_target_globals_default_opts ();
33753 /* Save the current options */
33755 static void
33756 rs6000_function_specific_save (struct cl_target_option *ptr,
33757 struct gcc_options *opts)
33759 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
33760 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
33763 /* Restore the current options */
33765 static void
33766 rs6000_function_specific_restore (struct gcc_options *opts,
33767 struct cl_target_option *ptr)
33770 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
33771 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
33772 (void) rs6000_option_override_internal (false);
33775 /* Print the current options */
33777 static void
33778 rs6000_function_specific_print (FILE *file, int indent,
33779 struct cl_target_option *ptr)
33781 rs6000_print_isa_options (file, indent, "Isa options set",
33782 ptr->x_rs6000_isa_flags);
33784 rs6000_print_isa_options (file, indent, "Isa options explicit",
33785 ptr->x_rs6000_isa_flags_explicit);
33788 /* Helper function to print the current isa or misc options on a line. */
33790 static void
33791 rs6000_print_options_internal (FILE *file,
33792 int indent,
33793 const char *string,
33794 HOST_WIDE_INT flags,
33795 const char *prefix,
33796 const struct rs6000_opt_mask *opts,
33797 size_t num_elements)
33799 size_t i;
33800 size_t start_column = 0;
33801 size_t cur_column;
33802 size_t max_column = 76;
33803 const char *comma = "";
33805 if (indent)
33806 start_column += fprintf (file, "%*s", indent, "");
33808 if (!flags)
33810 fprintf (file, DEBUG_FMT_S, string, "<none>");
33811 return;
33814 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
33816 /* Print the various mask options. */
33817 cur_column = start_column;
33818 for (i = 0; i < num_elements; i++)
33820 if ((flags & opts[i].mask) != 0)
33822 const char *no_str = opts[i].invert ? "no-" : "";
33823 size_t len = (strlen (comma)
33824 + strlen (prefix)
33825 + strlen (no_str)
33826 + strlen (opts[i].name));
33828 cur_column += len;
33829 if (cur_column > max_column)
33831 fprintf (stderr, ", \\\n%*s", (int)start_column, "");
33832 cur_column = start_column + len;
33833 comma = "";
33836 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
33837 opts[i].name);
33838 flags &= ~ opts[i].mask;
33839 comma = ", ";
33843 fputs ("\n", file);
33846 /* Helper function to print the current isa options on a line. */
33848 static void
33849 rs6000_print_isa_options (FILE *file, int indent, const char *string,
33850 HOST_WIDE_INT flags)
33852 rs6000_print_options_internal (file, indent, string, flags, "-m",
33853 &rs6000_opt_masks[0],
33854 ARRAY_SIZE (rs6000_opt_masks));
33857 static void
33858 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
33859 HOST_WIDE_INT flags)
33861 rs6000_print_options_internal (file, indent, string, flags, "",
33862 &rs6000_builtin_mask_names[0],
33863 ARRAY_SIZE (rs6000_builtin_mask_names));
33867 /* Hook to determine if one function can safely inline another. */
33869 static bool
33870 rs6000_can_inline_p (tree caller, tree callee)
33872 bool ret = false;
33873 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
33874 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
33876 /* If callee has no option attributes, then it is ok to inline. */
33877 if (!callee_tree)
33878 ret = true;
33880 /* If caller has no option attributes, but callee does then it is not ok to
33881 inline. */
33882 else if (!caller_tree)
33883 ret = false;
33885 else
33887 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
33888 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
33890 /* Callee's options should be a subset of the caller's, i.e. a vsx function
33891 can inline an altivec function but a non-vsx function can't inline a
33892 vsx function. */
33893 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
33894 == callee_opts->x_rs6000_isa_flags)
33895 ret = true;
33898 if (TARGET_DEBUG_TARGET)
33899 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
33900 (DECL_NAME (caller)
33901 ? IDENTIFIER_POINTER (DECL_NAME (caller))
33902 : "<unknown>"),
33903 (DECL_NAME (callee)
33904 ? IDENTIFIER_POINTER (DECL_NAME (callee))
33905 : "<unknown>"),
33906 (ret ? "can" : "cannot"));
33908 return ret;
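
/* Illustrative sketch (not part of the hook itself): the subset test used
   above, for any pair of ISA flag words.  The callee's flags must all be
   present in the caller's for inlining to be safe.  */
static inline bool
isa_flags_subset_p_example (unsigned HOST_WIDE_INT caller_flags,
			    unsigned HOST_WIDE_INT callee_flags)
{
  /* E.g. a vsx caller may inline an altivec-only callee, but a non-vsx
     caller may not inline a vsx callee.  */
  return (caller_flags & callee_flags) == callee_flags;
}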
33911 /* Allocate a stack temp and fix up the address so it meets the particular
33912 memory requirements (either offsettable or REG+REG addressing). */
33915 rs6000_allocate_stack_temp (machine_mode mode,
33916 bool offsettable_p,
33917 bool reg_reg_p)
33919 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
33920 rtx addr = XEXP (stack, 0);
33921 int strict_p = (reload_in_progress || reload_completed);
33923 if (!legitimate_indirect_address_p (addr, strict_p))
33925 if (offsettable_p
33926 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
33927 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
33929 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
33930 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
33933 return stack;
33936 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
33937 to such a form to deal with memory reference instructions like STFIWX that
33938 only take reg+reg addressing. */
33941 rs6000_address_for_fpconvert (rtx x)
33943 int strict_p = (reload_in_progress || reload_completed);
33944 rtx addr;
33946 gcc_assert (MEM_P (x));
33947 addr = XEXP (x, 0);
33948 if (! legitimate_indirect_address_p (addr, strict_p)
33949 && ! legitimate_indexed_address_p (addr, strict_p))
33951 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
33953 rtx reg = XEXP (addr, 0);
33954 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
33955 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
33956 gcc_assert (REG_P (reg));
33957 emit_insn (gen_add3_insn (reg, reg, size_rtx));
33958 addr = reg;
33960 else if (GET_CODE (addr) == PRE_MODIFY)
33962 rtx reg = XEXP (addr, 0);
33963 rtx expr = XEXP (addr, 1);
33964 gcc_assert (REG_P (reg));
33965 gcc_assert (GET_CODE (expr) == PLUS);
33966 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
33967 addr = reg;
33970 x = replace_equiv_address (x, copy_addr_to_reg (addr));
33973 return x;
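
/* For example (a sketch), an SImode access through (pre_inc (reg 9)) is
   rewritten by the code above into an explicit update

       (set (reg 9) (plus (reg 9) (const_int 4)))

   followed by a plain register-indirect reference, which reg+reg-only
   instructions such as stfiwx can then use.  */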
33976 /* Given a memory reference, if it is not in the form for altivec memory
33977 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
33978 convert to the altivec format. */
33981 rs6000_address_for_altivec (rtx x)
33983 gcc_assert (MEM_P (x));
33984 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
33986 rtx addr = XEXP (x, 0);
33987 int strict_p = (reload_in_progress || reload_completed);
33989 if (!legitimate_indexed_address_p (addr, strict_p)
33990 && !legitimate_indirect_address_p (addr, strict_p))
33991 addr = copy_to_mode_reg (Pmode, addr);
33993 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
33994 x = change_address (x, GET_MODE (x), addr);
33997 return x;
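
/* Illustrative sketch (plain C arithmetic, not RTL): the effect of the
   AND with -16 above on a byte address.  */
static inline unsigned HOST_WIDE_INT
altivec_align_example (unsigned HOST_WIDE_INT addr)
{
  return addr & ~(unsigned HOST_WIDE_INT) 15;	/* clear the low 4 bits */
}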
34000 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
34002 On the RS/6000, all integer constants are acceptable, most won't be valid
34003 for particular insns, though. Only easy FP constants are acceptable. */
34005 static bool
34006 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
34008 if (TARGET_ELF && tls_referenced_p (x))
34009 return false;
34011 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
34012 || GET_MODE (x) == VOIDmode
34013 || (TARGET_POWERPC64 && mode == DImode)
34014 || easy_fp_constant (x, mode)
34015 || easy_vector_constant (x, mode));
34019 /* Return TRUE iff the sequence ending in LAST sets the static chain. */
34021 static bool
34022 chain_already_loaded (rtx_insn *last)
34024 for (; last != NULL; last = PREV_INSN (last))
34026 if (NONJUMP_INSN_P (last))
34028 rtx patt = PATTERN (last);
34030 if (GET_CODE (patt) == SET)
34032 rtx lhs = XEXP (patt, 0);
34034 if (REG_P (lhs) && REGNO (lhs) == STATIC_CHAIN_REGNUM)
34035 return true;
34039 return false;
34042 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
34044 void
34045 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
34047 const bool direct_call_p
34048 = GET_CODE (func_desc) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (func_desc);
34049 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
34050 rtx toc_load = NULL_RTX;
34051 rtx toc_restore = NULL_RTX;
34052 rtx func_addr;
34053 rtx abi_reg = NULL_RTX;
34054 rtx call[4];
34055 int n_call;
34056 rtx insn;
34058 /* Handle longcall attributes. */
34059 if (INTVAL (cookie) & CALL_LONG)
34060 func_desc = rs6000_longcall_ref (func_desc);
34062 /* Handle indirect calls. */
34063 if (GET_CODE (func_desc) != SYMBOL_REF
34064 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
34066 /* Save the TOC into its reserved slot before the call,
34067 and prepare to restore it after the call. */
34068 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
34069 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
34070 rtx stack_toc_mem = gen_frame_mem (Pmode,
34071 gen_rtx_PLUS (Pmode, stack_ptr,
34072 stack_toc_offset));
34073 rtx stack_toc_unspec = gen_rtx_UNSPEC (Pmode,
34074 gen_rtvec (1, stack_toc_offset),
34075 UNSPEC_TOCSLOT);
34076 toc_restore = gen_rtx_SET (toc_reg, stack_toc_unspec);
34078 /* Can we optimize saving the TOC in the prologue or
34079 do we need to do it at every call? */
34080 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
34081 cfun->machine->save_toc_in_prologue = true;
34082 else
34084 MEM_VOLATILE_P (stack_toc_mem) = 1;
34085 emit_move_insn (stack_toc_mem, toc_reg);
34088 if (DEFAULT_ABI == ABI_ELFv2)
34090 /* A function pointer in the ELFv2 ABI is just a plain address, but
34091 the ABI requires it to be loaded into r12 before the call. */
34092 func_addr = gen_rtx_REG (Pmode, 12);
34093 emit_move_insn (func_addr, func_desc);
34094 abi_reg = func_addr;
34096 else
34098 /* A function pointer under AIX is a pointer to a data area whose
34099 first word contains the actual address of the function, whose
34100 second word contains a pointer to its TOC, and whose third word
34101 contains a value to place in the static chain register (r11).
34102 Note that if we load the static chain, our "trampoline" need
34103 not have any executable code. */
34105 /* Load up address of the actual function. */
34106 func_desc = force_reg (Pmode, func_desc);
34107 func_addr = gen_reg_rtx (Pmode);
34108 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
34110 /* Prepare to load the TOC of the called function. Note that the
34111 TOC load must happen immediately before the actual call so
34112 that unwinding the TOC registers works correctly. See the
34113 comment in frob_update_context. */
34114 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
34115 rtx func_toc_mem = gen_rtx_MEM (Pmode,
34116 gen_rtx_PLUS (Pmode, func_desc,
34117 func_toc_offset));
34118 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
34120 /* If we have a static chain, load it up. But, if the call was
34121 originally direct, the 3rd word has not been written since no
34122 trampoline has been built, so we ought not to load it, lest we
34123 overwrite a static chain value. */
34124 if (!direct_call_p
34125 && TARGET_POINTERS_TO_NESTED_FUNCTIONS
34126 && !chain_already_loaded (get_current_sequence ()->next->last))
34128 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
34129 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
34130 rtx func_sc_mem = gen_rtx_MEM (Pmode,
34131 gen_rtx_PLUS (Pmode, func_desc,
34132 func_sc_offset));
34133 emit_move_insn (sc_reg, func_sc_mem);
34134 abi_reg = sc_reg;
34138 else
34140 /* Direct calls use the TOC: for local calls, the callee will
34141 assume the TOC register is set; for non-local calls, the
34142 PLT stub needs the TOC register. */
34143 abi_reg = toc_reg;
34144 func_addr = func_desc;
34147 /* Create the call. */
34148 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
34149 if (value != NULL_RTX)
34150 call[0] = gen_rtx_SET (value, call[0]);
34151 n_call = 1;
34153 if (toc_load)
34154 call[n_call++] = toc_load;
34155 if (toc_restore)
34156 call[n_call++] = toc_restore;
34158 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
34160 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
34161 insn = emit_call_insn (insn);
34163 /* Mention all registers defined by the ABI to hold information
34164 as uses in CALL_INSN_FUNCTION_USAGE. */
34165 if (abi_reg)
34166 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
34169 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
34171 void
34172 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
34174 rtx call[2];
34175 rtx insn;
34177 gcc_assert (INTVAL (cookie) == 0);
34179 /* Create the call. */
34180 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
34181 if (value != NULL_RTX)
34182 call[0] = gen_rtx_SET (value, call[0]);
34184 call[1] = simple_return_rtx;
34186 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
34187 insn = emit_call_insn (insn);
34189 /* Note use of the TOC register. */
34190 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
34191 /* We need to also mark a use of the link register since the function we
34192 sibling-call to will use it to return to our caller. */
34193 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
34196 /* Return whether we need to always update the saved TOC pointer when we update
34197 the stack pointer. */
34199 static bool
34200 rs6000_save_toc_in_prologue_p (void)
34202 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
34205 #ifdef HAVE_GAS_HIDDEN
34206 # define USE_HIDDEN_LINKONCE 1
34207 #else
34208 # define USE_HIDDEN_LINKONCE 0
34209 #endif
34211 /* Fills in the label name that should be used for a 476 link stack thunk. */
34213 void
34214 get_ppc476_thunk_name (char name[32])
34216 gcc_assert (TARGET_LINK_STACK);
34218 if (USE_HIDDEN_LINKONCE)
34219 sprintf (name, "__ppc476.get_thunk");
34220 else
34221 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
34224 /* This function emits the simple thunk routine that is used to preserve
34225 the link stack on the 476 cpu. */
34227 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
34228 static void
34229 rs6000_code_end (void)
34231 char name[32];
34232 tree decl;
34234 if (!TARGET_LINK_STACK)
34235 return;
34237 get_ppc476_thunk_name (name);
34239 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
34240 build_function_type_list (void_type_node, NULL_TREE));
34241 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
34242 NULL_TREE, void_type_node);
34243 TREE_PUBLIC (decl) = 1;
34244 TREE_STATIC (decl) = 1;
34246 #if RS6000_WEAK
34247 if (USE_HIDDEN_LINKONCE)
34249 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
34250 targetm.asm_out.unique_section (decl, 0);
34251 switch_to_section (get_named_section (decl, NULL, 0));
34252 DECL_WEAK (decl) = 1;
34253 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
34254 targetm.asm_out.globalize_label (asm_out_file, name);
34255 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
34256 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
34258 else
34259 #endif
34261 switch_to_section (text_section);
34262 ASM_OUTPUT_LABEL (asm_out_file, name);
34265 DECL_INITIAL (decl) = make_node (BLOCK);
34266 current_function_decl = decl;
34267 init_function_start (decl);
34268 first_function_block_is_cold = false;
34269 /* Make sure unwind info is emitted for the thunk if needed. */
34270 final_start_function (emit_barrier (), asm_out_file, 1);
34272 fputs ("\tblr\n", asm_out_file);
34274 final_end_function ();
34275 init_insn_lengths ();
34276 free_after_compilation (cfun);
34277 set_cfun (NULL);
34278 current_function_decl = NULL;
34281 /* Add r30 to hard reg set if the prologue sets it up and it is not
34282 pic_offset_table_rtx. */
34284 static void
34285 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
34287 if (!TARGET_SINGLE_PIC_BASE
34288 && TARGET_TOC
34289 && TARGET_MINIMAL_TOC
34290 && get_pool_size () != 0)
34291 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
34292 if (cfun->machine->split_stack_argp_used)
34293 add_to_hard_reg_set (&set->set, Pmode, 12);
34297 /* Helper function for rs6000_split_logical to emit a logical instruction after
34298 splitting the operation into single GPR registers.
34300 DEST is the destination register.
34301 OP1 and OP2 are the input source registers.
34302 CODE is the base operation (AND, IOR, XOR, NOT).
34303 MODE is the machine mode.
34304 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
34305 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
34306 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
34308 static void
34309 rs6000_split_logical_inner (rtx dest,
34310 rtx op1,
34311 rtx op2,
34312 enum rtx_code code,
34313 machine_mode mode,
34314 bool complement_final_p,
34315 bool complement_op1_p,
34316 bool complement_op2_p)
34318 rtx bool_rtx;
34320 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
34321 if (op2 && GET_CODE (op2) == CONST_INT
34322 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
34323 && !complement_final_p && !complement_op1_p && !complement_op2_p)
34325 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
34326 HOST_WIDE_INT value = INTVAL (op2) & mask;
34328 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
34329 if (code == AND)
34331 if (value == 0)
34333 emit_insn (gen_rtx_SET (dest, const0_rtx));
34334 return;
34337 else if (value == mask)
34339 if (!rtx_equal_p (dest, op1))
34340 emit_insn (gen_rtx_SET (dest, op1));
34341 return;
34345 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
34346 into separate ORI/ORIS or XORI/XORIS instructions. */
34347 else if (code == IOR || code == XOR)
34349 if (value == 0)
34351 if (!rtx_equal_p (dest, op1))
34352 emit_insn (gen_rtx_SET (dest, op1));
34353 return;
34358 if (code == AND && mode == SImode
34359 && !complement_final_p && !complement_op1_p && !complement_op2_p)
34361 emit_insn (gen_andsi3 (dest, op1, op2));
34362 return;
34365 if (complement_op1_p)
34366 op1 = gen_rtx_NOT (mode, op1);
34368 if (complement_op2_p)
34369 op2 = gen_rtx_NOT (mode, op2);
34371 /* For canonical RTL, if only one arm is inverted it is the first. */
34372 if (!complement_op1_p && complement_op2_p)
34373 std::swap (op1, op2);
34375 bool_rtx = ((code == NOT)
34376 ? gen_rtx_NOT (mode, op1)
34377 : gen_rtx_fmt_ee (code, mode, op1, op2));
34379 if (complement_final_p)
34380 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
34382 emit_insn (gen_rtx_SET (dest, bool_rtx));
34385 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
34386 operations are split immediately during RTL generation to allow for more
34387 optimizations of the AND/IOR/XOR.
34389 OPERANDS is an array containing the destination and two input operands.
34390 CODE is the base operation (AND, IOR, XOR, NOT).
34391 MODE is the machine mode.
34392 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
34393 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
34394 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
34395 CLOBBER_REG is either NULL or a scratch register of type CC to allow
34396 formation of the AND instructions. */
34398 static void
34399 rs6000_split_logical_di (rtx operands[3],
34400 enum rtx_code code,
34401 bool complement_final_p,
34402 bool complement_op1_p,
34403 bool complement_op2_p)
34405 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
34406 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
34407 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
34408 enum hi_lo { hi = 0, lo = 1 };
34409 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
34410 size_t i;
34412 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
34413 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
34414 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
34415 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
34417 if (code == NOT)
34418 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
34419 else
34421 if (GET_CODE (operands[2]) != CONST_INT)
34423 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
34424 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
34426 else
34428 HOST_WIDE_INT value = INTVAL (operands[2]);
34429 HOST_WIDE_INT value_hi_lo[2];
34431 gcc_assert (!complement_final_p);
34432 gcc_assert (!complement_op1_p);
34433 gcc_assert (!complement_op2_p);
34435 value_hi_lo[hi] = value >> 32;
34436 value_hi_lo[lo] = value & lower_32bits;
34438 for (i = 0; i < 2; i++)
34440 HOST_WIDE_INT sub_value = value_hi_lo[i];
34442 if (sub_value & sign_bit)
34443 sub_value |= upper_32bits;
34445 op2_hi_lo[i] = GEN_INT (sub_value);
34447 /* If this is an AND instruction, check to see if we need to load
34448 the value in a register. */
34449 if (code == AND && sub_value != -1 && sub_value != 0
34450 && !and_operand (op2_hi_lo[i], SImode))
34451 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
34456 for (i = 0; i < 2; i++)
34458 /* Split large IOR/XOR operations. */
34459 if ((code == IOR || code == XOR)
34460 && GET_CODE (op2_hi_lo[i]) == CONST_INT
34461 && !complement_final_p
34462 && !complement_op1_p
34463 && !complement_op2_p
34464 && !logical_const_operand (op2_hi_lo[i], SImode))
34466 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
34467 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
34468 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
34469 rtx tmp = gen_reg_rtx (SImode);
34471 /* Make sure the constant is sign extended. */
34472 if ((hi_16bits & sign_bit) != 0)
34473 hi_16bits |= upper_32bits;
34475 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
34476 code, SImode, false, false, false);
34478 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
34479 code, SImode, false, false, false);
34481 else
34482 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
34483 code, SImode, complement_final_p,
34484 complement_op1_p, complement_op2_p);
34487 return;
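
/* Illustrative sketch of the constant splitting above (assuming a 64-bit
   long long): each 32-bit half is sign-extended the same way the
   op2_hi_lo[] values are prepared for the SImode sub-operations.  */
static inline void
split_di_constant_example (long long value, long long halves[2])
{
  halves[0] = value >> 32;			/* high 32 bits */
  halves[1] = value & 0xffffffffLL;		/* low 32 bits */
  if (halves[1] & 0x80000000LL)
    halves[1] |= ~0xffffffffLL;			/* sign-extend the low half */
}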
34490 /* Split the insns that make up boolean operations operating on multiple GPR
34491 registers. The boolean MD patterns ensure that the inputs either are
34492 exactly the same as the output registers, or there is no overlap.
34494 OPERANDS is an array containing the destination and two input operands.
34495 CODE is the base operation (AND, IOR, XOR, NOT).
34496 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
34497 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
34498 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
34500 void
34501 rs6000_split_logical (rtx operands[3],
34502 enum rtx_code code,
34503 bool complement_final_p,
34504 bool complement_op1_p,
34505 bool complement_op2_p)
34507 machine_mode mode = GET_MODE (operands[0]);
34508 machine_mode sub_mode;
34509 rtx op0, op1, op2;
34510 int sub_size, regno0, regno1, nregs, i;
34512 /* If this is DImode, use the specialized version that can run before
34513 register allocation. */
34514 if (mode == DImode && !TARGET_POWERPC64)
34516 rs6000_split_logical_di (operands, code, complement_final_p,
34517 complement_op1_p, complement_op2_p);
34518 return;
34521 op0 = operands[0];
34522 op1 = operands[1];
34523 op2 = (code == NOT) ? NULL_RTX : operands[2];
34524 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
34525 sub_size = GET_MODE_SIZE (sub_mode);
34526 regno0 = REGNO (op0);
34527 regno1 = REGNO (op1);
34529 gcc_assert (reload_completed);
34530 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
34531 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
34533 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
34534 gcc_assert (nregs > 1);
34536 if (op2 && REG_P (op2))
34537 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
34539 for (i = 0; i < nregs; i++)
34541 int offset = i * sub_size;
34542 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
34543 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
34544 rtx sub_op2 = ((code == NOT)
34545 ? NULL_RTX
34546 : simplify_subreg (sub_mode, op2, mode, offset));
34548 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
34549 complement_final_p, complement_op1_p,
34550 complement_op2_p);
34553 return;
34557 /* Return true if the peephole2 can combine an addis instruction with a
34558 D-form load whose offset allows the pair to be fused together on
34559 a power8. */
34561 bool
34562 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
34563 rtx addis_value, /* addis value. */
34564 rtx target, /* target register that is loaded. */
34565 rtx mem) /* bottom part of the memory addr. */
34567 rtx addr;
34568 rtx base_reg;
34570 /* Validate arguments. */
34571 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
34572 return false;
34574 if (!base_reg_operand (target, GET_MODE (target)))
34575 return false;
34577 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
34578 return false;
34580 /* Allow sign/zero extension. */
34581 if (GET_CODE (mem) == ZERO_EXTEND
34582 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
34583 mem = XEXP (mem, 0);
34585 if (!MEM_P (mem))
34586 return false;
34588 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
34589 return false;
34591 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
34592 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
34593 return false;
34595 /* Validate that the register used to load the high value is either the
34596 register being loaded, or we can safely replace its use.
34598 This function is only called from the peephole2 pass and we assume that
34599 there are 2 instructions in the peephole (addis and load), so we want to
34600 check that the target register is not used in the memory address and that the
34601 register to hold the addis result is dead after the peephole. */
34602 if (REGNO (addis_reg) != REGNO (target))
34604 if (reg_mentioned_p (target, mem))
34605 return false;
34607 if (!peep2_reg_dead_p (2, addis_reg))
34608 return false;
34610 /* If the target register being loaded is the stack pointer, we must
34611 avoid loading any other value into it, even temporarily. */
34612 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
34613 return false;
34616 base_reg = XEXP (addr, 0);
34617 return REGNO (addis_reg) == REGNO (base_reg);
34620 /* During the peephole2 pass, adjust and expand the insns for a load fusion
34621 sequence. We adjust the addis register to use the target register. If the
34622 load sign extends, we adjust the code to do a zero-extending load followed
34623 by an explicit sign extension, since the fusion only covers zero-extending
34624 loads.
34626 The operands are:
34627 operands[0] register set with addis (to be replaced with target)
34628 operands[1] value set via addis
34629 operands[2] target register being loaded
34630 operands[3] D-form memory reference using operands[0]. */
34632 void
34633 expand_fusion_gpr_load (rtx *operands)
34635 rtx addis_value = operands[1];
34636 rtx target = operands[2];
34637 rtx orig_mem = operands[3];
34638 rtx new_addr, new_mem, orig_addr, offset;
34639 enum rtx_code plus_or_lo_sum;
34640 machine_mode target_mode = GET_MODE (target);
34641 machine_mode extend_mode = target_mode;
34642 machine_mode ptr_mode = Pmode;
34643 enum rtx_code extend = UNKNOWN;
34645 if (GET_CODE (orig_mem) == ZERO_EXTEND
34646 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
34648 extend = GET_CODE (orig_mem);
34649 orig_mem = XEXP (orig_mem, 0);
34650 target_mode = GET_MODE (orig_mem);
34653 gcc_assert (MEM_P (orig_mem));
34655 orig_addr = XEXP (orig_mem, 0);
34656 plus_or_lo_sum = GET_CODE (orig_addr);
34657 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
34659 offset = XEXP (orig_addr, 1);
34660 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
34661 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
34663 if (extend != UNKNOWN)
34664 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
34666 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
34667 UNSPEC_FUSION_GPR);
34668 emit_insn (gen_rtx_SET (target, new_mem));
34670 if (extend == SIGN_EXTEND)
34672 int sub_off = ((BYTES_BIG_ENDIAN)
34673 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
34674 : 0);
34675 rtx sign_reg
34676 = simplify_subreg (target_mode, target, extend_mode, sub_off);
34678 emit_insn (gen_rtx_SET (target,
34679 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
34682 return;
34685 /* Return a string to fuse an addis instruction with a gpr load to the same
34686 register that we loaded up the addis instruction. The address that is used
34687 is the logical address that was formed during peephole2:
34688 (lo_sum (high) (low-part))
34690 The code is complicated, so we call output_asm_insn directly, and just
34691 return "". */
34693 const char *
34694 emit_fusion_gpr_load (rtx target, rtx mem)
34696 rtx addis_value;
34697 rtx fuse_ops[10];
34698 rtx addr;
34699 rtx load_offset;
34700 const char *addis_str = NULL;
34701 const char *load_str = NULL;
34702 const char *mode_name = NULL;
34703 char insn_template[80];
34704 machine_mode mode;
34705 const char *comment_str = ASM_COMMENT_START;
34707 if (GET_CODE (mem) == ZERO_EXTEND)
34708 mem = XEXP (mem, 0);
34710 gcc_assert (REG_P (target) && MEM_P (mem));
34712 if (*comment_str == ' ')
34713 comment_str++;
34715 addr = XEXP (mem, 0);
34716 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
34717 gcc_unreachable ();
34719 addis_value = XEXP (addr, 0);
34720 load_offset = XEXP (addr, 1);
34722 /* Now emit the load instruction to the same register. */
34723 mode = GET_MODE (mem);
34724 switch (mode)
34726 case QImode:
34727 mode_name = "char";
34728 load_str = "lbz";
34729 break;
34731 case HImode:
34732 mode_name = "short";
34733 load_str = "lhz";
34734 break;
34736 case SImode:
34737 mode_name = "int";
34738 load_str = "lwz";
34739 break;
34741 case DImode:
34742 gcc_assert (TARGET_POWERPC64);
34743 mode_name = "long";
34744 load_str = "ld";
34745 break;
34747 default:
34748 gcc_unreachable ();
34751 /* Emit the addis instruction. */
34752 fuse_ops[0] = target;
34753 if (satisfies_constraint_L (addis_value))
34755 fuse_ops[1] = addis_value;
34756 addis_str = "lis %0,%v1";
34759 else if (GET_CODE (addis_value) == PLUS)
34761 rtx op0 = XEXP (addis_value, 0);
34762 rtx op1 = XEXP (addis_value, 1);
34764 if (REG_P (op0) && CONST_INT_P (op1)
34765 && satisfies_constraint_L (op1))
34767 fuse_ops[1] = op0;
34768 fuse_ops[2] = op1;
34769 addis_str = "addis %0,%1,%v2";
34773 else if (GET_CODE (addis_value) == HIGH)
34775 rtx value = XEXP (addis_value, 0);
34776 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
34778 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
34779 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
34780 if (TARGET_ELF)
34781 addis_str = "addis %0,%2,%1@toc@ha";
34783 else if (TARGET_XCOFF)
34784 addis_str = "addis %0,%1@u(%2)";
34786 else
34787 gcc_unreachable ();
34790 else if (GET_CODE (value) == PLUS)
34792 rtx op0 = XEXP (value, 0);
34793 rtx op1 = XEXP (value, 1);
34795 if (GET_CODE (op0) == UNSPEC
34796 && XINT (op0, 1) == UNSPEC_TOCREL
34797 && CONST_INT_P (op1))
34799 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
34800 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
34801 fuse_ops[3] = op1;
34802 if (TARGET_ELF)
34803 addis_str = "addis %0,%2,%1+%3@toc@ha";
34805 else if (TARGET_XCOFF)
34806 addis_str = "addis %0,%1+%3@u(%2)";
34808 else
34809 gcc_unreachable ();
34813 else if (satisfies_constraint_L (value))
34815 fuse_ops[1] = value;
34816 addis_str = "lis %0,%v1";
34819 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
34821 fuse_ops[1] = value;
34822 addis_str = "lis %0,%1@ha";
34826 if (!addis_str)
34827 fatal_insn ("Could not generate addis value for fusion", addis_value);
34829 sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
34830 comment_str, mode_name);
34831 output_asm_insn (insn_template, fuse_ops);
34833 /* Emit the D-form load instruction. */
34834 if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
34836 sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
34837 fuse_ops[1] = load_offset;
34838 output_asm_insn (insn_template, fuse_ops);
34841 else if (GET_CODE (load_offset) == UNSPEC
34842 && XINT (load_offset, 1) == UNSPEC_TOCREL)
34844 if (TARGET_ELF)
34845 sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
34847 else if (TARGET_XCOFF)
34848 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
34850 else
34851 gcc_unreachable ();
34853 fuse_ops[1] = XVECEXP (load_offset, 0, 0);
34854 output_asm_insn (insn_template, fuse_ops);
34857 else if (GET_CODE (load_offset) == PLUS
34858 && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
34859 && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
34860 && CONST_INT_P (XEXP (load_offset, 1)))
34862 rtx tocrel_unspec = XEXP (load_offset, 0);
34863 if (TARGET_ELF)
34864 sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
34866 else if (TARGET_XCOFF)
34867 sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
34869 else
34870 gcc_unreachable ();
34872 fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
34873 fuse_ops[2] = XEXP (load_offset, 1);
34874 output_asm_insn (insn_template, fuse_ops);
34877 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
34879 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
34881 fuse_ops[1] = load_offset;
34882 output_asm_insn (insn_template, fuse_ops);
34885 else
34886 fatal_insn ("Unable to generate load offset for fusion", load_offset);
34888 return "";
34891 /* Analyze vector computations and remove unnecessary doubleword
34892 swaps (xxswapdi instructions). This pass is performed only
34893 for little-endian VSX code generation.
34895 For this specific case, loads and stores of 4x32 and 2x64 vectors
34896 are inefficient. These are implemented using the lxvd2x and
34897 stxvd2x instructions, which invert the order of doublewords in
34898 a vector register. Thus the code generation inserts an xxswapdi
34899 after each such load, and prior to each such store. (For spill
34900 code after register assignment, an additional xxswapdi is inserted
34901 following each store in order to return a hard register to its
34902 unpermuted value.)
34904 The extra xxswapdi instructions reduce performance. This can be
34905 particularly bad for vectorized code. The purpose of this pass
34906 is to reduce the number of xxswapdi instructions required for
34907 correctness.
34909 The primary insight is that much code that operates on vectors
34910 does not care about the relative order of elements in a register,
34911 so long as the correct memory order is preserved. If we have
34912 a computation where all input values are provided by lxvd2x/xxswapdi
34913 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
34914 and all intermediate computations are pure SIMD (independent of
34915 element order), then all the xxswapdi's associated with the loads
34916 and stores may be removed.
34918 This pass uses some of the infrastructure and logical ideas from
34919 the "web" pass in web.c. We create maximal webs of computations
34920 fitting the description above using union-find. Each such web is
34921 then optimized by removing its unnecessary xxswapdi instructions.
34923 The pass is placed prior to global optimization so that we can
34924 perform the optimization in the safest and simplest way possible;
34925 that is, by replacing each xxswapdi insn with a register copy insn.
34926 Subsequent forward propagation will remove copies where possible.
34928 There are some operations sensitive to element order for which we
34929 can still allow the operation, provided we modify those operations.
34930 These include CONST_VECTORs, for which we must swap the first and
34931 second halves of the constant vector; and SUBREGs, for which we
34932 must adjust the byte offset to account for the swapped doublewords.
34933 A remaining opportunity would be non-immediate-form splats, for
34934 which we should adjust the selected lane of the input. We should
34935 also make code generation adjustments for sum-across operations,
34936 since this is a common vectorizer reduction.
34938 Because we run prior to the first split, we can see loads and stores
34939 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
34940 vector loads and stores that have not yet been split into a permuting
34941 load/store and a swap. (One way this can happen is with a builtin
34942 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
34943 than deleting a swap, we convert the load/store into a permuting
34944 load/store (which effectively removes the swap). */
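
/* For example (a sketch of the little-endian sequences described above),
   a V2DF load/compute/store without this pass looks like

	lxvd2x 0,0,9		# doubleword-reversed load
	xxswapd 0,0		# restore element order
	...element-order-independent computation...
	xxswapd 0,0		# re-reverse before the store
	stxvd2x 0,0,10

   and when every input and output is bracketed this way, the pass can
   remove both xxswapd instructions (xxswapd being the usual extended
   mnemonic for an xxpermdi with selector 2).  */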
34946 /* Notes on Permutes
34948 We do not currently handle computations that contain permutes. There
34949 is a general transformation that can be performed correctly, but it
34950 may introduce more expensive code than it replaces. To handle these
34951 would require a cost model to determine when to perform the optimization.
34952 This commentary records how this could be done if desired.
34954 The most general permute is something like this (example for V16QI):
34956 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
34957 (parallel [(const_int a0) (const_int a1)
34959 (const_int a14) (const_int a15)]))
34961 where a0,...,a15 are in [0,31] and select which elements from op1 and op2
34962 appear in the result.
34964 Regardless of mode, we can convert the PARALLEL to a mask of 16
34965 byte-element selectors. Let's call this M, with M[i] representing
34966 the ith byte-element selector value. Then if we swap doublewords
34967 throughout the computation, we can get correct behavior by replacing
34968 M with M' as follows:
34970 M'[i] = { (M[i]+8)%16 : M[i] in [0,15]
34971 { ((M[i]+8)%16)+16 : M[i] in [16,31]
34973 This seems promising at first, since we are just replacing one mask
34974 with another. But certain masks are preferable to others. If M
34975 is a mask that matches a vmrghh pattern, for example, M' certainly
34976 will not. Instead of a single vmrghh, we would generate a load of
34977 M' and a vperm. So we would need to know how many xxswapdi's we can
34978 remove as a result of this transformation to determine if it's
34979 profitable; and preferably the logic would need to be aware of all
34980 the special preferable masks.
34982 Another form of permute is an UNSPEC_VPERM, in which the mask is
34983 already in a register. In some cases, this mask may be a constant
34984 that we can discover with ud-chains, in which case the above
34985 transformation is ok. However, the common usage here is for the
34986 mask to be produced by an UNSPEC_LVSL, in which case the mask
34987 cannot be known at compile time. In such a case we would have to
34988 generate several instructions to compute M' as above at run time,
34989 and a cost model is needed again.
34991 However, when the mask M for an UNSPEC_VPERM is loaded from the
34992 constant pool, we can replace M with M' as above at no cost
34993 beyond adding a constant pool entry. */
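/* A minimal sketch (ours, not part of the pass) of the selector
   adjustment M -> M' described above; the function name is invented
   for illustration.  */

static inline unsigned int
swapped_mask_elt (unsigned int m)
{
  /* Selectors 0-15 index op1 and 16-31 index op2; rotating by 8
     within each 16-byte source accounts for the swapped doublewords.  */
  return m < 16 ? (m + 8) % 16 : ((m + 8) % 16) + 16;
}

/* For example, the vmrghh mask {0,1,16,17,2,3,18,19,...} maps to
   {8,9,24,25,10,11,26,27,...}; as noted above, the result no longer
   matches the vmrghh pattern, so a cost model would be needed to
   decide whether the rewrite pays for itself.  */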
34995 /* This is based on the union-find logic in web.c. web_entry_base is
34996 defined in df.h. */
34997 class swap_web_entry : public web_entry_base
34999 public:
35000 /* Pointer to the insn. */
35001 rtx_insn *insn;
35002 /* Set if insn contains a mention of a vector register. All other
35003 fields are undefined if this field is unset. */
35004 unsigned int is_relevant : 1;
35005 /* Set if insn is a load. */
35006 unsigned int is_load : 1;
35007 /* Set if insn is a store. */
35008 unsigned int is_store : 1;
35009 /* Set if insn is a doubleword swap. This can either be a register swap
35010 or a permuting load or store (test is_load and is_store for this). */
35011 unsigned int is_swap : 1;
35012 /* Set if the insn has a live-in use of a parameter register. */
35013 unsigned int is_live_in : 1;
35014 /* Set if the insn has a live-out def of a return register. */
35015 unsigned int is_live_out : 1;
35016 /* Set if the insn contains a subreg reference of a vector register. */
35017 unsigned int contains_subreg : 1;
35018 /* Set if the insn contains a 128-bit integer operand. */
35019 unsigned int is_128_int : 1;
35020 /* Set if this is a call-insn. */
35021 unsigned int is_call : 1;
35022 /* Set if this insn does not perform a vector operation for which
35023 element order matters, or if we know how to fix it up if it does.
35024 Undefined if is_swap is set. */
35025 unsigned int is_swappable : 1;
35026 /* A nonzero value indicates what kind of special handling for this
35027 insn is required if doublewords are swapped. Undefined if
35028 is_swappable is not set. */
35029 unsigned int special_handling : 4;
35030 /* Set if the web represented by this entry cannot be optimized. */
35031 unsigned int web_not_optimizable : 1;
35032 /* Set if this insn should be deleted. */
35033 unsigned int will_delete : 1;
35036 enum special_handling_values {
35037 SH_NONE = 0,
35038 SH_CONST_VECTOR,
35039 SH_SUBREG,
35040 SH_NOSWAP_LD,
35041 SH_NOSWAP_ST,
35042 SH_EXTRACT,
35043 SH_SPLAT,
35044 SH_XXPERMDI,
35045 SH_CONCAT,
35046 SH_VPERM
35049 /* Union INSN with all insns containing definitions that reach USE.
35050 Detect whether USE is live-in to the current function. */
35051 static void
35052 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
35054 struct df_link *link = DF_REF_CHAIN (use);
35056 if (!link)
35057 insn_entry[INSN_UID (insn)].is_live_in = 1;
35059 while (link)
35061 if (DF_REF_IS_ARTIFICIAL (link->ref))
35062 insn_entry[INSN_UID (insn)].is_live_in = 1;
35064 if (DF_REF_INSN_INFO (link->ref))
35066 rtx def_insn = DF_REF_INSN (link->ref);
35067 (void)unionfind_union (insn_entry + INSN_UID (insn),
35068 insn_entry + INSN_UID (def_insn));
35071 link = link->next;
35075 /* Union INSN with all insns containing uses reached from DEF.
35076 Detect whether DEF is live-out from the current function. */
35077 static void
35078 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
35080 struct df_link *link = DF_REF_CHAIN (def);
35082 if (!link)
35083 insn_entry[INSN_UID (insn)].is_live_out = 1;
35085 while (link)
35087 /* This could be an eh use or some other artificial use;
35088 we treat these all the same (killing the optimization). */
35089 if (DF_REF_IS_ARTIFICIAL (link->ref))
35090 insn_entry[INSN_UID (insn)].is_live_out = 1;
35092 if (DF_REF_INSN_INFO (link->ref))
35094 rtx use_insn = DF_REF_INSN (link->ref);
35095 (void)unionfind_union (insn_entry + INSN_UID (insn),
35096 insn_entry + INSN_UID (use_insn));
35099 link = link->next;
35103 /* Return 1 iff INSN is a load insn, including permuting loads that
35104 represent an lxvd2x instruction; else return 0. */
35105 static unsigned int
35106 insn_is_load_p (rtx insn)
35108 rtx body = PATTERN (insn);
35110 if (GET_CODE (body) == SET)
35112 if (GET_CODE (SET_SRC (body)) == MEM)
35113 return 1;
35115 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
35116 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
35117 return 1;
35119 return 0;
35122 if (GET_CODE (body) != PARALLEL)
35123 return 0;
35125 rtx set = XVECEXP (body, 0, 0);
35127 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
35128 return 1;
35130 return 0;
35133 /* Return 1 iff INSN is a store insn, including permuting stores that
35134 represent an stxvd2x instruction; else return 0. */
35135 static unsigned int
35136 insn_is_store_p (rtx insn)
35138 rtx body = PATTERN (insn);
35139 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
35140 return 1;
35141 if (GET_CODE (body) != PARALLEL)
35142 return 0;
35143 rtx set = XVECEXP (body, 0, 0);
35144 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
35145 return 1;
35146 return 0;
35149 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
35150 a permuting load, or a permuting store. */
35151 static unsigned int
35152 insn_is_swap_p (rtx insn)
35154 rtx body = PATTERN (insn);
35155 if (GET_CODE (body) != SET)
35156 return 0;
35157 rtx rhs = SET_SRC (body);
35158 if (GET_CODE (rhs) != VEC_SELECT)
35159 return 0;
35160 rtx parallel = XEXP (rhs, 1);
35161 if (GET_CODE (parallel) != PARALLEL)
35162 return 0;
35163 unsigned int len = XVECLEN (parallel, 0);
35164 if (len != 2 && len != 4 && len != 8 && len != 16)
35165 return 0;
35166 for (unsigned int i = 0; i < len / 2; ++i)
35168 rtx op = XVECEXP (parallel, 0, i);
35169 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
35170 return 0;
35172 for (unsigned int i = len / 2; i < len; ++i)
35174 rtx op = XVECEXP (parallel, 0, i);
35175 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
35176 return 0;
35178 return 1;
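/* For example, the selector accepted above for V16QI is
   (parallel [8 9 10 11 12 13 14 15 0 1 2 3 4 5 6 7]),
   and for V2DI it is (parallel [1 0]).  */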
35181 /* Return TRUE if INSN is a register swap fed by a load from the constant pool. */
35182 static bool
35183 const_load_sequence_p (swap_web_entry *insn_entry, rtx insn)
35185 unsigned uid = INSN_UID (insn);
35186 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load)
35187 return false;
35189 /* Find the unique use in the swap and locate its def. If the def
35190 isn't unique, punt. */
35191 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
35192 df_ref use;
35193 FOR_EACH_INSN_INFO_USE (use, insn_info)
35195 struct df_link *def_link = DF_REF_CHAIN (use);
35196 if (!def_link || def_link->next)
35197 return false;
35199 rtx def_insn = DF_REF_INSN (def_link->ref);
35200 unsigned uid2 = INSN_UID (def_insn);
35201 if (!insn_entry[uid2].is_load || !insn_entry[uid2].is_swap)
35202 return false;
35204 rtx body = PATTERN (def_insn);
35205 if (GET_CODE (body) != SET
35206 || GET_CODE (SET_SRC (body)) != VEC_SELECT
35207 || GET_CODE (XEXP (SET_SRC (body), 0)) != MEM)
35208 return false;
35210 rtx mem = XEXP (SET_SRC (body), 0);
35211 rtx base_reg = XEXP (mem, 0);
35213 df_ref base_use;
35214 insn_info = DF_INSN_INFO_GET (def_insn);
35215 FOR_EACH_INSN_INFO_USE (base_use, insn_info)
35217 if (!rtx_equal_p (DF_REF_REG (base_use), base_reg))
35218 continue;
35220 struct df_link *base_def_link = DF_REF_CHAIN (base_use);
35221 if (!base_def_link || base_def_link->next)
35222 return false;
35224 rtx tocrel_insn = DF_REF_INSN (base_def_link->ref);
35225 rtx tocrel_body = PATTERN (tocrel_insn);
35226 rtx base, offset;
35227 if (GET_CODE (tocrel_body) != SET)
35228 return false;
35229 if (!toc_relative_expr_p (SET_SRC (tocrel_body), false))
35230 return false;
35231 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
35232 if (GET_CODE (base) != SYMBOL_REF || !CONSTANT_POOL_ADDRESS_P (base))
35233 return false;
35236 return true;
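/* Schematically, the sequence recognized above is (illustration only):

     rB = <TOC-relative constant pool address>    ; tocrel insn
     vT = vec_select (mem (rB), [swap])           ; permuting load
     vS = vec_select (vT, [swap])                 ; INSN, the swap itself  */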
35239 /* Return 1 iff OP is an operand that will not be affected by having
35240 vector doublewords swapped in memory. */
35241 static unsigned int
35242 rtx_is_swappable_p (rtx op, unsigned int *special)
35244 enum rtx_code code = GET_CODE (op);
35245 int i, j;
35246 rtx parallel;
35248 switch (code)
35250 case LABEL_REF:
35251 case SYMBOL_REF:
35252 case CLOBBER:
35253 case REG:
35254 return 1;
35256 case VEC_CONCAT:
35257 case ASM_INPUT:
35258 case ASM_OPERANDS:
35259 return 0;
35261 case CONST_VECTOR:
35263 *special = SH_CONST_VECTOR;
35264 return 1;
35267 case VEC_DUPLICATE:
35268 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
35269 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
35270 it represents a vector splat for which we can do special
35271 handling. */
35272 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
35273 return 1;
35274 else if (GET_CODE (XEXP (op, 0)) == REG
35275 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
35276 /* This catches V2DF and V2DI splat, at a minimum. */
35277 return 1;
35278 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
35279 /* If the duplicated item is from a select, defer to the select
35280 processing to see if we can change the lane for the splat. */
35281 return rtx_is_swappable_p (XEXP (op, 0), special);
35282 else
35283 return 0;
35285 case VEC_SELECT:
35286 /* A vec_extract operation is ok if we change the lane. */
35287 if (GET_CODE (XEXP (op, 0)) == REG
35288 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
35289 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
35290 && XVECLEN (parallel, 0) == 1
35291 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
35293 *special = SH_EXTRACT;
35294 return 1;
35296 /* An XXPERMDI is ok if we adjust the lanes. Note that if the
35297 XXPERMDI is a swap operation, it will be identified by
35298 insn_is_swap_p and therefore we won't get here. */
35299 else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
35300 && (GET_MODE (XEXP (op, 0)) == V4DFmode
35301 || GET_MODE (XEXP (op, 0)) == V4DImode)
35302 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
35303 && XVECLEN (parallel, 0) == 2
35304 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
35305 && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
35307 *special = SH_XXPERMDI;
35308 return 1;
35310 else
35311 return 0;
35313 case UNSPEC:
35315 /* Various operations are unsafe for this optimization, at least
35316 without significant additional work. Permutes are obviously
35317 problematic, as both the permute control vector and the ordering
35318 of the target values are invalidated by doubleword swapping.
35319 Vector pack and unpack modify the number of vector lanes.
35320 Merge-high/low will not operate correctly on swapped operands.
35321 Vector shifts across element boundaries are clearly uncool,
35322 as are vector select and concatenate operations. Vector
35323 sum-across instructions define one operand with a specific
35324 order-dependent element, so additional fixup code would be
35325 needed to make those work. Vector set and non-immediate-form
35326 vector splat are element-order sensitive. A few of these
35327 cases might be workable with special handling if required.
35328 Adding cost modeling would be appropriate in some cases. */
35329 int val = XINT (op, 1);
35330 switch (val)
35332 default:
35333 break;
35334 case UNSPEC_VMRGH_DIRECT:
35335 case UNSPEC_VMRGL_DIRECT:
35336 case UNSPEC_VPACK_SIGN_SIGN_SAT:
35337 case UNSPEC_VPACK_SIGN_UNS_SAT:
35338 case UNSPEC_VPACK_UNS_UNS_MOD:
35339 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
35340 case UNSPEC_VPACK_UNS_UNS_SAT:
35341 case UNSPEC_VPERM:
35342 case UNSPEC_VPERM_UNS:
35343 case UNSPEC_VPERMHI:
35344 case UNSPEC_VPERMSI:
35345 case UNSPEC_VPKPX:
35346 case UNSPEC_VSLDOI:
35347 case UNSPEC_VSLO:
35348 case UNSPEC_VSRO:
35349 case UNSPEC_VSUM2SWS:
35350 case UNSPEC_VSUM4S:
35351 case UNSPEC_VSUM4UBS:
35352 case UNSPEC_VSUMSWS:
35353 case UNSPEC_VSUMSWS_DIRECT:
35354 case UNSPEC_VSX_CONCAT:
35355 case UNSPEC_VSX_SET:
35356 case UNSPEC_VSX_SLDWI:
35357 case UNSPEC_VUNPACK_HI_SIGN:
35358 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
35359 case UNSPEC_VUNPACK_LO_SIGN:
35360 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
35361 case UNSPEC_VUPKHPX:
35362 case UNSPEC_VUPKHS_V4SF:
35363 case UNSPEC_VUPKHU_V4SF:
35364 case UNSPEC_VUPKLPX:
35365 case UNSPEC_VUPKLS_V4SF:
35366 case UNSPEC_VUPKLU_V4SF:
35367 case UNSPEC_VSX_CVDPSPN:
35368 case UNSPEC_VSX_CVSPDP:
35369 case UNSPEC_VSX_CVSPDPN:
35370 return 0;
35371 case UNSPEC_VSPLT_DIRECT:
35372 *special = SH_SPLAT;
35373 return 1;
35377 default:
35378 break;
35381 const char *fmt = GET_RTX_FORMAT (code);
35382 int ok = 1;
35384 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
35385 if (fmt[i] == 'e' || fmt[i] == 'u')
35387 unsigned int special_op = SH_NONE;
35388 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
35389 if (special_op == SH_NONE)
35390 continue;
35391 /* Ensure we never have two kinds of special handling
35392 for the same insn. */
35393 if (*special != SH_NONE && *special != special_op)
35394 return 0;
35395 *special = special_op;
35397 else if (fmt[i] == 'E')
35398 for (j = 0; j < XVECLEN (op, i); ++j)
35400 unsigned int special_op = SH_NONE;
35401 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
35402 if (special_op == SH_NONE)
35403 continue;
35404 /* Ensure we never have two kinds of special handling
35405 for the same insn. */
35406 if (*special != SH_NONE && *special != special_op)
35407 return 0;
35408 *special = special_op;
35411 return ok;
35414 /* Return 1 iff INSN is an insn that will not be affected by
35415 having vector doublewords swapped in memory (in which case
35416 *SPECIAL is unchanged), or that can be modified to be correct
35417 if vector doublewords are swapped in memory (in which case
35418 *SPECIAL is changed to a value indicating how). */
35419 static unsigned int
35420 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
35421 unsigned int *special)
35423 /* Calls are always bad. */
35424 if (GET_CODE (insn) == CALL_INSN)
35425 return 0;
35427 /* Loads and stores seen here are not permuting, but we can still
35428 fix them up by converting them to permuting ones. Exceptions:
35429 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
35430 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
35431 for the SET source. */
35432 rtx body = PATTERN (insn);
35433 int i = INSN_UID (insn);
35435 if (insn_entry[i].is_load)
35437 if (GET_CODE (body) == SET)
35439 *special = SH_NOSWAP_LD;
35440 return 1;
35442 else
35443 return 0;
35446 if (insn_entry[i].is_store)
35448 if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
35450 *special = SH_NOSWAP_ST;
35451 return 1;
35453 else
35454 return 0;
35457 /* A convert to single precision can be left as is provided that
35458 all of its uses are in xxspltw instructions that splat BE element
35459 zero. */
35460 if (GET_CODE (body) == SET
35461 && GET_CODE (SET_SRC (body)) == UNSPEC
35462 && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN)
35464 df_ref def;
35465 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
35467 FOR_EACH_INSN_INFO_DEF (def, insn_info)
35469 struct df_link *link = DF_REF_CHAIN (def);
35470 if (!link)
35471 return 0;
35473 for (; link; link = link->next) {
35474 rtx use_insn = DF_REF_INSN (link->ref);
35475 rtx use_body = PATTERN (use_insn);
35476 if (GET_CODE (use_body) != SET
35477 || GET_CODE (SET_SRC (use_body)) != UNSPEC
35478 || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW
35479 || XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx)
35480 return 0;
35484 return 1;
35487 /* A concatenation of two doublewords is ok if we reverse the
35488 order of the inputs. */
35489 if (GET_CODE (body) == SET
35490 && GET_CODE (SET_SRC (body)) == VEC_CONCAT
35491 && (GET_MODE (SET_SRC (body)) == V2DFmode
35492 || GET_MODE (SET_SRC (body)) == V2DImode))
35494 *special = SH_CONCAT;
35495 return 1;
35498 /* An UNSPEC_VPERM is ok if the mask operand is loaded from the
35499 constant pool. */
35500 if (GET_CODE (body) == SET
35501 && GET_CODE (SET_SRC (body)) == UNSPEC
35502 && XINT (SET_SRC (body), 1) == UNSPEC_VPERM
35503 && XVECLEN (SET_SRC (body), 0) == 3
35504 && GET_CODE (XVECEXP (SET_SRC (body), 0, 2)) == REG)
35506 rtx mask_reg = XVECEXP (SET_SRC (body), 0, 2);
35507 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
35508 df_ref use;
35509 FOR_EACH_INSN_INFO_USE (use, insn_info)
35510 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
35512 struct df_link *def_link = DF_REF_CHAIN (use);
35513 /* Punt if multiple definitions for this reg. */
35514 if (def_link && !def_link->next &&
35515 const_load_sequence_p (insn_entry,
35516 DF_REF_INSN (def_link->ref)))
35518 *special = SH_VPERM;
35519 return 1;
35524 /* Otherwise check the operands for vector lane violations. */
35525 return rtx_is_swappable_p (body, special);
35528 enum chain_purpose { FOR_LOADS, FOR_STORES };
35530 /* Return true if the UD or DU chain headed by LINK is non-empty,
35531 and every entry on the chain references an insn that is a
35532 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
35533 register swap must have only permuting loads as reaching defs.
35534 If PURPOSE is FOR_STORES, each such register swap must have only
35535 register swaps or permuting stores as reached uses. */
35536 static bool
35537 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
35538 enum chain_purpose purpose)
35540 if (!link)
35541 return false;
35543 for (; link; link = link->next)
35545 if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
35546 continue;
35548 if (DF_REF_IS_ARTIFICIAL (link->ref))
35549 return false;
35551 rtx reached_insn = DF_REF_INSN (link->ref);
35552 unsigned uid = INSN_UID (reached_insn);
35553 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
35555 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
35556 || insn_entry[uid].is_store)
35557 return false;
35559 if (purpose == FOR_LOADS)
35561 df_ref use;
35562 FOR_EACH_INSN_INFO_USE (use, insn_info)
35564 struct df_link *swap_link = DF_REF_CHAIN (use);
35566 while (swap_link)
35568 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
35569 return false;
35571 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
35572 unsigned uid2 = INSN_UID (swap_def_insn);
35574 /* Only permuting loads are allowed. */
35575 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
35576 return false;
35578 swap_link = swap_link->next;
35582 else if (purpose == FOR_STORES)
35584 df_ref def;
35585 FOR_EACH_INSN_INFO_DEF (def, insn_info)
35587 struct df_link *swap_link = DF_REF_CHAIN (def);
35589 while (swap_link)
35591 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
35592 return false;
35594 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
35595 unsigned uid2 = INSN_UID (swap_use_insn);
35597 /* Permuting stores or register swaps are allowed. */
35598 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
35599 return false;
35601 swap_link = swap_link->next;
35607 return true;
35610 /* Mark the xxswapdi instructions associated with permuting loads and
35611 stores for removal. Note that we only flag them for deletion here,
35612 as there is a possibility of a swap being reached from multiple
35613 loads, etc. */
35614 static void
35615 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
35617 rtx insn = insn_entry[i].insn;
35618 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
35620 if (insn_entry[i].is_load)
35622 df_ref def;
35623 FOR_EACH_INSN_INFO_DEF (def, insn_info)
35625 struct df_link *link = DF_REF_CHAIN (def);
35627 /* We know by now that these are swaps, so we can delete
35628 them confidently. */
35629 while (link)
35631 rtx use_insn = DF_REF_INSN (link->ref);
35632 insn_entry[INSN_UID (use_insn)].will_delete = 1;
35633 link = link->next;
35637 else if (insn_entry[i].is_store)
35639 df_ref use;
35640 FOR_EACH_INSN_INFO_USE (use, insn_info)
35642 /* Ignore uses for addressability. */
35643 machine_mode mode = GET_MODE (DF_REF_REG (use));
35644 if (!VECTOR_MODE_P (mode))
35645 continue;
35647 struct df_link *link = DF_REF_CHAIN (use);
35649 /* We know by now that these are swaps, so we can delete
35650 them confidently. */
35651 while (link)
35653 rtx def_insn = DF_REF_INSN (link->ref);
35654 insn_entry[INSN_UID (def_insn)].will_delete = 1;
35655 link = link->next;
35661 /* OP is either a CONST_VECTOR or an expression containing one.
35662 Swap the first half of the vector with the second in the first
35663 case. Recurse to find it in the second. */
35664 static void
35665 swap_const_vector_halves (rtx op)
35667 int i;
35668 enum rtx_code code = GET_CODE (op);
35669 if (GET_CODE (op) == CONST_VECTOR)
35671 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
35672 for (i = 0; i < half_units; ++i)
35674 rtx temp = CONST_VECTOR_ELT (op, i);
35675 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
35676 CONST_VECTOR_ELT (op, i + half_units) = temp;
35679 else
35681 int j;
35682 const char *fmt = GET_RTX_FORMAT (code);
35683 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
35684 if (fmt[i] == 'e' || fmt[i] == 'u')
35685 swap_const_vector_halves (XEXP (op, i));
35686 else if (fmt[i] == 'E')
35687 for (j = 0; j < XVECLEN (op, i); ++j)
35688 swap_const_vector_halves (XVECEXP (op, i, j));
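/* For example, the V4SI constant { 1, 2, 3, 4 } becomes { 3, 4, 1, 2 }.  */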
35692 /* Find all subregs of a vector expression that perform a narrowing,
35693 and adjust the subreg index to account for doubleword swapping. */
35694 static void
35695 adjust_subreg_index (rtx op)
35697 enum rtx_code code = GET_CODE (op);
35698 if (code == SUBREG
35699 && (GET_MODE_SIZE (GET_MODE (op))
35700 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
35702 unsigned int index = SUBREG_BYTE (op);
35703 if (index < 8)
35704 index += 8;
35705 else
35706 index -= 8;
35707 SUBREG_BYTE (op) = index;
35710 const char *fmt = GET_RTX_FORMAT (code);
35711 int i,j;
35712 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
35713 if (fmt[i] == 'e' || fmt[i] == 'u')
35714 adjust_subreg_index (XEXP (op, i));
35715 else if (fmt[i] == 'E')
35716 for (j = 0; j < XVECLEN (op, i); ++j)
35717 adjust_subreg_index (XVECEXP (op, i, j));
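/* For example, (subreg:DF (reg:V2DF 100) 8) becomes
   (subreg:DF (reg:V2DF 100) 0), since the doubleword formerly at byte
   offset 8 now sits at offset 0.  */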
35720 /* Convert the non-permuting load INSN to a permuting one. */
35721 static void
35722 permute_load (rtx_insn *insn)
35724 rtx body = PATTERN (insn);
35725 rtx mem_op = SET_SRC (body);
35726 rtx tgt_reg = SET_DEST (body);
35727 machine_mode mode = GET_MODE (tgt_reg);
35728 int n_elts = GET_MODE_NUNITS (mode);
35729 int half_elts = n_elts / 2;
35730 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
35731 int i, j;
35732 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
35733 XVECEXP (par, 0, i) = GEN_INT (j);
35734 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
35735 XVECEXP (par, 0, i) = GEN_INT (j);
35736 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
35737 SET_SRC (body) = sel;
35738 INSN_CODE (insn) = -1; /* Force re-recognition. */
35739 df_insn_rescan (insn);
35741 if (dump_file)
35742 fprintf (dump_file, "Replacing load %d with permuted load\n",
35743 INSN_UID (insn));
35746 /* Convert the non-permuting store INSN to a permuting one. */
35747 static void
35748 permute_store (rtx_insn *insn)
35750 rtx body = PATTERN (insn);
35751 rtx src_reg = SET_SRC (body);
35752 machine_mode mode = GET_MODE (src_reg);
35753 int n_elts = GET_MODE_NUNITS (mode);
35754 int half_elts = n_elts / 2;
35755 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
35756 int i, j;
35757 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
35758 XVECEXP (par, 0, i) = GEN_INT (j);
35759 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
35760 XVECEXP (par, 0, i) = GEN_INT (j);
35761 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
35762 SET_SRC (body) = sel;
35763 INSN_CODE (insn) = -1; /* Force re-recognition. */
35764 df_insn_rescan (insn);
35766 if (dump_file)
35767 fprintf (dump_file, "Replacing store %d with permuted store\n",
35768 INSN_UID (insn));
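/* For a V2DI load, for example, the two loops above build the selector
   (parallel [1 0]), turning (set (reg) (mem)) into
   (set (reg) (vec_select (mem) (parallel [1 0]))), which matches the
   permuting-load pattern.  The store case is symmetric.  */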
35771 /* Given INSN containing a vector extract operation, adjust the index
35772 of the extracted lane to account for the doubleword swap. */
35773 static void
35774 adjust_extract (rtx_insn *insn)
35776 rtx pattern = PATTERN (insn);
35777 if (GET_CODE (pattern) == PARALLEL)
35778 pattern = XVECEXP (pattern, 0, 0);
35779 rtx src = SET_SRC (pattern);
35780 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
35781 account for that. */
35782 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
35783 rtx par = XEXP (sel, 1);
35784 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
35785 int lane = INTVAL (XVECEXP (par, 0, 0));
35786 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
35787 XVECEXP (par, 0, 0) = GEN_INT (lane);
35788 INSN_CODE (insn) = -1; /* Force re-recognition. */
35789 df_insn_rescan (insn);
35791 if (dump_file)
35792 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
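/* For example, extracting lane 0 from a V2DI register becomes an
   extract of lane 1 once the doublewords are swapped.  */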
35795 /* Given INSN containing a vector direct-splat operation, adjust the index
35796 of the source lane to account for the doubleword swap. */
35797 static void
35798 adjust_splat (rtx_insn *insn)
35800 rtx body = PATTERN (insn);
35801 rtx unspec = XEXP (body, 1);
35802 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
35803 int lane = INTVAL (XVECEXP (unspec, 0, 1));
35804 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
35805 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
35806 INSN_CODE (insn) = -1; /* Force re-recognition. */
35807 df_insn_rescan (insn);
35809 if (dump_file)
35810 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
35813 /* Given INSN containing an XXPERMDI operation (that is not a doubleword
35814 swap), reverse the order of the source operands and adjust the indices
35815 of the source lanes to account for doubleword reversal. */
35816 static void
35817 adjust_xxpermdi (rtx_insn *insn)
35819 rtx set = PATTERN (insn);
35820 rtx select = XEXP (set, 1);
35821 rtx concat = XEXP (select, 0);
35822 rtx src0 = XEXP (concat, 0);
35823 XEXP (concat, 0) = XEXP (concat, 1);
35824 XEXP (concat, 1) = src0;
35825 rtx parallel = XEXP (select, 1);
35826 int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
35827 int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
35828 int new_lane0 = 3 - lane1;
35829 int new_lane1 = 3 - lane0;
35830 XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
35831 XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
35832 INSN_CODE (insn) = -1; /* Force re-recognition. */
35833 df_insn_rescan (insn);
35835 if (dump_file)
35836 fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
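/* For example, a select of lanes (0,2) from (vec_concat A B) becomes
   a select of lanes (1,3) from (vec_concat B A): each source lane l
   moves to lane 3 - l, and the two result lanes also exchange places,
   giving new_lane0 = 3 - lane1 and new_lane1 = 3 - lane0 as above.  */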
35839 /* Given INSN containing a VEC_CONCAT operation of two doublewords,
35840 reverse the order of those inputs. */
35841 static void
35842 adjust_concat (rtx_insn *insn)
35844 rtx set = PATTERN (insn);
35845 rtx concat = XEXP (set, 1);
35846 rtx src0 = XEXP (concat, 0);
35847 XEXP (concat, 0) = XEXP (concat, 1);
35848 XEXP (concat, 1) = src0;
35849 INSN_CODE (insn) = -1; /* Force re-recognition. */
35850 df_insn_rescan (insn);
35852 if (dump_file)
35853 fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
35856 /* Given an UNSPEC_VPERM insn, modify the mask loaded from the
35857 constant pool to reflect swapped doublewords. */
35858 static void
35859 adjust_vperm (rtx_insn *insn)
35861 /* We previously determined that the UNSPEC_VPERM was fed by a
35862 swap of a swapping load of a TOC-relative constant pool symbol.
35863 Find the MEM in the swapping load and replace it with a MEM for
35864 the adjusted mask constant. */
35865 rtx set = PATTERN (insn);
35866 rtx mask_reg = XVECEXP (SET_SRC (set), 0, 2);
35868 /* Find the swap. */
35869 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
35870 df_ref use;
35871 rtx_insn *swap_insn = 0;
35872 FOR_EACH_INSN_INFO_USE (use, insn_info)
35873 if (rtx_equal_p (DF_REF_REG (use), mask_reg))
35875 struct df_link *def_link = DF_REF_CHAIN (use);
35876 gcc_assert (def_link && !def_link->next);
35877 swap_insn = DF_REF_INSN (def_link->ref);
35878 break;
35880 gcc_assert (swap_insn);
35882 /* Find the load. */
35883 insn_info = DF_INSN_INFO_GET (swap_insn);
35884 rtx_insn *load_insn = 0;
35885 FOR_EACH_INSN_INFO_USE (use, insn_info)
35887 struct df_link *def_link = DF_REF_CHAIN (use);
35888 gcc_assert (def_link && !def_link->next);
35889 load_insn = DF_REF_INSN (def_link->ref);
35890 break;
35892 gcc_assert (load_insn);
35894 /* Find the TOC-relative symbol access. */
35895 insn_info = DF_INSN_INFO_GET (load_insn);
35896 rtx_insn *tocrel_insn = 0;
35897 FOR_EACH_INSN_INFO_USE (use, insn_info)
35899 struct df_link *def_link = DF_REF_CHAIN (use);
35900 gcc_assert (def_link && !def_link->next);
35901 tocrel_insn = DF_REF_INSN (def_link->ref);
35902 break;
35904 gcc_assert (tocrel_insn);
35906 /* Find the embedded CONST_VECTOR. We have to call toc_relative_expr_p
35907 to set tocrel_base; otherwise it would be unnecessary as we've
35908 already established it will return true. */
35909 rtx base, offset;
35910 if (!toc_relative_expr_p (SET_SRC (PATTERN (tocrel_insn)), false))
35911 gcc_unreachable ();
35912 split_const (XVECEXP (tocrel_base, 0, 0), &base, &offset);
35913 rtx const_vector = get_pool_constant (base);
35914 gcc_assert (GET_CODE (const_vector) == CONST_VECTOR);
35916 /* Create an adjusted mask from the initial mask. */
35917 unsigned int new_mask[16], i, val;
35918 for (i = 0; i < 16; ++i) {
35919 val = INTVAL (XVECEXP (const_vector, 0, i));
35920 if (val < 16)
35921 new_mask[i] = (val + 8) % 16;
35922 else
35923 new_mask[i] = ((val + 8) % 16) + 16;
35926 /* Create a new CONST_VECTOR and a MEM that references it. */
35927 rtx vals = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
35928 for (i = 0; i < 16; ++i)
35929 XVECEXP (vals, 0, i) = GEN_INT (new_mask[i]);
35930 rtx new_const_vector = gen_rtx_CONST_VECTOR (V16QImode, XVEC (vals, 0));
35931 rtx new_mem = force_const_mem (V16QImode, new_const_vector);
35932 /* This gives us a MEM whose base operand is a SYMBOL_REF, which we
35933 can't recognize. Force the SYMBOL_REF into a register. */
35934 if (!REG_P (XEXP (new_mem, 0))) {
35935 rtx base_reg = force_reg (Pmode, XEXP (new_mem, 0));
35936 XEXP (new_mem, 0) = base_reg;
35937 /* Move the newly created insn ahead of the load insn. */
35938 rtx_insn *force_insn = get_last_insn ();
35939 remove_insn (force_insn);
35940 rtx_insn *before_load_insn = PREV_INSN (load_insn);
35941 add_insn_after (force_insn, before_load_insn, BLOCK_FOR_INSN (load_insn));
35942 df_insn_rescan (before_load_insn);
35943 df_insn_rescan (force_insn);
35946 /* Replace the MEM in the load instruction and rescan it. */
35947 XEXP (SET_SRC (PATTERN (load_insn)), 0) = new_mem;
35948 INSN_CODE (load_insn) = -1; /* Force re-recognition. */
35949 df_insn_rescan (load_insn);
35951 if (dump_file)
35952 fprintf (dump_file, "Adjusting mask for vperm %d\n", INSN_UID (insn));
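/* For example, an identity permute mask { 0, 1, ..., 15 } in the
   constant pool is replaced by { 8, 9, ..., 15, 0, 1, ..., 7 }.  */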
35955 /* The insn described by INSN_ENTRY[I] can be swapped, but only
35956 with special handling. Take care of that here. */
35957 static void
35958 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
35960 rtx_insn *insn = insn_entry[i].insn;
35961 rtx body = PATTERN (insn);
35963 switch (insn_entry[i].special_handling)
35965 default:
35966 gcc_unreachable ();
35967 case SH_CONST_VECTOR:
35969 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
35970 gcc_assert (GET_CODE (body) == SET);
35971 rtx rhs = SET_SRC (body);
35972 swap_const_vector_halves (rhs);
35973 if (dump_file)
35974 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
35975 break;
35977 case SH_SUBREG:
35978 /* A subreg of the same size is already safe. For subregs that
35979 select a smaller portion of a reg, adjust the index for
35980 swapped doublewords. */
35981 adjust_subreg_index (body);
35982 if (dump_file)
35983 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
35984 break;
35985 case SH_NOSWAP_LD:
35986 /* Convert a non-permuting load to a permuting one. */
35987 permute_load (insn);
35988 break;
35989 case SH_NOSWAP_ST:
35990 /* Convert a non-permuting store to a permuting one. */
35991 permute_store (insn);
35992 break;
35993 case SH_EXTRACT:
35994 /* Change the lane on an extract operation. */
35995 adjust_extract (insn);
35996 break;
35997 case SH_SPLAT:
35998 /* Change the lane on a direct-splat operation. */
35999 adjust_splat (insn);
36000 break;
36001 case SH_XXPERMDI:
36002 /* Change the lanes on an XXPERMDI operation. */
36003 adjust_xxpermdi (insn);
36004 break;
36005 case SH_CONCAT:
36006 /* Reverse the order of a concatenation operation. */
36007 adjust_concat (insn);
36008 break;
36009 case SH_VPERM:
36010 /* Change the mask loaded from the constant pool for a VPERM. */
36011 adjust_vperm (insn);
36012 break;
36016 /* Find the insn from the Ith table entry, which is known to be a
36017 register swap Y = SWAP(X). Replace it with a copy Y = X. */
36018 static void
36019 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
36021 rtx_insn *insn = insn_entry[i].insn;
36022 rtx body = PATTERN (insn);
36023 rtx src_reg = XEXP (SET_SRC (body), 0);
36024 rtx copy = gen_rtx_SET (SET_DEST (body), src_reg);
36025 rtx_insn *new_insn = emit_insn_before (copy, insn);
36026 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
36027 df_insn_rescan (new_insn);
36029 if (dump_file)
36031 unsigned int new_uid = INSN_UID (new_insn);
36032 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
36035 df_insn_delete (insn);
36036 remove_insn (insn);
36037 insn->set_deleted ();
36040 /* Dump the swap table to DUMP_FILE. */
36041 static void
36042 dump_swap_insn_table (swap_web_entry *insn_entry)
36044 int e = get_max_uid ();
36045 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
36047 for (int i = 0; i < e; ++i)
36048 if (insn_entry[i].is_relevant)
36050 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
36051 fprintf (dump_file, "%6d %6d ", i,
36052 pred_entry && pred_entry->insn
36053 ? INSN_UID (pred_entry->insn) : 0);
36054 if (insn_entry[i].is_load)
36055 fputs ("load ", dump_file);
36056 if (insn_entry[i].is_store)
36057 fputs ("store ", dump_file);
36058 if (insn_entry[i].is_swap)
36059 fputs ("swap ", dump_file);
36060 if (insn_entry[i].is_live_in)
36061 fputs ("live-in ", dump_file);
36062 if (insn_entry[i].is_live_out)
36063 fputs ("live-out ", dump_file);
36064 if (insn_entry[i].contains_subreg)
36065 fputs ("subreg ", dump_file);
36066 if (insn_entry[i].is_128_int)
36067 fputs ("int128 ", dump_file);
36068 if (insn_entry[i].is_call)
36069 fputs ("call ", dump_file);
36070 if (insn_entry[i].is_swappable)
36072 fputs ("swappable ", dump_file);
36073 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
36074 fputs ("special:constvec ", dump_file);
36075 else if (insn_entry[i].special_handling == SH_SUBREG)
36076 fputs ("special:subreg ", dump_file);
36077 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
36078 fputs ("special:load ", dump_file);
36079 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
36080 fputs ("special:store ", dump_file);
36081 else if (insn_entry[i].special_handling == SH_EXTRACT)
36082 fputs ("special:extract ", dump_file);
36083 else if (insn_entry[i].special_handling == SH_SPLAT)
36084 fputs ("special:splat ", dump_file);
36085 else if (insn_entry[i].special_handling == SH_XXPERMDI)
36086 fputs ("special:xxpermdi ", dump_file);
36087 else if (insn_entry[i].special_handling == SH_CONCAT)
36088 fputs ("special:concat ", dump_file);
36089 else if (insn_entry[i].special_handling == SH_VPERM)
36090 fputs ("special:vperm ", dump_file);
36092 if (insn_entry[i].web_not_optimizable)
36093 fputs ("unoptimizable ", dump_file);
36094 if (insn_entry[i].will_delete)
36095 fputs ("delete ", dump_file);
36096 fputs ("\n", dump_file);
36098 fputs ("\n", dump_file);
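/* A dump line produced above might look like (insn UIDs invented):

       23     17 load swappable special:load

   i.e., insn 23, whose union-find predecessor is insn 17, is a
   non-permuting load that can be converted to a permuting one.  */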
36101 /* Main entry point for this pass. */
36102 unsigned int
36103 rs6000_analyze_swaps (function *fun)
36105 swap_web_entry *insn_entry;
36106 basic_block bb;
36107 rtx_insn *insn;
36109 /* Dataflow analysis for use-def chains. */
36110 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
36111 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
36112 df_analyze ();
36113 df_set_flags (DF_DEFER_INSN_RESCAN);
36115 /* Allocate structure to represent webs of insns. */
36116 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
36118 /* Walk the insns to gather basic data. */
36119 FOR_ALL_BB_FN (bb, fun)
36120 FOR_BB_INSNS (bb, insn)
36122 unsigned int uid = INSN_UID (insn);
36123 if (NONDEBUG_INSN_P (insn))
36125 insn_entry[uid].insn = insn;
36127 if (GET_CODE (insn) == CALL_INSN)
36128 insn_entry[uid].is_call = 1;
36130 /* Walk the uses and defs to see if we mention vector regs.
36131 Record any constraints on optimization of such mentions. */
36132 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
36133 df_ref mention;
36134 FOR_EACH_INSN_INFO_USE (mention, insn_info)
36136 /* We use DF_REF_REAL_REG here to get inside any subregs. */
36137 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
36139 /* If a use gets its value from a call insn, it will be
36140 a hard register and will look like (reg:V4SI 3 3).
36141 The df analysis creates two mentions for GPR3 and GPR4,
36142 both DImode. We must recognize this and treat it as a
36143 vector mention to ensure the call is unioned with this
36144 use. */
36145 if (mode == DImode && DF_REF_INSN_INFO (mention))
36147 rtx feeder = DF_REF_INSN (mention);
36148 /* FIXME: It is pretty hard to get from the df mention
36149 to the mode of the use in the insn. We arbitrarily
36150 pick a vector mode here, even though the use might
36151 be a real DImode. We can be too conservative
36152 (create a web larger than necessary) because of
36153 this, so consider eventually fixing this. */
36154 if (GET_CODE (feeder) == CALL_INSN)
36155 mode = V4SImode;
36158 if (VECTOR_MODE_P (mode) || mode == TImode)
36160 insn_entry[uid].is_relevant = 1;
36161 if (mode == TImode || mode == V1TImode)
36162 insn_entry[uid].is_128_int = 1;
36163 if (DF_REF_INSN_INFO (mention))
36164 insn_entry[uid].contains_subreg
36165 = !rtx_equal_p (DF_REF_REG (mention),
36166 DF_REF_REAL_REG (mention));
36167 union_defs (insn_entry, insn, mention);
36170 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
36172 /* We use DF_REF_REAL_REG here to get inside any subregs. */
36173 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
36175 /* If we're loading up a hard vector register for a call,
36176 it looks like (set (reg:V4SI 9 9) (...)). The df
36177 analysis creates two mentions for GPR9 and GPR10, both
36178 DImode. So relying on the mode from the mentions
36179 isn't sufficient to ensure we union the call into the
36180 web with the parameter setup code. */
36181 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
36182 && VECTOR_MODE_P (GET_MODE (SET_DEST (PATTERN (insn)))))
36183 mode = GET_MODE (SET_DEST (PATTERN (insn)));
36185 if (VECTOR_MODE_P (mode) || mode == TImode)
36187 insn_entry[uid].is_relevant = 1;
36188 if (mode == TImode || mode == V1TImode)
36189 insn_entry[uid].is_128_int = 1;
36190 if (DF_REF_INSN_INFO (mention))
36191 insn_entry[uid].contains_subreg
36192 = !rtx_equal_p (DF_REF_REG (mention),
36193 DF_REF_REAL_REG (mention));
36194 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
36195 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
36196 insn_entry[uid].is_live_out = 1;
36197 union_uses (insn_entry, insn, mention);
36201 if (insn_entry[uid].is_relevant)
36203 /* Determine if this is a load or store. */
36204 insn_entry[uid].is_load = insn_is_load_p (insn);
36205 insn_entry[uid].is_store = insn_is_store_p (insn);
36207 /* Determine if this is a doubleword swap. If not,
36208 determine whether it can legally be swapped. */
36209 if (insn_is_swap_p (insn))
36210 insn_entry[uid].is_swap = 1;
36211 else
36213 unsigned int special = SH_NONE;
36214 insn_entry[uid].is_swappable
36215 = insn_is_swappable_p (insn_entry, insn, &special);
36216 if (special != SH_NONE && insn_entry[uid].contains_subreg)
36217 insn_entry[uid].is_swappable = 0;
36218 else if (special != SH_NONE)
36219 insn_entry[uid].special_handling = special;
36220 else if (insn_entry[uid].contains_subreg)
36221 insn_entry[uid].special_handling = SH_SUBREG;
36227 if (dump_file)
36229 fprintf (dump_file, "\nSwap insn entry table when first built\n");
36230 dump_swap_insn_table (insn_entry);
36233 /* Record unoptimizable webs. */
36234 unsigned e = get_max_uid (), i;
36235 for (i = 0; i < e; ++i)
36237 if (!insn_entry[i].is_relevant)
36238 continue;
36240 swap_web_entry *root
36241 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
36243 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
36244 || (insn_entry[i].contains_subreg
36245 && insn_entry[i].special_handling != SH_SUBREG)
36246 || insn_entry[i].is_128_int || insn_entry[i].is_call
36247 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
36248 root->web_not_optimizable = 1;
36250 /* If we have loads or stores that aren't permuting then the
36251 optimization isn't appropriate. */
36252 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
36253 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
36254 root->web_not_optimizable = 1;
36256 /* If we have permuting loads or stores that are not accompanied
36257 by a register swap, the optimization isn't appropriate. */
36258 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
36260 rtx insn = insn_entry[i].insn;
36261 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
36262 df_ref def;
36264 FOR_EACH_INSN_INFO_DEF (def, insn_info)
36266 struct df_link *link = DF_REF_CHAIN (def);
36268 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
36270 root->web_not_optimizable = 1;
36271 break;
36275 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
36277 rtx insn = insn_entry[i].insn;
36278 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
36279 df_ref use;
36281 FOR_EACH_INSN_INFO_USE (use, insn_info)
36283 struct df_link *link = DF_REF_CHAIN (use);
36285 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
36287 root->web_not_optimizable = 1;
36288 break;
36294 if (dump_file)
36296 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
36297 dump_swap_insn_table (insn_entry);
36300 /* For each load and store in an optimizable web (which implies
36301 the loads and stores are permuting), find the associated
36302 register swaps and mark them for removal. Due to various
36303 optimizations we may mark the same swap more than once. Also
36304 perform special handling for swappable insns that require it. */
36305 for (i = 0; i < e; ++i)
36306 if ((insn_entry[i].is_load || insn_entry[i].is_store)
36307 && insn_entry[i].is_swap)
36309 swap_web_entry* root_entry
36310 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
36311 if (!root_entry->web_not_optimizable)
36312 mark_swaps_for_removal (insn_entry, i);
36314 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
36316 swap_web_entry* root_entry
36317 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
36318 if (!root_entry->web_not_optimizable)
36319 handle_special_swappables (insn_entry, i);
36322 /* Now delete the swaps marked for removal. */
36323 for (i = 0; i < e; ++i)
36324 if (insn_entry[i].will_delete)
36325 replace_swap_with_copy (insn_entry, i);
36327 /* Clean up. */
36328 free (insn_entry);
36329 return 0;
36332 const pass_data pass_data_analyze_swaps =
36334 RTL_PASS, /* type */
36335 "swaps", /* name */
36336 OPTGROUP_NONE, /* optinfo_flags */
36337 TV_NONE, /* tv_id */
36338 0, /* properties_required */
36339 0, /* properties_provided */
36340 0, /* properties_destroyed */
36341 0, /* todo_flags_start */
36342 TODO_df_finish, /* todo_flags_finish */
36345 class pass_analyze_swaps : public rtl_opt_pass
36347 public:
36348 pass_analyze_swaps(gcc::context *ctxt)
36349 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
36352 /* opt_pass methods: */
36353 virtual bool gate (function *)
36355 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
36356 && rs6000_optimize_swaps);
36359 virtual unsigned int execute (function *fun)
36361 return rs6000_analyze_swaps (fun);
36364 }; // class pass_analyze_swaps
36366 rtl_opt_pass *
36367 make_pass_analyze_swaps (gcc::context *ctxt)
36369 return new pass_analyze_swaps (ctxt);
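/* A sketch (ours; details are assumptions) of how such a target pass
   is hooked into the pass manager elsewhere, using the standard
   register_pass mechanism; the function name and the "cse1" insertion
   point are illustrative, matching the commentary above about running
   prior to global optimization.  */

static void
register_analyze_swaps_example (void)
{
  opt_pass *swaps = make_pass_analyze_swaps (g);
  struct register_pass_info info
    = { swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
  register_pass (&info);
}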
36372 #ifdef RS6000_GLIBC_ATOMIC_FENV
36373 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
36374 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
36375 #endif
36377 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
36379 static void
36380 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
36382 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
36384 #ifdef RS6000_GLIBC_ATOMIC_FENV
36385 if (atomic_hold_decl == NULL_TREE)
36387 atomic_hold_decl
36388 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
36389 get_identifier ("__atomic_feholdexcept"),
36390 build_function_type_list (void_type_node,
36391 double_ptr_type_node,
36392 NULL_TREE));
36393 TREE_PUBLIC (atomic_hold_decl) = 1;
36394 DECL_EXTERNAL (atomic_hold_decl) = 1;
36397 if (atomic_clear_decl == NULL_TREE)
36399 atomic_clear_decl
36400 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
36401 get_identifier ("__atomic_feclearexcept"),
36402 build_function_type_list (void_type_node,
36403 NULL_TREE));
36404 TREE_PUBLIC (atomic_clear_decl) = 1;
36405 DECL_EXTERNAL (atomic_clear_decl) = 1;
36408 tree const_double = build_qualified_type (double_type_node,
36409 TYPE_QUAL_CONST);
36410 tree const_double_ptr = build_pointer_type (const_double);
36411 if (atomic_update_decl == NULL_TREE)
36413 atomic_update_decl
36414 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
36415 get_identifier ("__atomic_feupdateenv"),
36416 build_function_type_list (void_type_node,
36417 const_double_ptr,
36418 NULL_TREE));
36419 TREE_PUBLIC (atomic_update_decl) = 1;
36420 DECL_EXTERNAL (atomic_update_decl) = 1;
36423 tree fenv_var = create_tmp_var (double_type_node);
36424 mark_addressable (fenv_var);
36425 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
36427 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
36428 *clear = build_call_expr (atomic_clear_decl, 0);
36429 *update = build_call_expr (atomic_update_decl, 1,
36430 fold_convert (const_double_ptr, fenv_addr));
36431 #endif
36432 return;
36435 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
36436 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
36437 tree call_mffs = build_call_expr (mffs, 0);
36439 /* Generates the equivalent of feholdexcept (&fenv_var)
36441 *fenv_var = __builtin_mffs ();
36442 double fenv_hold;
36443 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
36444 __builtin_mtfsf (0xff, fenv_hold); */
36446 /* Mask to clear everything except for the rounding modes and non-IEEE
36447 arithmetic flag. */
36448 const unsigned HOST_WIDE_INT hold_exception_mask =
36449 HOST_WIDE_INT_C (0xffffffff00000007);
36451 tree fenv_var = create_tmp_var (double_type_node);
36453 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
36455 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
36456 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
36457 build_int_cst (uint64_type_node,
36458 hold_exception_mask));
36460 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
36461 fenv_llu_and);
36463 tree hold_mtfsf = build_call_expr (mtfsf, 2,
36464 build_int_cst (unsigned_type_node, 0xff),
36465 fenv_hold_mtfsf);
36467 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
36469 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
36471 double fenv_clear = __builtin_mffs ();
36472 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
36473 __builtin_mtfsf (0xff, fenv_clear); */
36475 /* Mask to clear the entire low-order word of the FPSCR image,
36476 including the exception flags, enable bits and rounding modes. */
36477 const unsigned HOST_WIDE_INT clear_exception_mask =
36478 HOST_WIDE_INT_C (0xffffffff00000000);
36480 tree fenv_clear = create_tmp_var (double_type_node);
36482 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
36484 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
36485 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
36486 fenv_clean_llu,
36487 build_int_cst (uint64_type_node,
36488 clear_exception_mask));
36490 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
36491 fenv_clear_llu_and);
36493 tree clear_mtfsf = build_call_expr (mtfsf, 2,
36494 build_int_cst (unsigned_type_node, 0xff),
36495 fenv_clear_mtfsf);
36497 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
36499 /* Generates the equivalent of feupdateenv (&fenv_var)
36501 double old_fenv = __builtin_mffs ();
36502 double fenv_update;
36503 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
36504 (*(uint64_t*)fenv_var & 0x1ff80fff);
36505 __builtin_mtfsf (0xff, fenv_update); */
36507 const unsigned HOST_WIDE_INT update_exception_mask =
36508 HOST_WIDE_INT_C (0xffffffff1fffff00);
36509 const unsigned HOST_WIDE_INT new_exception_mask =
36510 HOST_WIDE_INT_C (0x1ff80fff);
36512 tree old_fenv = create_tmp_var (double_type_node);
36513 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
36515 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
36516 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
36517 build_int_cst (uint64_type_node,
36518 update_exception_mask));
36520 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
36521 build_int_cst (uint64_type_node,
36522 new_exception_mask));
36524 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
36525 old_llu_and, new_llu_and);
36527 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
36528 new_llu_mask);
36530 tree update_mtfsf = build_call_expr (mtfsf, 2,
36531 build_int_cst (unsigned_type_node, 0xff),
36532 fenv_update_mtfsf);
36534 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
36538 struct gcc_target targetm = TARGET_INITIALIZER;
36540 #include "gt-rs6000.h"