/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2014 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "regs.h"
27 #include "hard-reg-set.h"
28 #include "insn-config.h"
29 #include "conditions.h"
30 #include "insn-attr.h"
31 #include "flags.h"
32 #include "recog.h"
33 #include "obstack.h"
34 #include "tree.h"
35 #include "stringpool.h"
36 #include "stor-layout.h"
37 #include "calls.h"
38 #include "print-tree.h"
39 #include "varasm.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "except.h"
43 #include "function.h"
44 #include "output.h"
45 #include "dbxout.h"
46 #include "basic-block.h"
47 #include "diagnostic-core.h"
48 #include "toplev.h"
49 #include "ggc.h"
50 #include "hashtab.h"
51 #include "tm_p.h"
52 #include "target.h"
53 #include "target-def.h"
54 #include "common/common-target.h"
55 #include "langhooks.h"
56 #include "reload.h"
57 #include "cfgloop.h"
58 #include "sched-int.h"
59 #include "pointer-set.h"
60 #include "hash-table.h"
61 #include "vec.h"
62 #include "basic-block.h"
63 #include "tree-ssa-alias.h"
64 #include "internal-fn.h"
65 #include "gimple-fold.h"
66 #include "tree-eh.h"
67 #include "gimple-expr.h"
68 #include "is-a.h"
69 #include "gimple.h"
70 #include "gimplify.h"
71 #include "gimple-iterator.h"
72 #include "gimple-walk.h"
73 #include "intl.h"
74 #include "params.h"
75 #include "tm-constrs.h"
76 #include "ira.h"
77 #include "opts.h"
78 #include "tree-vectorizer.h"
79 #include "dumpfile.h"
80 #include "cgraph.h"
81 #include "target-globals.h"
82 #if TARGET_XCOFF
83 #include "xcoffout.h" /* get declarations of xcoff_*_section_name */
84 #endif
85 #if TARGET_MACHO
86 #include "gstab.h" /* for N_SLINE */
87 #endif
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)	((A) < (B) ? (A) : (B))
#define max(A,B)	((A) > (B) ? (A) : (B))

/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs  */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in save_size */
  int vrsave_size;		/* size to hold VRSAVE if not in save_size */
  int altivec_padding_size;	/* size of altivec alignment padding if
				   not in save_size */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Some local-dynamic symbol.  */
  const char *some_ld_name;
  /* Whether the instruction chain has been scanned already.  */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of label created for -mrelocatable, to call to so we can
   get the address of the GOT section */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	RECIP_ALL },
  { "none",	RECIP_NONE },
  { "div",	(RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		 | RECIP_V2DF_DIV) },
  { "divf",	(RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	(RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	(RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		 | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	(RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};

/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
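
/* Given the enum ordering above, IS_STD_REG_TYPE accepts GPR_REG_TYPE,
   VSX_REG_TYPE, ALTIVEC_REG_TYPE and FPR_REG_TYPE, while
   IS_FP_VECT_REG_TYPE accepts only the latter three.  */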

/* Register classes we care about in secondary reload or when checking for
   a legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};

/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
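
/* Illustrative example: a mode that is valid in GPRs with reg+offset and
   pre-increment/decrement addressing would carry the mask
   (RELOAD_REG_VALID | RELOAD_REG_OFFSET | RELOAD_REG_PRE_INCDEC).  */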

/* Per-mode reload information: the reload insns to use and, for each
   register type, the mask of valid addressing modes.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (enum machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (enum machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}

/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
};

const struct processor_costs *rs6000_cost;

/* Processor costs (relative to an add) */
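/* Note: COSTS_N_INSNS (N) converts an instruction count into the units used
   by the rtx cost hooks, with COSTS_N_INSNS (1) being the cost of a single
   simple instruction such as an add.  */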

/* Instruction size costs on 32bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
};

/* Instruction size costs on 64bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
};

/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),	    /* mulsi_const */
  COSTS_N_INSNS (6/2),	    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),	    /* divsi */
  COSTS_N_INSNS (70/2),	    /* divdi */
  COSTS_N_INSNS (10/2),	    /* fp */
  COSTS_N_INSNS (10/2),	    /* dmul */
  COSTS_N_INSNS (74/2),	    /* sdiv */
  COSTS_N_INSNS (74/2),	    /* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  6,			/* streams */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
};

/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
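
/* The #define/#include/#undef dance above is the usual X-macro idiom:
   rs6000-builtin.def invokes one RS6000_BUILTIN_x macro per builtin, so
   defining each macro to expand to an initializer row and then including
   the .def file populates rs6000_builtin_info in one pass.  */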

/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (tree, tree, tree);


static bool rs6000_debug_legitimate_address_p (enum machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static rtx rs6000_emit_set_long_const (rtx, HOST_WIDE_INT, HOST_WIDE_INT);
static int rs6000_memory_move_cost (enum machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, enum machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx, rtx, rtx, int);
static bool is_microcoded_insn (rtx);
static bool is_nonpipeline_insn (rtx);
static bool is_cracked_insn (rtx);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx, rtx);
static bool insn_terminates_group_p (rtx, enum group_termination);
static bool insn_must_be_first_in_group (rtx);
static bool insn_must_be_last_in_group (rtx);
static void altivec_init_builtins (void);
static tree builtin_function_type (enum machine_mode, enum machine_mode,
				   enum machine_mode, enum machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, enum machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, enum machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static int rs6000_tls_symbol_ref_1 (rtx *, void *);
static int rs6000_get_some_local_dynamic_name_1 (rtx *, void *);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, enum machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, enum machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     enum machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   enum machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    enum machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  enum machine_mode);
static bool rs6000_cannot_change_mode_class (enum machine_mode,
					     enum machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (enum machine_mode,
						   enum machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     enum machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    enum machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (enum machine_mode,
					     enum machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  enum machine_mode,
					  secondary_reload_info *,
					  bool);

/* Hash table stuff for keeping track of TOC entries.  */

struct GTY(()) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  enum machine_mode key_mode;
  int labelno;
};

static GTY ((param_is (struct toc_hash_struct))) htab_t toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY(()) builtin_hash_struct
{
  tree type;
  enum machine_mode mode[4];	/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

static GTY ((param_is (struct builtin_hash_struct))) htab_t builtin_hash_table;

/* Default register names.  */
char rs6000_reg_names[][8] =
{
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",   "lr",  "ctr",   "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
  "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9",  "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif

/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};

#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
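
/* Worked example: ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) == 0x80000000, the
   %v0 bit, and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) == 0x00000001,
   the %v31 bit.  */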

/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

/* On rs6000, function arguments are promoted, as are function return
   values.  */
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

/* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
   The PowerPC architecture requires only weak consistency among
   processors--that is, memory accesses between processors need not be
   sequentially consistent and memory accesses among processors can occur
   in any order.  The ability to order memory accesses weakly provides
   opportunities for more efficient use of the system bus.  Unless a
   dependency exists, the 604e allows read operations to precede store
   operations.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P rs6000_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1637 /* Processor table. */
1638 struct rs6000_ptt
1639 {
1640   const char *const name;              /* Canonical processor name.  */
1641   const enum processor_type processor; /* Processor type enum value.  */
1642   const HOST_WIDE_INT target_enable;   /* Target flags to enable.  */
1643 };
1645 static struct rs6000_ptt const processor_target_table[] =
1646 {
1647 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1648 #include "rs6000-cpus.def"
1649 #undef RS6000_CPU
1650 };
1652 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1653 name is invalid. */
1655 static int
1656 rs6000_cpu_name_lookup (const char *name)
1658 size_t i;
1660 if (name != NULL)
1662 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1663 if (! strcmp (name, processor_target_table[i].name))
1664 return (int)i;
1667 return -1;
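/* Usage sketch (illustrative): rs6000_cpu_name_lookup ("power7") returns
   the index of that RS6000_CPU entry in processor_target_table, while an
   unknown name such as "power99" yields -1.  */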
1671 /* Return number of consecutive hard regs needed starting at reg REGNO
1672 to hold something of mode MODE.
1673 This is ordinarily the length in words of a value of mode MODE
1674 but can be less for certain modes in special long registers.
1676 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1677 scalar instructions. The upper 32 bits are only available to the
1678 SIMD instructions.
1680 POWER and PowerPC GPRs hold 32 bits worth;
1681 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1683 static int
1684 rs6000_hard_regno_nregs_internal (int regno, enum machine_mode mode)
1686 unsigned HOST_WIDE_INT reg_size;
1688 /* TF/TD modes are special in that they always take 2 registers. */
1689 if (FP_REGNO_P (regno))
1690 reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
1691 ? UNITS_PER_VSX_WORD
1692 : UNITS_PER_FP_WORD);
1694 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1695 reg_size = UNITS_PER_SPE_WORD;
1697 else if (ALTIVEC_REGNO_P (regno))
1698 reg_size = UNITS_PER_ALTIVEC_WORD;
1700 /* The value returned for SCmode in the E500 double case is 2 for
1701 ABI compatibility; storing an SCmode value in a single register
1702 would require function_arg and rs6000_spe_function_arg to handle
1703 SCmode so as to pass the value correctly in a pair of
1704 registers. */
1705 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1706 && !DECIMAL_FLOAT_MODE_P (mode))
1707 reg_size = UNITS_PER_FP_WORD;
1709 else
1710 reg_size = UNITS_PER_WORD;
1712 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
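/* Worked example (illustrative): an 8-byte DFmode value needs
   (8 + 4 - 1) / 4 == 2 consecutive 32-bit GPRs, but only
   (8 + 8 - 1) / 8 == 1 FPR, since reg_size is UNITS_PER_FP_WORD there.  */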
1715 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1716 MODE. */
1717 static int
1718 rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
1720 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1722 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1723 register combinations, and use PTImode where we need to deal with quad
1724 word memory operations. Don't allow quad words in the argument or frame
1725 pointer registers, just registers 0..31. */
1726 if (mode == PTImode)
1727 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1728 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1729 && ((regno & 1) == 0));
1731 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1732 implementations. Don't allow an item to be split between a FP register
1733 and an Altivec register. Allow TImode in all VSX registers if the user
1734 asked for it. */
1735 if (TARGET_VSX && VSX_REGNO_P (regno)
1736 && (VECTOR_MEM_VSX_P (mode)
1737 || reg_addr[mode].scalar_in_vmx_p
1738 || (TARGET_VSX_TIMODE && mode == TImode)
1739 || (TARGET_VADDUQM && mode == V1TImode)))
1741 if (FP_REGNO_P (regno))
1742 return FP_REGNO_P (last_regno);
1744 if (ALTIVEC_REGNO_P (regno))
1746 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1747 return 0;
1749 return ALTIVEC_REGNO_P (last_regno);
1753 /* The GPRs can hold any mode, but values bigger than one register
1754 cannot go past R31. */
1755 if (INT_REGNO_P (regno))
1756 return INT_REGNO_P (last_regno);
1758 /* The float registers (except for VSX vector modes) can only hold floating
1759 modes and DImode. */
1760 if (FP_REGNO_P (regno))
1762 if (SCALAR_FLOAT_MODE_P (mode)
1763 && (mode != TDmode || (regno % 2) == 0)
1764 && FP_REGNO_P (last_regno))
1765 return 1;
1767 if (GET_MODE_CLASS (mode) == MODE_INT
1768 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1769 return 1;
1771 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1772 && PAIRED_VECTOR_MODE (mode))
1773 return 1;
1775 return 0;
1778 /* The CR register can only hold CC modes. */
1779 if (CR_REGNO_P (regno))
1780 return GET_MODE_CLASS (mode) == MODE_CC;
1782 if (CA_REGNO_P (regno))
1783 return mode == BImode;
1785 /* AltiVec only in AltiVec registers. */
1786 if (ALTIVEC_REGNO_P (regno))
1787 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1788 || mode == V1TImode);
1790 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1791 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1792 return 1;
1794 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1795 registers, and the value must fit within the register set. */
1797 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
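/* Illustrative consequences of the checks above: TDmode may only start at
   an even FP register number, PTImode needs an even/odd GPR pair wholly
   inside r0..r31, and the condition registers accept nothing but MODE_CC
   values.  */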
1800 /* Print interesting facts about registers. */
1801 static void
1802 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1804 int r, m;
1806 for (r = first_regno; r <= last_regno; ++r)
1808 const char *comma = "";
1809 int len;
1811 if (first_regno == last_regno)
1812 fprintf (stderr, "%s:\t", reg_name);
1813 else
1814 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1816 len = 8;
1817 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1818 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1820 if (len > 70)
1822 fprintf (stderr, ",\n\t");
1823 len = 8;
1824 comma = "";
1827 if (rs6000_hard_regno_nregs[m][r] > 1)
1828 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1829 rs6000_hard_regno_nregs[m][r]);
1830 else
1831 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1833 comma = ", ";
1836 if (call_used_regs[r])
1838 if (len > 70)
1840 fprintf (stderr, ",\n\t");
1841 len = 8;
1842 comma = "";
1845 len += fprintf (stderr, "%s%s", comma, "call-used");
1846 comma = ", ";
1849 if (fixed_regs[r])
1851 if (len > 70)
1853 fprintf (stderr, ",\n\t");
1854 len = 8;
1855 comma = "";
1858 len += fprintf (stderr, "%s%s", comma, "fixed");
1859 comma = ", ";
1862 if (len > 70)
1864 fprintf (stderr, ",\n\t");
1865 comma = "";
1868 len += fprintf (stderr, "%sreg-class = %s", comma,
1869 reg_class_names[(int)rs6000_regno_regclass[r]]);
1870 comma = ", ";
1872 if (len > 70)
1874 fprintf (stderr, ",\n\t");
1875 comma = "";
1878 fprintf (stderr, "%sregno = %d\n", comma, r);
1882 static const char *
1883 rs6000_debug_vector_unit (enum rs6000_vector v)
1885 const char *ret;
1887 switch (v)
1889 case VECTOR_NONE: ret = "none"; break;
1890 case VECTOR_ALTIVEC: ret = "altivec"; break;
1891 case VECTOR_VSX: ret = "vsx"; break;
1892 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1893 case VECTOR_PAIRED: ret = "paired"; break;
1894 case VECTOR_SPE: ret = "spe"; break;
1895 case VECTOR_OTHER: ret = "other"; break;
1896 default: ret = "unknown"; break;
1899 return ret;
1902 /* Print the address masks in a human readable fashion. */
1903 DEBUG_FUNCTION void
1904 rs6000_debug_print_mode (ssize_t m)
1906 ssize_t rc;
1908 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
1909 for (rc = 0; rc < N_RELOAD_REG; rc++)
1911 addr_mask_type mask = reg_addr[m].addr_mask[rc];
1912 fprintf (stderr,
1913 " %s: %c%c%c%c%c%c",
1914 reload_reg_map[rc].name,
1915 (mask & RELOAD_REG_VALID) != 0 ? 'v' : ' ',
1916 (mask & RELOAD_REG_MULTIPLE) != 0 ? 'm' : ' ',
1917 (mask & RELOAD_REG_INDEXED) != 0 ? 'i' : ' ',
1918 (mask & RELOAD_REG_OFFSET) != 0 ? 'o' : ' ',
1919 (mask & RELOAD_REG_PRE_INCDEC) != 0 ? '+' : ' ',
1920 (mask & RELOAD_REG_PRE_MODIFY) != 0 ? '+' : ' ');
1923 if (rs6000_vector_unit[m] != VECTOR_NONE
1924 || rs6000_vector_mem[m] != VECTOR_NONE
1925 || (reg_addr[m].reload_store != CODE_FOR_nothing)
1926 || (reg_addr[m].reload_load != CODE_FOR_nothing)
1927 || reg_addr[m].scalar_in_vmx_p)
1929 fprintf (stderr,
1930 " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c Upper=%c",
1931 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
1932 rs6000_debug_vector_unit (rs6000_vector_mem[m]),
1933 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
1934 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*',
1935 (reg_addr[m].scalar_in_vmx_p) ? 'y' : 'n');
1938 fputs ("\n", stderr);
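/* Illustrative sample of one output line (exact labels and bits vary by
   target): "Mode: DF" followed by entries such as "vm o+" or "v io++",
   one letter per RELOAD_REG_* bit that is set, in the order valid,
   multiple, indexed, offset, pre-increment, pre-modify.  */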
1941 #define DEBUG_FMT_ID "%-32s= "
1942 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
1943 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
1944 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
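/* For example, DEBUG_FMT_D expands to "%-32s= %d\n", so a call such as

       fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);

   prints the key left-justified in a 32-character field, then "= " and
   the value, keeping the -mdebug=reg listing aligned.  */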
1946 /* Print various interesting information with -mdebug=reg. */
1947 static void
1948 rs6000_debug_reg_global (void)
1950 static const char *const tf[2] = { "false", "true" };
1951 const char *nl = (const char *)0;
1952 int m;
1953 size_t m1, m2, v;
1954 char costly_num[20];
1955 char nop_num[20];
1956 char flags_buffer[40];
1957 const char *costly_str;
1958 const char *nop_str;
1959 const char *trace_str;
1960 const char *abi_str;
1961 const char *cmodel_str;
1962 struct cl_target_option cl_opts;
1964 /* Modes we want tieable information on. */
1965 static const enum machine_mode print_tieable_modes[] = {
1966 QImode,
1967 HImode,
1968 SImode,
1969 DImode,
1970 TImode,
1971 PTImode,
1972 SFmode,
1973 DFmode,
1974 TFmode,
1975 SDmode,
1976 DDmode,
1977 TDmode,
1978 V8QImode,
1979 V4HImode,
1980 V2SImode,
1981 V16QImode,
1982 V8HImode,
1983 V4SImode,
1984 V2DImode,
1985 V1TImode,
1986 V32QImode,
1987 V16HImode,
1988 V8SImode,
1989 V4DImode,
1990 V2TImode,
1991 V2SFmode,
1992 V4SFmode,
1993 V2DFmode,
1994 V8SFmode,
1995 V4DFmode,
1996 CCmode,
1997 CCUNSmode,
1998 CCEQmode,
2001 /* Virtual regs we are interested in. */
2002 const static struct {
2003 int regno; /* register number. */
2004 const char *name; /* register name. */
2005 } virtual_regs[] = {
2006 { STACK_POINTER_REGNUM, "stack pointer:" },
2007 { TOC_REGNUM, "toc: " },
2008 { STATIC_CHAIN_REGNUM, "static chain: " },
2009 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2010 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2011 { ARG_POINTER_REGNUM, "arg pointer: " },
2012 { FRAME_POINTER_REGNUM, "frame pointer:" },
2013 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2014 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2015 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2016 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2017 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2018 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2019 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2020 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2021 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2024 fputs ("\nHard register information:\n", stderr);
2025 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2026 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2027 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2028 LAST_ALTIVEC_REGNO,
2029 "vs");
2030 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2031 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2032 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2033 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2034 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2035 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2036 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2037 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2039 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2040 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2041 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2043 fprintf (stderr,
2044 "\n"
2045 "d reg_class = %s\n"
2046 "f reg_class = %s\n"
2047 "v reg_class = %s\n"
2048 "wa reg_class = %s\n"
2049 "wd reg_class = %s\n"
2050 "wf reg_class = %s\n"
2051 "wg reg_class = %s\n"
2052 "wh reg_class = %s\n"
2053 "wi reg_class = %s\n"
2054 "wj reg_class = %s\n"
2055 "wk reg_class = %s\n"
2056 "wl reg_class = %s\n"
2057 "wm reg_class = %s\n"
2058 "wr reg_class = %s\n"
2059 "ws reg_class = %s\n"
2060 "wt reg_class = %s\n"
2061 "wu reg_class = %s\n"
2062 "wv reg_class = %s\n"
2063 "ww reg_class = %s\n"
2064 "wx reg_class = %s\n"
2065 "wy reg_class = %s\n"
2066 "wz reg_class = %s\n"
2067 "\n",
2068 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2069 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2070 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2071 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2072 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2073 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2074 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2075 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2076 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2077 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2078 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2079 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2080 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2081 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2082 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2083 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2084 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2085 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2086 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2087 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2088 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2089 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2091 nl = "\n";
2092 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2093 rs6000_debug_print_mode (m);
2095 fputs ("\n", stderr);
2097 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2099 enum machine_mode mode1 = print_tieable_modes[m1];
2100 bool first_time = true;
2102 nl = (const char *)0;
2103 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2105 enum machine_mode mode2 = print_tieable_modes[m2];
2106 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2108 if (first_time)
2110 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2111 nl = "\n";
2112 first_time = false;
2115 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2119 if (!first_time)
2120 fputs ("\n", stderr);
2123 if (nl)
2124 fputs (nl, stderr);
2126 if (rs6000_recip_control)
2128 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2130 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2131 if (rs6000_recip_bits[m])
2133 fprintf (stderr,
2134 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2135 GET_MODE_NAME (m),
2136 (RS6000_RECIP_AUTO_RE_P (m)
2137 ? "auto"
2138 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2139 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2140 ? "auto"
2141 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2144 fputs ("\n", stderr);
2147 if (rs6000_cpu_index >= 0)
2149 const char *name = processor_target_table[rs6000_cpu_index].name;
2150 HOST_WIDE_INT flags
2151 = processor_target_table[rs6000_cpu_index].target_enable;
2153 sprintf (flags_buffer, "-mcpu=%s flags", name);
2154 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2156 else
2157 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2159 if (rs6000_tune_index >= 0)
2161 const char *name = processor_target_table[rs6000_tune_index].name;
2162 HOST_WIDE_INT flags
2163 = processor_target_table[rs6000_tune_index].target_enable;
2165 sprintf (flags_buffer, "-mtune=%s flags", name);
2166 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2168 else
2169 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2171 cl_target_option_save (&cl_opts, &global_options);
2172 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2173 rs6000_isa_flags);
2175 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2176 rs6000_isa_flags_explicit);
2178 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2179 rs6000_builtin_mask);
2181 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2183 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2184 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2186 switch (rs6000_sched_costly_dep)
2188 case max_dep_latency:
2189 costly_str = "max_dep_latency";
2190 break;
2192 case no_dep_costly:
2193 costly_str = "no_dep_costly";
2194 break;
2196 case all_deps_costly:
2197 costly_str = "all_deps_costly";
2198 break;
2200 case true_store_to_load_dep_costly:
2201 costly_str = "true_store_to_load_dep_costly";
2202 break;
2204 case store_to_load_dep_costly:
2205 costly_str = "store_to_load_dep_costly";
2206 break;
2208 default:
2209 costly_str = costly_num;
2210 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2211 break;
2214 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2216 switch (rs6000_sched_insert_nops)
2218 case sched_finish_regroup_exact:
2219 nop_str = "sched_finish_regroup_exact";
2220 break;
2222 case sched_finish_pad_groups:
2223 nop_str = "sched_finish_pad_groups";
2224 break;
2226 case sched_finish_none:
2227 nop_str = "sched_finish_none";
2228 break;
2230 default:
2231 nop_str = nop_num;
2232 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2233 break;
2236 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2238 switch (rs6000_sdata)
2240 default:
2241 case SDATA_NONE:
2242 break;
2244 case SDATA_DATA:
2245 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2246 break;
2248 case SDATA_SYSV:
2249 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2250 break;
2252 case SDATA_EABI:
2253 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2254 break;
2258 switch (rs6000_traceback)
2260 case traceback_default: trace_str = "default"; break;
2261 case traceback_none: trace_str = "none"; break;
2262 case traceback_part: trace_str = "part"; break;
2263 case traceback_full: trace_str = "full"; break;
2264 default: trace_str = "unknown"; break;
2267 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2269 switch (rs6000_current_cmodel)
2271 case CMODEL_SMALL: cmodel_str = "small"; break;
2272 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2273 case CMODEL_LARGE: cmodel_str = "large"; break;
2274 default: cmodel_str = "unknown"; break;
2277 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2279 switch (rs6000_current_abi)
2281 case ABI_NONE: abi_str = "none"; break;
2282 case ABI_AIX: abi_str = "aix"; break;
2283 case ABI_ELFv2: abi_str = "ELFv2"; break;
2284 case ABI_V4: abi_str = "V4"; break;
2285 case ABI_DARWIN: abi_str = "darwin"; break;
2286 default: abi_str = "unknown"; break;
2289 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2291 if (rs6000_altivec_abi)
2292 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2294 if (rs6000_spe_abi)
2295 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2297 if (rs6000_darwin64_abi)
2298 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2300 if (rs6000_float_gprs)
2301 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2303 fprintf (stderr, DEBUG_FMT_S, "fprs",
2304 (TARGET_FPRS ? "true" : "false"));
2306 fprintf (stderr, DEBUG_FMT_S, "single_float",
2307 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2309 fprintf (stderr, DEBUG_FMT_S, "double_float",
2310 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2312 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2313 (TARGET_SOFT_FLOAT ? "true" : "false"));
2315 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2316 (TARGET_E500_SINGLE ? "true" : "false"));
2318 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2319 (TARGET_E500_DOUBLE ? "true" : "false"));
2321 if (TARGET_LINK_STACK)
2322 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2324 if (targetm.lra_p ())
2325 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2327 if (TARGET_P8_FUSION)
2328 fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
2329 (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
2331 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2332 TARGET_SECURE_PLT ? "secure" : "bss");
2333 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2334 aix_struct_return ? "aix" : "sysv");
2335 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2336 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2337 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2338 tf[!!rs6000_align_branch_targets]);
2339 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2340 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2341 rs6000_long_double_type_size);
2342 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2343 (int)rs6000_sched_restricted_insns_priority);
2344 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2345 (int)END_BUILTINS);
2346 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2347 (int)RS6000_BUILTIN_COUNT);
2349 if (TARGET_VSX)
2350 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2351 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2355 /* Update the addr mask bits in reg_addr to help the secondary reload and
2356 legitimate address support figure out the appropriate addressing to
2357 use. */
2359 static void
2360 rs6000_setup_reg_addr_masks (void)
2362 ssize_t rc, reg, m, nregs;
2363 addr_mask_type any_addr_mask, addr_mask;
2365 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2367 enum machine_mode m2 = (enum machine_mode)m;
2369 /* SDmode is special in that we want to access it only via REG+REG
2370 addressing on power7 and above, since we want to use the LFIWZX and
2371 STFIWZX instructions to load it. */
2372 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2374 any_addr_mask = 0;
2375 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2377 addr_mask = 0;
2378 reg = reload_reg_map[rc].reg;
2380 /* Can mode values go in the GPR/FPR/Altivec registers? */
2381 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2383 nregs = rs6000_hard_regno_nregs[m][reg];
2384 addr_mask |= RELOAD_REG_VALID;
2386 /* Indicate if the mode takes more than 1 physical register. If
2387 it takes a single register, indicate it can do REG+REG
2388 addressing. */
2389 if (nregs > 1 || m == BLKmode)
2390 addr_mask |= RELOAD_REG_MULTIPLE;
2391 else
2392 addr_mask |= RELOAD_REG_INDEXED;
2394 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2395 addressing. Restrict addressing on SPE for 64-bit types
2396 because of the SUBREG hackery used to address 64-bit floats in
2397 '32-bit' GPRs. To simplify secondary reload, don't allow
2398 update forms on scalar floating point types that can go in the
2399 upper registers. */
2401 if (TARGET_UPDATE
2402 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2403 && GET_MODE_SIZE (m2) <= 8
2404 && !VECTOR_MODE_P (m2)
2405 && !COMPLEX_MODE_P (m2)
2406 && !indexed_only_p
2407 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8)
2408 && !reg_addr[m2].scalar_in_vmx_p)
2410 addr_mask |= RELOAD_REG_PRE_INCDEC;
2412 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2413 we don't allow PRE_MODIFY for some multi-register
2414 operations. */
2415 switch (m)
2417 default:
2418 addr_mask |= RELOAD_REG_PRE_MODIFY;
2419 break;
2421 case DImode:
2422 if (TARGET_POWERPC64)
2423 addr_mask |= RELOAD_REG_PRE_MODIFY;
2424 break;
2426 case DFmode:
2427 case DDmode:
2428 if (TARGET_DF_INSN)
2429 addr_mask |= RELOAD_REG_PRE_MODIFY;
2430 break;
2435 /* GPR and FPR registers can do REG+OFFSET addressing, except
2436 possibly for SDmode. */
2437 if ((addr_mask != 0) && !indexed_only_p
2438 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
2439 addr_mask |= RELOAD_REG_OFFSET;
2441 reg_addr[m].addr_mask[rc] = addr_mask;
2442 any_addr_mask |= addr_mask;
2445 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
2450 /* Initialize the various global tables that are based on register size. */
2451 static void
2452 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2454 ssize_t r, m, c;
2455 int align64;
2456 int align32;
2458 /* Precalculate REGNO_REG_CLASS. */
2459 rs6000_regno_regclass[0] = GENERAL_REGS;
2460 for (r = 1; r < 32; ++r)
2461 rs6000_regno_regclass[r] = BASE_REGS;
2463 for (r = 32; r < 64; ++r)
2464 rs6000_regno_regclass[r] = FLOAT_REGS;
2466 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2467 rs6000_regno_regclass[r] = NO_REGS;
2469 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2470 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2472 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2473 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2474 rs6000_regno_regclass[r] = CR_REGS;
2476 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2477 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2478 rs6000_regno_regclass[CA_REGNO] = CA_REGS;
2479 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2480 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2481 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2482 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2483 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2484 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2485 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2486 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2487 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2489 /* Precalculate register class to simpler reload register class. We don't
2490 need all of the register classes that are combinations of different
2491 classes, just the simple ones that have constraint letters. */
2492 for (c = 0; c < N_REG_CLASSES; c++)
2493 reg_class_to_reg_type[c] = NO_REG_TYPE;
2495 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2496 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2497 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2498 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2499 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2500 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2501 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2502 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2503 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2504 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2505 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2506 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2508 if (TARGET_VSX)
2510 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2511 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2513 else
2515 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2516 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2519 /* Precalculate the valid memory formats as well as the vector information;
2520 this must be set up before the rs6000_hard_regno_nregs_internal calls
2521 below. */
2522 gcc_assert ((int)VECTOR_NONE == 0);
2523 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2524 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_mem));
2526 gcc_assert ((int)CODE_FOR_nothing == 0);
2527 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2529 gcc_assert ((int)NO_REGS == 0);
2530 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2532 /* The VSX hardware allows native alignment for vectors; TARGET_VSX_ALIGN_128 controls
2533 whether the compiler believes it can use native alignment or must still use 128-bit alignment. */
2534 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2536 align64 = 64;
2537 align32 = 32;
2539 else
2541 align64 = 128;
2542 align32 = 128;
2545 /* V2DF mode, VSX only. */
2546 if (TARGET_VSX)
2548 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2549 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2550 rs6000_vector_align[V2DFmode] = align64;
2553 /* V4SF mode, either VSX or Altivec. */
2554 if (TARGET_VSX)
2556 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2557 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2558 rs6000_vector_align[V4SFmode] = align32;
2560 else if (TARGET_ALTIVEC)
2562 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2563 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2564 rs6000_vector_align[V4SFmode] = align32;
2567 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2568 and stores. */
2569 if (TARGET_ALTIVEC)
2571 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2572 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2573 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2574 rs6000_vector_align[V4SImode] = align32;
2575 rs6000_vector_align[V8HImode] = align32;
2576 rs6000_vector_align[V16QImode] = align32;
2578 if (TARGET_VSX)
2580 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2581 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2582 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2584 else
2586 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2587 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2588 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2592 /* V2DImode, full mode depends on ISA 2.07 vector mode. Allow under VSX to
2593 do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2594 if (TARGET_VSX)
2596 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2597 rs6000_vector_unit[V2DImode]
2598 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2599 rs6000_vector_align[V2DImode] = align64;
2601 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2602 rs6000_vector_unit[V1TImode]
2603 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2604 rs6000_vector_align[V1TImode] = 128;
2607 /* DFmode, see if we want to use the VSX unit. */
2608 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2610 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2611 rs6000_vector_mem[DFmode]
2612 = (TARGET_UPPER_REGS_DF ? VECTOR_VSX : VECTOR_NONE);
2613 rs6000_vector_align[DFmode] = align64;
2616 /* Allow TImode in VSX register and set the VSX memory macros. */
2617 if (TARGET_VSX && TARGET_VSX_TIMODE)
2619 rs6000_vector_mem[TImode] = VECTOR_VSX;
2620 rs6000_vector_align[TImode] = align64;
2623 /* TODO add SPE and paired floating point vector support. */
2625 /* Register class constraints for the constraints that depend on compile
2626 switches. When the VSX code was added, different constraints were added
2627 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2628 of the VSX registers are used. The register classes for scalar floating
2629 point types are set based on whether we allow that type into the upper
2630 (Altivec) registers. GCC has register classes to target the Altivec
2631 registers for load/store operations, to select using a VSX memory
2632 operation instead of the traditional floating point operation. The
2633 constraints are:
2635 d - Register class to use with traditional DFmode instructions.
2636 f - Register class to use with traditional SFmode instructions.
2637 v - Altivec register.
2638 wa - Any VSX register.
2639 wc - Reserved to represent individual CR bits (used in LLVM).
2640 wd - Preferred register class for V2DFmode.
2641 wf - Preferred register class for V4SFmode.
2642 wg - Float register for power6x move insns.
2643 wh - FP register for direct move instructions.
2644 wi - FP or VSX register to hold 64-bit integers for VSX insns.
2645 wj - FP or VSX register to hold 64-bit integers for direct moves.
2646 wk - FP or VSX register to hold 64-bit doubles for direct moves.
2647 wl - Float register if we can do 32-bit signed int loads.
2648 wm - VSX register for ISA 2.07 direct move operations.
2649 wn - always NO_REGS.
2650 wr - GPR if 64-bit mode is permitted.
2651 ws - Register class to do ISA 2.06 DF operations.
2652 wt - VSX register for TImode in VSX registers.
2653 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2654 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2655 ww - Register class to do SF conversions in with VSX operations.
2656 wx - Float register if we can do 32-bit int stores.
2657 wy - Register class to do ISA 2.07 SF operations.
2658 wz - Float register if we can do 32-bit unsigned int loads. */
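/* For illustration (not part of the original comment), user code can name
   these classes in GCC inline asm; e.g. with hypothetical vector double
   variables r, a and b, forcing a V2DF add into VSX registers:

       __asm__ ("xvadddp %x0,%x1,%x2" : "=wa" (r) : "wa" (a), "wa" (b));

   where the %x output modifier prints the full 0..63 VSX register
   number.  */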
2660 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2661 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2663 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2664 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2666 if (TARGET_VSX)
2668 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2669 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
2670 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
2671 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
2673 if (TARGET_VSX_TIMODE)
2674 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
2676 if (TARGET_UPPER_REGS_DF) /* DFmode */
2678 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2679 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2681 else
2682 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2685 /* Add conditional constraints based on various options, to allow us to
2686 collapse multiple insn patterns. */
2687 if (TARGET_ALTIVEC)
2688 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2690 if (TARGET_MFPGPR) /* DFmode */
2691 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2693 if (TARGET_LFIWAX)
2694 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
2696 if (TARGET_DIRECT_MOVE)
2698 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
2699 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
2700 = rs6000_constraints[RS6000_CONSTRAINT_wi];
2701 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
2702 = rs6000_constraints[RS6000_CONSTRAINT_ws];
2703 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2706 if (TARGET_POWERPC64)
2707 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2709 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
2711 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2712 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2713 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2715 else if (TARGET_P8_VECTOR)
2717 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
2718 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2720 else if (TARGET_VSX)
2721 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2723 if (TARGET_STFIWX)
2724 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2726 if (TARGET_LFIWZX)
2727 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
2729 /* Set up the reload helper and direct move functions. */
2730 if (TARGET_VSX || TARGET_ALTIVEC)
2732 if (TARGET_64BIT)
2734 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2735 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2736 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2737 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2738 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2739 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2740 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2741 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2742 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2743 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2744 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2745 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2746 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2747 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2748 if (TARGET_VSX && TARGET_UPPER_REGS_DF)
2750 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2751 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2752 reg_addr[DFmode].scalar_in_vmx_p = true;
2753 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2754 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2756 if (TARGET_P8_VECTOR)
2758 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2759 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2760 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2761 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2762 if (TARGET_UPPER_REGS_SF)
2763 reg_addr[SFmode].scalar_in_vmx_p = true;
2765 if (TARGET_VSX_TIMODE)
2767 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2768 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2770 if (TARGET_DIRECT_MOVE)
2772 if (TARGET_POWERPC64)
2774 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2775 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2776 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2777 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2778 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2779 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2780 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2781 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2782 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2784 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2785 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2786 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2787 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2788 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2789 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2790 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2791 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2792 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2794 else
2796 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
2797 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
2798 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
2802 else
2804 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
2805 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
2806 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
2807 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
2808 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
2809 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
2810 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
2811 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
2812 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
2813 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
2814 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
2815 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
2816 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
2817 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
2818 if (TARGET_VSX && TARGET_UPPER_REGS_DF)
2820 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
2821 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
2822 reg_addr[DFmode].scalar_in_vmx_p = true;
2823 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
2824 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
2826 if (TARGET_P8_VECTOR)
2828 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
2829 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
2830 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
2831 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
2832 if (TARGET_UPPER_REGS_SF)
2833 reg_addr[SFmode].scalar_in_vmx_p = true;
2835 if (TARGET_VSX_TIMODE)
2837 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
2838 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
2843 /* Precalculate HARD_REGNO_NREGS. */
2844 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2845 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2846 rs6000_hard_regno_nregs[m][r]
2847 = rs6000_hard_regno_nregs_internal (r, (enum machine_mode)m);
2849 /* Precalculate HARD_REGNO_MODE_OK. */
2850 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2851 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2852 if (rs6000_hard_regno_mode_ok (r, (enum machine_mode)m))
2853 rs6000_hard_regno_mode_ok_p[m][r] = true;
2855 /* Precalculate CLASS_MAX_NREGS sizes. */
2856 for (c = 0; c < LIM_REG_CLASSES; ++c)
2858 int reg_size;
2860 if (TARGET_VSX && VSX_REG_CLASS_P (c))
2861 reg_size = UNITS_PER_VSX_WORD;
2863 else if (c == ALTIVEC_REGS)
2864 reg_size = UNITS_PER_ALTIVEC_WORD;
2866 else if (c == FLOAT_REGS)
2867 reg_size = UNITS_PER_FP_WORD;
2869 else
2870 reg_size = UNITS_PER_WORD;
2872 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2874 enum machine_mode m2 = (enum machine_mode)m;
2875 int reg_size2 = reg_size;
2877 /* TFmode/TDmode always takes 2 registers, even in VSX. */
2878 if (TARGET_VSX && VSX_REG_CLASS_P (c)
2879 && (m == TDmode || m == TFmode))
2880 reg_size2 = UNITS_PER_FP_WORD;
2882 rs6000_class_max_nregs[m][c]
2883 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
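/* Worked example (illustrative): TFmode is 16 bytes, yet in a VSX class it
   still takes (16 + 8 - 1) / 8 == 2 registers, because reg_size2 was just
   forced back to UNITS_PER_FP_WORD for TFmode/TDmode.  */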
2887 if (TARGET_E500_DOUBLE)
2888 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
2890 /* Calculate which modes to automatically generate code to use the
2891 reciprocal divide and square root instructions. In the future, possibly
2892 automatically generate the instructions even if the user did not specify
2893 -mrecip. The older machines' double precision reciprocal sqrt estimate is
2894 not accurate enough. */
2895 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
2896 if (TARGET_FRES)
2897 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2898 if (TARGET_FRE)
2899 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2900 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2901 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2902 if (VECTOR_UNIT_VSX_P (V2DFmode))
2903 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2905 if (TARGET_FRSQRTES)
2906 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2907 if (TARGET_FRSQRTE)
2908 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2909 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2910 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2911 if (VECTOR_UNIT_VSX_P (V2DFmode))
2912 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2914 if (rs6000_recip_control)
2916 if (!flag_finite_math_only)
2917 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
2918 if (flag_trapping_math)
2919 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
2920 if (!flag_reciprocal_math)
2921 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
2922 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
2924 if (RS6000_RECIP_HAVE_RE_P (SFmode)
2925 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
2926 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2928 if (RS6000_RECIP_HAVE_RE_P (DFmode)
2929 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
2930 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2932 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
2933 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
2934 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2936 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
2937 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
2938 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2940 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
2941 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
2942 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
2944 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
2945 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
2946 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
2948 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
2949 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
2950 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
2952 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
2953 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
2954 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
2958 /* Update the addr mask bits in reg_addr to help the secondary reload and
2959 legitimate address support figure out the appropriate addressing to
2960 use. */
2961 rs6000_setup_reg_addr_masks ();
2963 if (global_init_p || TARGET_DEBUG_TARGET)
2965 if (TARGET_DEBUG_REG)
2966 rs6000_debug_reg_global ();
2968 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
2969 fprintf (stderr,
2970 "SImode variable mult cost = %d\n"
2971 "SImode constant mult cost = %d\n"
2972 "SImode short constant mult cost = %d\n"
2973 "DImode multipliciation cost = %d\n"
2974 "SImode division cost = %d\n"
2975 "DImode division cost = %d\n"
2976 "Simple fp operation cost = %d\n"
2977 "DFmode multiplication cost = %d\n"
2978 "SFmode division cost = %d\n"
2979 "DFmode division cost = %d\n"
2980 "cache line size = %d\n"
2981 "l1 cache size = %d\n"
2982 "l2 cache size = %d\n"
2983 "simultaneous prefetches = %d\n"
2984 "\n",
2985 rs6000_cost->mulsi,
2986 rs6000_cost->mulsi_const,
2987 rs6000_cost->mulsi_const9,
2988 rs6000_cost->muldi,
2989 rs6000_cost->divsi,
2990 rs6000_cost->divdi,
2991 rs6000_cost->fp,
2992 rs6000_cost->dmul,
2993 rs6000_cost->sdiv,
2994 rs6000_cost->ddiv,
2995 rs6000_cost->cache_line_size,
2996 rs6000_cost->l1_cache_size,
2997 rs6000_cost->l2_cache_size,
2998 rs6000_cost->simultaneous_prefetches);
3002 #if TARGET_MACHO
3003 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3005 static void
3006 darwin_rs6000_override_options (void)
3008 /* The Darwin ABI always includes AltiVec, can't be (validly) turned
3009 off. */
3010 rs6000_altivec_abi = 1;
3011 TARGET_ALTIVEC_VRSAVE = 1;
3012 rs6000_current_abi = ABI_DARWIN;
3014 if (DEFAULT_ABI == ABI_DARWIN
3015 && TARGET_64BIT)
3016 darwin_one_byte_bool = 1;
3018 if (TARGET_64BIT && ! TARGET_POWERPC64)
3020 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3021 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3023 if (flag_mkernel)
3025 rs6000_default_long_calls = 1;
3026 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3029 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3030 Altivec. */
3031 if (!flag_mkernel && !flag_apple_kext
3032 && TARGET_64BIT
3033 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3034 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3036 /* Unless the user (not the configurer) has explicitly overridden
3037 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3038 G4 unless targeting the kernel. */
3039 if (!flag_mkernel
3040 && !flag_apple_kext
3041 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3042 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3043 && ! global_options_set.x_rs6000_cpu_index)
3045 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3048 #endif
3050 /* If not otherwise specified by a target, make 'long double' equivalent to
3051 'double'. */
3053 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3054 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3055 #endif
3057 /* Return the builtin mask of the various options used that could affect which
3058 builtins were used. In the past we used target_flags, but we've run out of
3059 bits, and some options like SPE and PAIRED are no longer in
3060 target_flags. */
3062 HOST_WIDE_INT
3063 rs6000_builtin_mask_calculate (void)
3065 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3066 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3067 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3068 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3069 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3070 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3071 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3072 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3073 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3074 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3075 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3076 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3077 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3078 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3079 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3080 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
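/* Illustrative example: a power7-style configuration with AltiVec, VSX and
   popcntd enabled would include RS6000_BTM_ALTIVEC | RS6000_BTM_VSX
   | RS6000_BTM_POPCNTD in the mask, and a builtin is later made available
   only when the bits it requires are present here.  */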
3083 /* Override command line options. Mostly we process the processor type and
3084 sometimes adjust other TARGET_ options. */
3086 static bool
3087 rs6000_option_override_internal (bool global_init_p)
3089 bool ret = true;
3090 bool have_cpu = false;
3092 /* The default cpu requested at configure time, if any. */
3093 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3095 HOST_WIDE_INT set_masks;
3096 int cpu_index;
3097 int tune_index;
3098 struct cl_target_option *main_target_opt
3099 = ((global_init_p || target_option_default_node == NULL)
3100 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3102 /* Remember the explicit arguments. */
3103 if (global_init_p)
3104 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3106 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3107 library functions, so warn about it. The flag may be useful for
3108 performance studies from time to time though, so don't disable it
3109 entirely. */
3110 if (global_options_set.x_rs6000_alignment_flags
3111 && rs6000_alignment_flags == MASK_ALIGN_POWER
3112 && DEFAULT_ABI == ABI_DARWIN
3113 && TARGET_64BIT)
3114 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3115 " it is incompatible with the installed C and C++ libraries");
3117 /* Numerous experiments show that IRA-based loop pressure
3118 calculation works better for RTL loop invariant motion on targets
3119 with enough (>= 32) registers. It is an expensive optimization,
3120 so it is enabled only when optimizing for peak performance. */
3121 if (optimize >= 3 && global_init_p
3122 && !global_options_set.x_flag_ira_loop_pressure)
3123 flag_ira_loop_pressure = 1;
3125 /* Set the pointer size. */
3126 if (TARGET_64BIT)
3128 rs6000_pmode = (int)DImode;
3129 rs6000_pointer_size = 64;
3131 else
3133 rs6000_pmode = (int)SImode;
3134 rs6000_pointer_size = 32;
3137 /* Some OSs don't support saving the high part of 64-bit registers on context
3138 switch. Other OSs don't support saving Altivec registers. On those OSs,
3139 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3140 if the user wants either, the user must explicitly specify them and we
3141 won't interfere with the user's specification. */
3143 set_masks = POWERPC_MASKS;
3144 #ifdef OS_MISSING_POWERPC64
3145 if (OS_MISSING_POWERPC64)
3146 set_masks &= ~OPTION_MASK_POWERPC64;
3147 #endif
3148 #ifdef OS_MISSING_ALTIVEC
3149 if (OS_MISSING_ALTIVEC)
3150 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3151 #endif
3153 /* Don't override by the processor default if given explicitly. */
3154 set_masks &= ~rs6000_isa_flags_explicit;
3156 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3157 the cpu in a target attribute or pragma, but did not specify a tuning
3158 option, use the cpu for the tuning option rather than the option specified
3159 with -mtune on the command line. Process a '--with-cpu' configuration
3160 request as an implicit --cpu. */
3161 if (rs6000_cpu_index >= 0)
3163 cpu_index = rs6000_cpu_index;
3164 have_cpu = true;
3166 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3168 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3169 have_cpu = true;
3171 else if (implicit_cpu)
3173 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3174 have_cpu = true;
3176 else
3178 const char *default_cpu = (TARGET_POWERPC64 ? "powerpc64" : "powerpc");
3179 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3180 have_cpu = false;
3183 gcc_assert (cpu_index >= 0);
3185 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3186 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3187 with those from the cpu, except for options that were explicitly set. If
3188 we don't have a cpu, do not override the target bits set in
3189 TARGET_DEFAULT. */
3190 if (have_cpu)
3192 rs6000_isa_flags &= ~set_masks;
3193 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3194 & set_masks);
3196 else
3197 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3198 & ~rs6000_isa_flags_explicit);
3200 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3201 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3202 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3203 to using rs6000_isa_flags, we need to do the initialization here. */
3204 if (!have_cpu)
3205 rs6000_isa_flags |= (TARGET_DEFAULT & ~rs6000_isa_flags_explicit);
3207 if (rs6000_tune_index >= 0)
3208 tune_index = rs6000_tune_index;
3209 else if (have_cpu)
3210 rs6000_tune_index = tune_index = cpu_index;
3211 else
3213 size_t i;
3214 enum processor_type tune_proc
3215 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3217 tune_index = -1;
3218 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3219 if (processor_target_table[i].processor == tune_proc)
3221 rs6000_tune_index = tune_index = i;
3222 break;
3226 gcc_assert (tune_index >= 0);
3227 rs6000_cpu = processor_target_table[tune_index].processor;
3229 /* Pick defaults for SPE related control flags. Do this early to make sure
3230 that the TARGET_ macros are representative ASAP. */
3232 int spe_capable_cpu =
3233 (rs6000_cpu == PROCESSOR_PPC8540
3234 || rs6000_cpu == PROCESSOR_PPC8548);
3236 if (!global_options_set.x_rs6000_spe_abi)
3237 rs6000_spe_abi = spe_capable_cpu;
3239 if (!global_options_set.x_rs6000_spe)
3240 rs6000_spe = spe_capable_cpu;
3242 if (!global_options_set.x_rs6000_float_gprs)
3243 rs6000_float_gprs =
3244 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3245 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3246 : 0);
3249 if (global_options_set.x_rs6000_spe_abi
3250 && rs6000_spe_abi
3251 && !TARGET_SPE_ABI)
3252 error ("not configured for SPE ABI");
3254 if (global_options_set.x_rs6000_spe
3255 && rs6000_spe
3256 && !TARGET_SPE)
3257 error ("not configured for SPE instruction set");
3259 if (main_target_opt != NULL
3260 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3261 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3262 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3263 error ("target attribute or pragma changes SPE ABI");
3265 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3266 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3267 || rs6000_cpu == PROCESSOR_PPCE5500)
3269 if (TARGET_ALTIVEC)
3270 error ("AltiVec not supported in this target");
3271 if (TARGET_SPE)
3272 error ("SPE not supported in this target");
3274 if (rs6000_cpu == PROCESSOR_PPCE6500)
3276 if (TARGET_SPE)
3277 error ("SPE not supported in this target");
3280 /* Disable Cell microcode if we are optimizing for the Cell
3281 and not optimizing for size. */
3282 if (rs6000_gen_cell_microcode == -1)
3283 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3284 && !optimize_size);
3286 /* If we are optimizing big endian systems for space and it's OK to
3287 use instructions that would be microcoded on the Cell, use the
3288 load/store multiple and string instructions. */
3289 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3290 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3291 | OPTION_MASK_STRING);
3293 /* Don't allow -mmultiple or -mstring on little endian systems
3294 unless the cpu is a 750, because the hardware doesn't support the
3295 instructions used in little endian mode, and they cause an alignment
3296 trap. The 750 does not cause an alignment trap (except when the
3297 target is unaligned). */
3299 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3301 if (TARGET_MULTIPLE)
3303 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3304 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3305 warning (0, "-mmultiple is not supported on little endian systems");
3308 if (TARGET_STRING)
3310 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3311 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3312 warning (0, "-mstring is not supported on little endian systems");
3316 /* If little-endian, default to -mstrict-align on older processors.
3317 Testing for htm matches power8 and later. */
3318 if (!BYTES_BIG_ENDIAN
3319 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3320 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3322 /* -maltivec={le,be} implies -maltivec. */
3323 if (rs6000_altivec_element_order != 0)
3324 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3326 /* Disallow -maltivec=le in big endian mode for now. This is not
3327 known to be useful for anyone. */
3328 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3330 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3331 rs6000_altivec_element_order = 0;
3334 /* Add some warnings for VSX. */
3335 if (TARGET_VSX)
3337 const char *msg = NULL;
3338 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3339 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3341 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3342 msg = N_("-mvsx requires hardware floating point");
3343 else
3345 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3346 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3349 else if (TARGET_PAIRED_FLOAT)
3350 msg = N_("-mvsx and -mpaired are incompatible");
3351 else if (TARGET_AVOID_XFORM > 0)
3352 msg = N_("-mvsx needs indexed addressing");
3353 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3354 & OPTION_MASK_ALTIVEC))
3356 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3357 msg = N_("-mvsx and -mno-altivec are incompatible");
3358 else
3359 msg = N_("-mno-altivec disables vsx");
3362 if (msg)
3364 warning (0, msg);
3365 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3366 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3370 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3371 the -mcpu setting to enable options that conflict. */
3372 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3373 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3374 | OPTION_MASK_ALTIVEC
3375 | OPTION_MASK_VSX)) != 0)
3376 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3377 | OPTION_MASK_DIRECT_MOVE)
3378 & ~rs6000_isa_flags_explicit);
3380 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3381 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3383 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3384 unless the user explicitly used the -mno-<option> to disable the code. */
3385 if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3386 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3387 else if (TARGET_VSX)
3388 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3389 else if (TARGET_POPCNTD)
3390 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3391 else if (TARGET_DFP)
3392 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3393 else if (TARGET_CMPB)
3394 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3395 else if (TARGET_FPRND)
3396 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3397 else if (TARGET_POPCNTB)
3398 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3399 else if (TARGET_ALTIVEC)
3400 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
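/* Illustrative note (added comment): the chain above tests the newest
   option first, so a bare -mvsx lands in the TARGET_VSX arm and pulls in
   the ISA 2.06 server masks (which, assuming the usual definition of
   ISA_2_6_MASKS_SERVER, include AltiVec and popcntd), except for any bit
   the user set explicitly.  */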
3402 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3404 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3405 error ("-mcrypto requires -maltivec");
3406 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3409 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3411 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3412 error ("-mdirect-move requires -mvsx");
3413 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3416 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3418 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3419 error ("-mpower8-vector requires -maltivec");
3420 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3423 if (TARGET_P8_VECTOR && !TARGET_VSX)
3425 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3426 error ("-mpower8-vector requires -mvsx");
3427 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3430 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3432 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3433 error ("-mvsx-timode requires -mvsx");
3434 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3437 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3439 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3440 error ("-mhard-dfp requires -mhard-float");
3441 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3444 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3445 silently turn off quad memory mode. */
3446 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3448 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3449 warning (0, N_("-mquad-memory requires 64-bit mode"));
3451 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3452 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3454 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3455 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3458 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3459 the words are reversed, but atomic operations can still be done by
3460 swapping the words. */
3461 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3463 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3464 warning (0, N_("-mquad-memory is not available in little endian mode"));
3466 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3469 /* Assume that if the user asked for normal quad memory instructions, they
3470 want the atomic versions as well, unless they explicitly told us not to
3471 use quad word atomic instructions. */
3472 if (TARGET_QUAD_MEMORY
3473 && !TARGET_QUAD_MEMORY_ATOMIC
3474 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3475 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3477 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3478 generating power8 instructions. */
3479 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3480 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3481 & OPTION_MASK_P8_FUSION);
3483 /* Power8 does not fuse sign extended loads with the addis. If we are
3484 optimizing at high levels for speed, convert a sign extended load into a
3485 zero extending load, and an explicit sign extension. */
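/* Hypothetical illustration (added comment): at -O3 this would favor,
   say, lwz followed by an explicit extsw over a single lwa, so that the
   zero-extending load can still fuse with a preceding addis.  */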
3486 if (TARGET_P8_FUSION
3487 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3488 && optimize_function_for_speed_p (cfun)
3489 && optimize >= 3)
3490 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3492 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3493 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
3495 /* E500mc does "better" if we inline more aggressively. Respect the
3496 user's opinion, though. */
3497 if (rs6000_block_move_inline_limit == 0
3498 && (rs6000_cpu == PROCESSOR_PPCE500MC
3499 || rs6000_cpu == PROCESSOR_PPCE500MC64
3500 || rs6000_cpu == PROCESSOR_PPCE5500
3501 || rs6000_cpu == PROCESSOR_PPCE6500))
3502 rs6000_block_move_inline_limit = 128;
3504 /* store_one_arg depends on expand_block_move to handle at least the
3505 size of reg_parm_stack_space. */
3506 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
3507 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
3509 if (global_init_p)
3511 /* If the appropriate debug option is enabled, replace the target hooks
3512 with debug versions that call the real version and then print
3513 debugging information. */
3514 if (TARGET_DEBUG_COST)
3516 targetm.rtx_costs = rs6000_debug_rtx_costs;
3517 targetm.address_cost = rs6000_debug_address_cost;
3518 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
3521 if (TARGET_DEBUG_ADDR)
3523 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
3524 targetm.legitimize_address = rs6000_debug_legitimize_address;
3525 rs6000_secondary_reload_class_ptr
3526 = rs6000_debug_secondary_reload_class;
3527 rs6000_secondary_memory_needed_ptr
3528 = rs6000_debug_secondary_memory_needed;
3529 rs6000_cannot_change_mode_class_ptr
3530 = rs6000_debug_cannot_change_mode_class;
3531 rs6000_preferred_reload_class_ptr
3532 = rs6000_debug_preferred_reload_class;
3533 rs6000_legitimize_reload_address_ptr
3534 = rs6000_debug_legitimize_reload_address;
3535 rs6000_mode_dependent_address_ptr
3536 = rs6000_debug_mode_dependent_address;
3539 if (rs6000_veclibabi_name)
3541 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
3542 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
3543 else
3545 error ("unknown vectorization library ABI type (%s) for "
3546 "-mveclibabi= switch", rs6000_veclibabi_name);
3547 ret = false;
3552 if (!global_options_set.x_rs6000_long_double_type_size)
3554 if (main_target_opt != NULL
3555 && (main_target_opt->x_rs6000_long_double_type_size
3556 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
3557 error ("target attribute or pragma changes long double size");
3558 else
3559 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
3562 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
3563 if (!global_options_set.x_rs6000_ieeequad)
3564 rs6000_ieeequad = 1;
3565 #endif
3567 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3568 target attribute or pragma which automatically enables both options,
3569 unless the altivec ABI was set. This is set by default for 64-bit, but
3570 not for 32-bit. */
3571 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3572 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3573 & ~rs6000_isa_flags_explicit);
3575 /* Enable Altivec ABI for AIX -maltivec. */
3576 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
3578 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3579 error ("target attribute or pragma changes AltiVec ABI");
3580 else
3581 rs6000_altivec_abi = 1;
3584 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
3585 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
3586 be explicitly overridden in either case. */
3587 if (TARGET_ELF)
3589 if (!global_options_set.x_rs6000_altivec_abi
3590 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
3592 if (main_target_opt != NULL &&
3593 !main_target_opt->x_rs6000_altivec_abi)
3594 error ("target attribute or pragma changes AltiVec ABI");
3595 else
3596 rs6000_altivec_abi = 1;
3600 /* Set the Darwin64 ABI as default for 64-bit Darwin.
3601 So far, the only darwin64 targets are also MACH-O. */
3602 if (TARGET_MACHO
3603 && DEFAULT_ABI == ABI_DARWIN
3604 && TARGET_64BIT)
3606 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
3607 error ("target attribute or pragma changes darwin64 ABI");
3608 else
3610 rs6000_darwin64_abi = 1;
3611 /* Default to natural alignment, for better performance. */
3612 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3616 /* Place FP constants in the constant pool instead of the TOC
3617 if section anchors are enabled. */
3618 if (flag_section_anchors
3619 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
3620 TARGET_NO_FP_IN_TOC = 1;
3622 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3623 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
3625 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3626 SUBTARGET_OVERRIDE_OPTIONS;
3627 #endif
3628 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3629 SUBSUBTARGET_OVERRIDE_OPTIONS;
3630 #endif
3631 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
3632 SUB3TARGET_OVERRIDE_OPTIONS;
3633 #endif
3635 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3636 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
3638 /* For the E500 family of cores, reset the single/double FP flags to let us
3639 check that they remain constant across attributes or pragmas. Also,
3640 clear a possible request for string instructions, which are not supported
3641 on these cores and which we might have silently enabled above for -Os.
3643 For other families, clear ISEL in case it was set implicitly.
3646 switch (rs6000_cpu)
3648 case PROCESSOR_PPC8540:
3649 case PROCESSOR_PPC8548:
3650 case PROCESSOR_PPCE500MC:
3651 case PROCESSOR_PPCE500MC64:
3652 case PROCESSOR_PPCE5500:
3653 case PROCESSOR_PPCE6500:
3655 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
3656 rs6000_double_float = TARGET_E500_DOUBLE;
3658 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3660 break;
3662 default:
3664 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
3665 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
3667 break;
3670 if (main_target_opt)
3672 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
3673 error ("target attribute or pragma changes single precision floating "
3674 "point");
3675 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
3676 error ("target attribute or pragma changes double precision floating "
3677 "point");
3680 /* Detect invalid option combinations with E500. */
3681 CHECK_E500_OPTIONS;
3683 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
3684 && rs6000_cpu != PROCESSOR_POWER5
3685 && rs6000_cpu != PROCESSOR_POWER6
3686 && rs6000_cpu != PROCESSOR_POWER7
3687 && rs6000_cpu != PROCESSOR_POWER8
3688 && rs6000_cpu != PROCESSOR_PPCA2
3689 && rs6000_cpu != PROCESSOR_CELL
3690 && rs6000_cpu != PROCESSOR_PPC476);
3691 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
3692 || rs6000_cpu == PROCESSOR_POWER5
3693 || rs6000_cpu == PROCESSOR_POWER7
3694 || rs6000_cpu == PROCESSOR_POWER8);
3695 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
3696 || rs6000_cpu == PROCESSOR_POWER5
3697 || rs6000_cpu == PROCESSOR_POWER6
3698 || rs6000_cpu == PROCESSOR_POWER7
3699 || rs6000_cpu == PROCESSOR_POWER8
3700 || rs6000_cpu == PROCESSOR_PPCE500MC
3701 || rs6000_cpu == PROCESSOR_PPCE500MC64
3702 || rs6000_cpu == PROCESSOR_PPCE5500
3703 || rs6000_cpu == PROCESSOR_PPCE6500);
3705 /* Allow debug switches to override the above settings. These are set to -1
3706 in rs6000.opt to indicate the user hasn't directly set the switch. */
3707 if (TARGET_ALWAYS_HINT >= 0)
3708 rs6000_always_hint = TARGET_ALWAYS_HINT;
3710 if (TARGET_SCHED_GROUPS >= 0)
3711 rs6000_sched_groups = TARGET_SCHED_GROUPS;
3713 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
3714 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
3716 rs6000_sched_restricted_insns_priority
3717 = (rs6000_sched_groups ? 1 : 0);
3719 /* Handle -msched-costly-dep option. */
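/* Per the parsing below, the accepted strings are "no", "all",
   "true_store_to_load" and "store_to_load"; anything else is fed through
   atoi and used directly as the numeric dependence-cost value.  */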
3720 rs6000_sched_costly_dep
3721 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
3723 if (rs6000_sched_costly_dep_str)
3725 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
3726 rs6000_sched_costly_dep = no_dep_costly;
3727 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
3728 rs6000_sched_costly_dep = all_deps_costly;
3729 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
3730 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
3731 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
3732 rs6000_sched_costly_dep = store_to_load_dep_costly;
3733 else
3734 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
3735 atoi (rs6000_sched_costly_dep_str));
3738 /* Handle -minsert-sched-nops option. */
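/* Likewise, the parser below accepts "no", "pad" and "regroup_exact";
   any other string is converted with atoi into a nop-insertion scheme.  */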
3739 rs6000_sched_insert_nops
3740 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
3742 if (rs6000_sched_insert_nops_str)
3744 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
3745 rs6000_sched_insert_nops = sched_finish_none;
3746 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
3747 rs6000_sched_insert_nops = sched_finish_pad_groups;
3748 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
3749 rs6000_sched_insert_nops = sched_finish_regroup_exact;
3750 else
3751 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
3752 atoi (rs6000_sched_insert_nops_str));
3755 if (global_init_p)
3757 #ifdef TARGET_REGNAMES
3758 /* If the user desires alternate register names, copy in the
3759 alternate names now. */
3760 if (TARGET_REGNAMES)
3761 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
3762 #endif
3764 /* Set aix_struct_return last, after the ABI is determined.
3765 If -maix-struct-return or -msvr4-struct-return was explicitly
3766 used, don't override with the ABI default. */
3767 if (!global_options_set.x_aix_struct_return)
3768 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
3770 #if 0
3771 /* IBM XL compiler defaults to unsigned bitfields. */
3772 if (TARGET_XL_COMPAT)
3773 flag_signed_bitfields = 0;
3774 #endif
3776 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3777 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
3779 if (TARGET_TOC)
3780 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
3782 /* We can only guarantee the availability of DI pseudo-ops when
3783 assembling for 64-bit targets. */
3784 if (!TARGET_64BIT)
3786 targetm.asm_out.aligned_op.di = NULL;
3787 targetm.asm_out.unaligned_op.di = NULL;
3791 /* Set branch target alignment, if not optimizing for size. */
3792 if (!optimize_size)
3794 /* Cell wants to be 8-byte aligned for dual issue. Titan wants to be
3795 8-byte aligned to avoid misprediction by the branch predictor. */
3796 if (rs6000_cpu == PROCESSOR_TITAN
3797 || rs6000_cpu == PROCESSOR_CELL)
3799 if (align_functions <= 0)
3800 align_functions = 8;
3801 if (align_jumps <= 0)
3802 align_jumps = 8;
3803 if (align_loops <= 0)
3804 align_loops = 8;
3806 if (rs6000_align_branch_targets)
3808 if (align_functions <= 0)
3809 align_functions = 16;
3810 if (align_jumps <= 0)
3811 align_jumps = 16;
3812 if (align_loops <= 0)
3814 can_override_loop_align = 1;
3815 align_loops = 16;
3818 if (align_jumps_max_skip <= 0)
3819 align_jumps_max_skip = 15;
3820 if (align_loops_max_skip <= 0)
3821 align_loops_max_skip = 15;
3824 /* Arrange to save and restore machine status around nested functions. */
3825 init_machine_status = rs6000_init_machine_status;
3827 /* We should always be splitting complex arguments, but we can't break
3828 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
3829 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
3830 targetm.calls.split_complex_arg = NULL;
3833 /* Initialize rs6000_cost with the appropriate target costs. */
3834 if (optimize_size)
3835 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
3836 else
3837 switch (rs6000_cpu)
3839 case PROCESSOR_RS64A:
3840 rs6000_cost = &rs64a_cost;
3841 break;
3843 case PROCESSOR_MPCCORE:
3844 rs6000_cost = &mpccore_cost;
3845 break;
3847 case PROCESSOR_PPC403:
3848 rs6000_cost = &ppc403_cost;
3849 break;
3851 case PROCESSOR_PPC405:
3852 rs6000_cost = &ppc405_cost;
3853 break;
3855 case PROCESSOR_PPC440:
3856 rs6000_cost = &ppc440_cost;
3857 break;
3859 case PROCESSOR_PPC476:
3860 rs6000_cost = &ppc476_cost;
3861 break;
3863 case PROCESSOR_PPC601:
3864 rs6000_cost = &ppc601_cost;
3865 break;
3867 case PROCESSOR_PPC603:
3868 rs6000_cost = &ppc603_cost;
3869 break;
3871 case PROCESSOR_PPC604:
3872 rs6000_cost = &ppc604_cost;
3873 break;
3875 case PROCESSOR_PPC604e:
3876 rs6000_cost = &ppc604e_cost;
3877 break;
3879 case PROCESSOR_PPC620:
3880 rs6000_cost = &ppc620_cost;
3881 break;
3883 case PROCESSOR_PPC630:
3884 rs6000_cost = &ppc630_cost;
3885 break;
3887 case PROCESSOR_CELL:
3888 rs6000_cost = &ppccell_cost;
3889 break;
3891 case PROCESSOR_PPC750:
3892 case PROCESSOR_PPC7400:
3893 rs6000_cost = &ppc750_cost;
3894 break;
3896 case PROCESSOR_PPC7450:
3897 rs6000_cost = &ppc7450_cost;
3898 break;
3900 case PROCESSOR_PPC8540:
3901 case PROCESSOR_PPC8548:
3902 rs6000_cost = &ppc8540_cost;
3903 break;
3905 case PROCESSOR_PPCE300C2:
3906 case PROCESSOR_PPCE300C3:
3907 rs6000_cost = &ppce300c2c3_cost;
3908 break;
3910 case PROCESSOR_PPCE500MC:
3911 rs6000_cost = &ppce500mc_cost;
3912 break;
3914 case PROCESSOR_PPCE500MC64:
3915 rs6000_cost = &ppce500mc64_cost;
3916 break;
3918 case PROCESSOR_PPCE5500:
3919 rs6000_cost = &ppce5500_cost;
3920 break;
3922 case PROCESSOR_PPCE6500:
3923 rs6000_cost = &ppce6500_cost;
3924 break;
3926 case PROCESSOR_TITAN:
3927 rs6000_cost = &titan_cost;
3928 break;
3930 case PROCESSOR_POWER4:
3931 case PROCESSOR_POWER5:
3932 rs6000_cost = &power4_cost;
3933 break;
3935 case PROCESSOR_POWER6:
3936 rs6000_cost = &power6_cost;
3937 break;
3939 case PROCESSOR_POWER7:
3940 rs6000_cost = &power7_cost;
3941 break;
3943 case PROCESSOR_POWER8:
3944 rs6000_cost = &power8_cost;
3945 break;
3947 case PROCESSOR_PPCA2:
3948 rs6000_cost = &ppca2_cost;
3949 break;
3951 default:
3952 gcc_unreachable ();
3955 if (global_init_p)
3957 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3958 rs6000_cost->simultaneous_prefetches,
3959 global_options.x_param_values,
3960 global_options_set.x_param_values);
3961 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
3962 global_options.x_param_values,
3963 global_options_set.x_param_values);
3964 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3965 rs6000_cost->cache_line_size,
3966 global_options.x_param_values,
3967 global_options_set.x_param_values);
3968 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
3969 global_options.x_param_values,
3970 global_options_set.x_param_values);
3972 /* Increase loop peeling limits based on performance analysis. */
3973 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
3974 global_options.x_param_values,
3975 global_options_set.x_param_values);
3976 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
3977 global_options.x_param_values,
3978 global_options_set.x_param_values);
3980 /* If using typedef char *va_list, signal that
3981 __builtin_va_start (&ap, 0) can be optimized to
3982 ap = __builtin_next_arg (0). */
3983 if (DEFAULT_ABI != ABI_V4)
3984 targetm.expand_builtin_va_start = NULL;
3987 /* Set up single/double float flags.
3988 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
3989 then set both flags. */
3990 if (TARGET_HARD_FLOAT && TARGET_FPRS
3991 && rs6000_single_float == 0 && rs6000_double_float == 0)
3992 rs6000_single_float = rs6000_double_float = 1;
3994 /* If not explicitly specified via option, decide whether to generate indexed
3995 load/store instructions. */
3996 if (TARGET_AVOID_XFORM == -1)
3997 /* Avoid indexed addressing when targeting Power6 in order to avoid the
3998 DERAT mispredict penalty. However, the LVE and STVE AltiVec instructions
3999 need indexed accesses and the type used is the scalar type of the element
4000 being loaded or stored. */
4001 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4002 && !TARGET_ALTIVEC);
4004 /* Set the -mrecip options. */
4005 if (rs6000_recip_name)
4007 char *p = ASTRDUP (rs6000_recip_name);
4008 char *q;
4009 unsigned int mask, i;
4010 bool invert;
4012 while ((q = strtok (p, ",")) != NULL)
4014 p = NULL;
4015 if (*q == '!')
4017 invert = true;
4018 q++;
4020 else
4021 invert = false;
4023 if (!strcmp (q, "default"))
4024 mask = ((TARGET_RECIP_PRECISION)
4025 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4026 else
4028 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4029 if (!strcmp (q, recip_options[i].string))
4031 mask = recip_options[i].mask;
4032 break;
4035 if (i == ARRAY_SIZE (recip_options))
4037 error ("unknown option for -mrecip=%s", q);
4038 invert = false;
4039 mask = 0;
4040 ret = false;
4044 if (invert)
4045 rs6000_recip_control &= ~mask;
4046 else
4047 rs6000_recip_control |= mask;
4051 /* Set the builtin mask of the various options that could affect which
4052 builtins are available. In the past we used target_flags, but we've run out
4053 of bits, and some options like SPE and PAIRED are no longer in
4054 target_flags. */
4055 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4056 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4058 fprintf (stderr,
4059 "new builtin mask = " HOST_WIDE_INT_PRINT_HEX ", ",
4060 rs6000_builtin_mask);
4061 rs6000_print_builtin_options (stderr, 0, NULL, rs6000_builtin_mask);
4064 /* Initialize all of the registers. */
4065 rs6000_init_hard_regno_mode_ok (global_init_p);
4067 /* Save the initial options in case the user uses function-specific options. */
4068 if (global_init_p)
4069 target_option_default_node = target_option_current_node
4070 = build_target_option_node (&global_options);
4072 /* If not explicitly specified via option, decide whether to generate the
4073 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4074 if (TARGET_LINK_STACK == -1)
4075 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4077 return ret;
4080 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4081 define the target cpu type. */
4083 static void
4084 rs6000_option_override (void)
4086 (void) rs6000_option_override_internal (true);
4090 /* Implement targetm.vectorize.builtin_mask_for_load. */
4091 static tree
4092 rs6000_builtin_mask_for_load (void)
4094 if (TARGET_ALTIVEC || TARGET_VSX)
4095 return altivec_builtin_mask_for_load;
4096 else
4097 return 0;
4100 /* Implement LOOP_ALIGN. */
4101 int
4102 rs6000_loop_align (rtx label)
4104 basic_block bb;
4105 int ninsns;
4107 /* Don't override loop alignment if -falign-loops was specified. */
4108 if (!can_override_loop_align)
4109 return align_loops_log;
4111 bb = BLOCK_FOR_INSN (label);
4112 ninsns = num_loop_insns (bb->loop_father);
4114 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return default. */
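/* A return value of 5 requests 1 << 5 == 32-byte alignment; the hook
   returns a log2 alignment, as align_loops_log does.  */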
4115 if (ninsns > 4 && ninsns <= 8
4116 && (rs6000_cpu == PROCESSOR_POWER4
4117 || rs6000_cpu == PROCESSOR_POWER5
4118 || rs6000_cpu == PROCESSOR_POWER6
4119 || rs6000_cpu == PROCESSOR_POWER7
4120 || rs6000_cpu == PROCESSOR_POWER8))
4121 return 5;
4122 else
4123 return align_loops_log;
4126 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4127 static int
4128 rs6000_loop_align_max_skip (rtx label)
4130 return (1 << rs6000_loop_align (label)) - 1;
4133 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4134 after applying N iterations. This routine does not determine how many
4135 iterations are required to reach the desired alignment. */
4137 static bool
4138 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4140 if (is_packed)
4141 return false;
4143 if (TARGET_32BIT)
4145 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4146 return true;
4148 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4149 return true;
4151 return false;
4153 else
4155 if (TARGET_MACHO)
4156 return false;
4158 /* Assume that all other types are naturally aligned. CHECKME! */
4159 return true;
4163 /* Return true if the vector misalignment factor is supported by the
4164 target. */
4165 static bool
4166 rs6000_builtin_support_vector_misalignment (enum machine_mode mode,
4167 const_tree type,
4168 int misalignment,
4169 bool is_packed)
4171 if (TARGET_VSX)
4173 /* Return false if the movmisalign pattern is not supported for this mode. */
4174 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4175 return false;
4177 if (misalignment == -1)
4179 /* Misalignment factor is unknown at compile time but we know
4180 it's word aligned. */
4181 if (rs6000_vector_alignment_reachable (type, is_packed))
4183 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4185 if (element_size == 64 || element_size == 32)
4186 return true;
4189 return false;
4192 /* VSX supports word-aligned vectors. */
4193 if (misalignment % 4 == 0)
4194 return true;
4196 return false;
4199 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4200 static int
4201 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4202 tree vectype, int misalign)
4204 unsigned elements;
4205 tree elem_type;
4207 switch (type_of_cost)
4209 case scalar_stmt:
4210 case scalar_load:
4211 case scalar_store:
4212 case vector_stmt:
4213 case vector_load:
4214 case vector_store:
4215 case vec_to_scalar:
4216 case scalar_to_vec:
4217 case cond_branch_not_taken:
4218 return 1;
4220 case vec_perm:
4221 if (TARGET_VSX)
4222 return 3;
4223 else
4224 return 1;
4226 case vec_promote_demote:
4227 if (TARGET_VSX)
4228 return 4;
4229 else
4230 return 1;
4232 case cond_branch_taken:
4233 return 3;
4235 case unaligned_load:
4236 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4238 elements = TYPE_VECTOR_SUBPARTS (vectype);
4239 if (elements == 2)
4240 /* Double word aligned. */
4241 return 2;
4243 if (elements == 4)
4245 switch (misalign)
4247 case 8:
4248 /* Double word aligned. */
4249 return 2;
4251 case -1:
4252 /* Unknown misalignment. */
4253 case 4:
4254 case 12:
4255 /* Word aligned. */
4256 return 22;
4258 default:
4259 gcc_unreachable ();
4264 if (TARGET_ALTIVEC)
4265 /* Misaligned loads are not supported. */
4266 gcc_unreachable ();
4268 return 2;
4270 case unaligned_store:
4271 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4273 elements = TYPE_VECTOR_SUBPARTS (vectype);
4274 if (elements == 2)
4275 /* Double word aligned. */
4276 return 2;
4278 if (elements == 4)
4280 switch (misalign)
4282 case 8:
4283 /* Double word aligned. */
4284 return 2;
4286 case -1:
4287 /* Unknown misalignment. */
4288 case 4:
4289 case 12:
4290 /* Word aligned. */
4291 return 23;
4293 default:
4294 gcc_unreachable ();
4299 if (TARGET_ALTIVEC)
4300 /* Misaligned stores are not supported. */
4301 gcc_unreachable ();
4303 return 2;
4305 case vec_construct:
4306 elements = TYPE_VECTOR_SUBPARTS (vectype);
4307 elem_type = TREE_TYPE (vectype);
4308 /* 32-bit vectors loaded into registers are stored as double
4309 precision, so we need n/2 converts in addition to the usual
4310 n/2 merges to construct a vector of short floats from them. */
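/* Worked example (added comment): a V4SF construct therefore costs
   4 + 1 = 5, while a V2DF construct costs 2 / 2 + 1 = 2.  */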
4311 if (SCALAR_FLOAT_TYPE_P (elem_type)
4312 && TYPE_PRECISION (elem_type) == 32)
4313 return elements + 1;
4314 else
4315 return elements / 2 + 1;
4317 default:
4318 gcc_unreachable ();
4322 /* Implement targetm.vectorize.preferred_simd_mode. */
4324 static enum machine_mode
4325 rs6000_preferred_simd_mode (enum machine_mode mode)
4327 if (TARGET_VSX)
4328 switch (mode)
4330 case DFmode:
4331 return V2DFmode;
4332 default:;
4334 if (TARGET_ALTIVEC || TARGET_VSX)
4335 switch (mode)
4337 case SFmode:
4338 return V4SFmode;
4339 case TImode:
4340 return V1TImode;
4341 case DImode:
4342 return V2DImode;
4343 case SImode:
4344 return V4SImode;
4345 case HImode:
4346 return V8HImode;
4347 case QImode:
4348 return V16QImode;
4349 default:;
4351 if (TARGET_SPE)
4352 switch (mode)
4354 case SFmode:
4355 return V2SFmode;
4356 case SImode:
4357 return V2SImode;
4358 default:;
4360 if (TARGET_PAIRED_FLOAT
4361 && mode == SFmode)
4362 return V2SFmode;
4363 return word_mode;
4366 typedef struct _rs6000_cost_data
4368 struct loop *loop_info;
4369 unsigned cost[3];
4370 } rs6000_cost_data;
4372 /* Test for likely overcommitment of vector hardware resources. If a
4373 loop iteration is relatively large, and too large a percentage of
4374 instructions in the loop are vectorized, the cost model may not
4375 adequately reflect delays from unavailable vector resources.
4376 Penalize the loop body cost for this case. */
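/* Worked example with the thresholds below (added comment): a loop with
   vect_body cost 90 and 10 non-vectorized statements has density
   90 * 100 / (90 + 10) = 90% > 85 and size 100 > 70, so the body cost is
   scaled by 110%, from 90 to 99.  */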
4378 static void
4379 rs6000_density_test (rs6000_cost_data *data)
4381 const int DENSITY_PCT_THRESHOLD = 85;
4382 const int DENSITY_SIZE_THRESHOLD = 70;
4383 const int DENSITY_PENALTY = 10;
4384 struct loop *loop = data->loop_info;
4385 basic_block *bbs = get_loop_body (loop);
4386 int nbbs = loop->num_nodes;
4387 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4388 int i, density_pct;
4390 for (i = 0; i < nbbs; i++)
4392 basic_block bb = bbs[i];
4393 gimple_stmt_iterator gsi;
4395 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4397 gimple stmt = gsi_stmt (gsi);
4398 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4400 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4401 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4402 not_vec_cost++;
4406 free (bbs);
4407 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4409 if (density_pct > DENSITY_PCT_THRESHOLD
4410 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4412 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4413 if (dump_enabled_p ())
4414 dump_printf_loc (MSG_NOTE, vect_location,
4415 "density %d%%, cost %d exceeds threshold, penalizing "
4416 "loop body cost by %d%%", density_pct,
4417 vec_cost + not_vec_cost, DENSITY_PENALTY);
4421 /* Implement targetm.vectorize.init_cost. */
4423 static void *
4424 rs6000_init_cost (struct loop *loop_info)
4426 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4427 data->loop_info = loop_info;
4428 data->cost[vect_prologue] = 0;
4429 data->cost[vect_body] = 0;
4430 data->cost[vect_epilogue] = 0;
4431 return data;
4434 /* Implement targetm.vectorize.add_stmt_cost. */
4436 static unsigned
4437 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4438 struct _stmt_vec_info *stmt_info, int misalign,
4439 enum vect_cost_model_location where)
4441 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4442 unsigned retval = 0;
4444 if (flag_vect_cost_model)
4446 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4447 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
4448 misalign);
4449 /* Statements in an inner loop relative to the loop being
4450 vectorized are weighted more heavily. The value here is
4451 arbitrary and could potentially be improved with analysis. */
4452 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4453 count *= 50; /* FIXME. */
4455 retval = (unsigned) (count * stmt_cost);
4456 cost_data->cost[where] += retval;
4459 return retval;
4462 /* Implement targetm.vectorize.finish_cost. */
4464 static void
4465 rs6000_finish_cost (void *data, unsigned *prologue_cost,
4466 unsigned *body_cost, unsigned *epilogue_cost)
4468 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4470 if (cost_data->loop_info)
4471 rs6000_density_test (cost_data);
4473 *prologue_cost = cost_data->cost[vect_prologue];
4474 *body_cost = cost_data->cost[vect_body];
4475 *epilogue_cost = cost_data->cost[vect_epilogue];
4478 /* Implement targetm.vectorize.destroy_cost_data. */
4480 static void
4481 rs6000_destroy_cost_data (void *data)
4483 free (data);
4486 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
4487 library with vectorized intrinsics. */
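/* Naming sketch, per the suffix logic below: the double variants append
   "d2" (pow becomes powd2 on V2DF) and the float variants append "4"
   (powf becomes powf4 on V4SF).  */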
4489 static tree
4490 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
4492 char name[32];
4493 const char *suffix = NULL;
4494 tree fntype, new_fndecl, bdecl = NULL_TREE;
4495 int n_args = 1;
4496 const char *bname;
4497 enum machine_mode el_mode, in_mode;
4498 int n, in_n;
4500 /* Libmass is suitable for unsafe math only as it does not correctly support
4501 parts of IEEE with the required precision such as denormals. Only support
4502 it if we have VSX to use the simd d2 or f4 functions.
4503 XXX: Add variable length support. */
4504 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
4505 return NULL_TREE;
4507 el_mode = TYPE_MODE (TREE_TYPE (type_out));
4508 n = TYPE_VECTOR_SUBPARTS (type_out);
4509 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4510 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4511 if (el_mode != in_mode
4512 || n != in_n)
4513 return NULL_TREE;
4515 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4517 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4518 switch (fn)
4520 case BUILT_IN_ATAN2:
4521 case BUILT_IN_HYPOT:
4522 case BUILT_IN_POW:
4523 n_args = 2;
4524 /* fall through */
4526 case BUILT_IN_ACOS:
4527 case BUILT_IN_ACOSH:
4528 case BUILT_IN_ASIN:
4529 case BUILT_IN_ASINH:
4530 case BUILT_IN_ATAN:
4531 case BUILT_IN_ATANH:
4532 case BUILT_IN_CBRT:
4533 case BUILT_IN_COS:
4534 case BUILT_IN_COSH:
4535 case BUILT_IN_ERF:
4536 case BUILT_IN_ERFC:
4537 case BUILT_IN_EXP2:
4538 case BUILT_IN_EXP:
4539 case BUILT_IN_EXPM1:
4540 case BUILT_IN_LGAMMA:
4541 case BUILT_IN_LOG10:
4542 case BUILT_IN_LOG1P:
4543 case BUILT_IN_LOG2:
4544 case BUILT_IN_LOG:
4545 case BUILT_IN_SIN:
4546 case BUILT_IN_SINH:
4547 case BUILT_IN_SQRT:
4548 case BUILT_IN_TAN:
4549 case BUILT_IN_TANH:
4550 bdecl = builtin_decl_implicit (fn);
4551 suffix = "d2"; /* pow -> powd2 */
4552 if (el_mode != DFmode
4553 || n != 2
4554 || !bdecl)
4555 return NULL_TREE;
4556 break;
4558 case BUILT_IN_ATAN2F:
4559 case BUILT_IN_HYPOTF:
4560 case BUILT_IN_POWF:
4561 n_args = 2;
4562 /* fall through */
4564 case BUILT_IN_ACOSF:
4565 case BUILT_IN_ACOSHF:
4566 case BUILT_IN_ASINF:
4567 case BUILT_IN_ASINHF:
4568 case BUILT_IN_ATANF:
4569 case BUILT_IN_ATANHF:
4570 case BUILT_IN_CBRTF:
4571 case BUILT_IN_COSF:
4572 case BUILT_IN_COSHF:
4573 case BUILT_IN_ERFF:
4574 case BUILT_IN_ERFCF:
4575 case BUILT_IN_EXP2F:
4576 case BUILT_IN_EXPF:
4577 case BUILT_IN_EXPM1F:
4578 case BUILT_IN_LGAMMAF:
4579 case BUILT_IN_LOG10F:
4580 case BUILT_IN_LOG1PF:
4581 case BUILT_IN_LOG2F:
4582 case BUILT_IN_LOGF:
4583 case BUILT_IN_SINF:
4584 case BUILT_IN_SINHF:
4585 case BUILT_IN_SQRTF:
4586 case BUILT_IN_TANF:
4587 case BUILT_IN_TANHF:
4588 bdecl = builtin_decl_implicit (fn);
4589 suffix = "4"; /* powf -> powf4 */
4590 if (el_mode != SFmode
4591 || n != 4
4592 || !bdecl)
4593 return NULL_TREE;
4594 break;
4596 default:
4597 return NULL_TREE;
4600 else
4601 return NULL_TREE;
4603 gcc_assert (suffix != NULL);
4604 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
4605 if (!bname)
4606 return NULL_TREE;
4608 strcpy (name, bname + sizeof ("__builtin_") - 1);
4609 strcat (name, suffix);
4611 if (n_args == 1)
4612 fntype = build_function_type_list (type_out, type_in, NULL);
4613 else if (n_args == 2)
4614 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
4615 else
4616 gcc_unreachable ();
4618 /* Build a function declaration for the vectorized function. */
4619 new_fndecl = build_decl (BUILTINS_LOCATION,
4620 FUNCTION_DECL, get_identifier (name), fntype);
4621 TREE_PUBLIC (new_fndecl) = 1;
4622 DECL_EXTERNAL (new_fndecl) = 1;
4623 DECL_IS_NOVOPS (new_fndecl) = 1;
4624 TREE_READONLY (new_fndecl) = 1;
4626 return new_fndecl;
4629 /* Returns a function decl for a vectorized version of the builtin function
4630 with builtin function code FN and the result vector type TYPE, or NULL_TREE
4631 if it is not available. */
4633 static tree
4634 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
4635 tree type_in)
4637 enum machine_mode in_mode, out_mode;
4638 int in_n, out_n;
4640 if (TARGET_DEBUG_BUILTIN)
4641 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
4642 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
4643 GET_MODE_NAME (TYPE_MODE (type_out)),
4644 GET_MODE_NAME (TYPE_MODE (type_in)));
4646 if (TREE_CODE (type_out) != VECTOR_TYPE
4647 || TREE_CODE (type_in) != VECTOR_TYPE
4648 || !TARGET_VECTORIZE_BUILTINS)
4649 return NULL_TREE;
4651 out_mode = TYPE_MODE (TREE_TYPE (type_out));
4652 out_n = TYPE_VECTOR_SUBPARTS (type_out);
4653 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4654 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4656 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4658 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4659 switch (fn)
4661 case BUILT_IN_CLZIMAX:
4662 case BUILT_IN_CLZLL:
4663 case BUILT_IN_CLZL:
4664 case BUILT_IN_CLZ:
4665 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4667 if (out_mode == QImode && out_n == 16)
4668 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
4669 else if (out_mode == HImode && out_n == 8)
4670 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
4671 else if (out_mode == SImode && out_n == 4)
4672 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
4673 else if (out_mode == DImode && out_n == 2)
4674 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
4676 break;
4677 case BUILT_IN_COPYSIGN:
4678 if (VECTOR_UNIT_VSX_P (V2DFmode)
4679 && out_mode == DFmode && out_n == 2
4680 && in_mode == DFmode && in_n == 2)
4681 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
4682 break;
4683 case BUILT_IN_COPYSIGNF:
4684 if (out_mode != SFmode || out_n != 4
4685 || in_mode != SFmode || in_n != 4)
4686 break;
4687 if (VECTOR_UNIT_VSX_P (V4SFmode))
4688 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
4689 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4690 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
4691 break;
4692 case BUILT_IN_POPCOUNTIMAX:
4693 case BUILT_IN_POPCOUNTLL:
4694 case BUILT_IN_POPCOUNTL:
4695 case BUILT_IN_POPCOUNT:
4696 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4698 if (out_mode == QImode && out_n == 16)
4699 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
4700 else if (out_mode == HImode && out_n == 8)
4701 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
4702 else if (out_mode == SImode && out_n == 4)
4703 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
4704 else if (out_mode == DImode && out_n == 2)
4705 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
4707 break;
4708 case BUILT_IN_SQRT:
4709 if (VECTOR_UNIT_VSX_P (V2DFmode)
4710 && out_mode == DFmode && out_n == 2
4711 && in_mode == DFmode && in_n == 2)
4712 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
4713 break;
4714 case BUILT_IN_SQRTF:
4715 if (VECTOR_UNIT_VSX_P (V4SFmode)
4716 && out_mode == SFmode && out_n == 4
4717 && in_mode == SFmode && in_n == 4)
4718 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
4719 break;
4720 case BUILT_IN_CEIL:
4721 if (VECTOR_UNIT_VSX_P (V2DFmode)
4722 && out_mode == DFmode && out_n == 2
4723 && in_mode == DFmode && in_n == 2)
4724 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
4725 break;
4726 case BUILT_IN_CEILF:
4727 if (out_mode != SFmode || out_n != 4
4728 || in_mode != SFmode || in_n != 4)
4729 break;
4730 if (VECTOR_UNIT_VSX_P (V4SFmode))
4731 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
4732 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4733 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
4734 break;
4735 case BUILT_IN_FLOOR:
4736 if (VECTOR_UNIT_VSX_P (V2DFmode)
4737 && out_mode == DFmode && out_n == 2
4738 && in_mode == DFmode && in_n == 2)
4739 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
4740 break;
4741 case BUILT_IN_FLOORF:
4742 if (out_mode != SFmode || out_n != 4
4743 || in_mode != SFmode || in_n != 4)
4744 break;
4745 if (VECTOR_UNIT_VSX_P (V4SFmode))
4746 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
4747 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4748 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
4749 break;
4750 case BUILT_IN_FMA:
4751 if (VECTOR_UNIT_VSX_P (V2DFmode)
4752 && out_mode == DFmode && out_n == 2
4753 && in_mode == DFmode && in_n == 2)
4754 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
4755 break;
4756 case BUILT_IN_FMAF:
4757 if (VECTOR_UNIT_VSX_P (V4SFmode)
4758 && out_mode == SFmode && out_n == 4
4759 && in_mode == SFmode && in_n == 4)
4760 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
4761 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
4762 && out_mode == SFmode && out_n == 4
4763 && in_mode == SFmode && in_n == 4)
4764 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
4765 break;
4766 case BUILT_IN_TRUNC:
4767 if (VECTOR_UNIT_VSX_P (V2DFmode)
4768 && out_mode == DFmode && out_n == 2
4769 && in_mode == DFmode && in_n == 2)
4770 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
4771 break;
4772 case BUILT_IN_TRUNCF:
4773 if (out_mode != SFmode || out_n != 4
4774 || in_mode != SFmode || in_n != 4)
4775 break;
4776 if (VECTOR_UNIT_VSX_P (V4SFmode))
4777 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
4778 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4779 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
4780 break;
4781 case BUILT_IN_NEARBYINT:
4782 if (VECTOR_UNIT_VSX_P (V2DFmode)
4783 && flag_unsafe_math_optimizations
4784 && out_mode == DFmode && out_n == 2
4785 && in_mode == DFmode && in_n == 2)
4786 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
4787 break;
4788 case BUILT_IN_NEARBYINTF:
4789 if (VECTOR_UNIT_VSX_P (V4SFmode)
4790 && flag_unsafe_math_optimizations
4791 && out_mode == SFmode && out_n == 4
4792 && in_mode == SFmode && in_n == 4)
4793 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
4794 break;
4795 case BUILT_IN_RINT:
4796 if (VECTOR_UNIT_VSX_P (V2DFmode)
4797 && !flag_trapping_math
4798 && out_mode == DFmode && out_n == 2
4799 && in_mode == DFmode && in_n == 2)
4800 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
4801 break;
4802 case BUILT_IN_RINTF:
4803 if (VECTOR_UNIT_VSX_P (V4SFmode)
4804 && !flag_trapping_math
4805 && out_mode == SFmode && out_n == 4
4806 && in_mode == SFmode && in_n == 4)
4807 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
4808 break;
4809 default:
4810 break;
4814 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
4816 enum rs6000_builtins fn
4817 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
4818 switch (fn)
4820 case RS6000_BUILTIN_RSQRTF:
4821 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4822 && out_mode == SFmode && out_n == 4
4823 && in_mode == SFmode && in_n == 4)
4824 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
4825 break;
4826 case RS6000_BUILTIN_RSQRT:
4827 if (VECTOR_UNIT_VSX_P (V2DFmode)
4828 && out_mode == DFmode && out_n == 2
4829 && in_mode == DFmode && in_n == 2)
4830 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
4831 break;
4832 case RS6000_BUILTIN_RECIPF:
4833 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4834 && out_mode == SFmode && out_n == 4
4835 && in_mode == SFmode && in_n == 4)
4836 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
4837 break;
4838 case RS6000_BUILTIN_RECIP:
4839 if (VECTOR_UNIT_VSX_P (V2DFmode)
4840 && out_mode == DFmode && out_n == 2
4841 && in_mode == DFmode && in_n == 2)
4842 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
4843 break;
4844 default:
4845 break;
4849 /* Generate calls to libmass if appropriate. */
4850 if (rs6000_veclib_handler)
4851 return rs6000_veclib_handler (fndecl, type_out, type_in);
4853 return NULL_TREE;
4856 /* Default CPU string for rs6000*_file_start functions. */
4857 static const char *rs6000_default_cpu;
4859 /* Do anything needed at the start of the asm file. */
4861 static void
4862 rs6000_file_start (void)
4864 char buffer[80];
4865 const char *start = buffer;
4866 FILE *file = asm_out_file;
4868 rs6000_default_cpu = TARGET_CPU_DEFAULT;
4870 default_file_start ();
4872 if (flag_verbose_asm)
4874 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
4876 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
4878 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
4879 start = "";
4882 if (global_options_set.x_rs6000_cpu_index)
4884 fprintf (file, "%s -mcpu=%s", start,
4885 processor_target_table[rs6000_cpu_index].name);
4886 start = "";
4889 if (global_options_set.x_rs6000_tune_index)
4891 fprintf (file, "%s -mtune=%s", start,
4892 processor_target_table[rs6000_tune_index].name);
4893 start = "";
4896 if (PPC405_ERRATUM77)
4898 fprintf (file, "%s PPC405CR_ERRATUM77", start);
4899 start = "";
4902 #ifdef USING_ELFOS_H
4903 switch (rs6000_sdata)
4905 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
4906 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
4907 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
4908 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
4911 if (rs6000_sdata && g_switch_value)
4913 fprintf (file, "%s -G %d", start,
4914 g_switch_value);
4915 start = "";
4917 #endif
4919 if (*start == '\0')
4920 putc ('\n', file);
4923 if (DEFAULT_ABI == ABI_ELFv2)
4924 fprintf (file, "\t.abiversion 2\n");
4926 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
4927 || (TARGET_ELF && flag_pic == 2))
4929 switch_to_section (toc_section);
4930 switch_to_section (text_section);
4935 /* Return nonzero if this function is known to have a null epilogue. */
4937 int
4938 direct_return (void)
4940 if (reload_completed)
4942 rs6000_stack_t *info = rs6000_stack_info ();
4944 if (info->first_gp_reg_save == 32
4945 && info->first_fp_reg_save == 64
4946 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
4947 && ! info->lr_save_p
4948 && ! info->cr_save_p
4949 && info->vrsave_mask == 0
4950 && ! info->push_p)
4951 return 1;
4954 return 0;
4957 /* Return the number of instructions it takes to form a constant in an
4958 integer register. */
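/* Worked example (added comment): on a 32-bit target the constant
   0x12345678 fits neither addi (not a signed 16-bit value) nor addis
   (low half nonzero), so the final else below reports 2 insns, e.g.
   lis plus ori.  */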
4960 int
4961 num_insns_constant_wide (HOST_WIDE_INT value)
4963 /* signed constant loadable with addi */
4964 if ((unsigned HOST_WIDE_INT) (value + 0x8000) < 0x10000)
4965 return 1;
4967 /* constant loadable with addis */
4968 else if ((value & 0xffff) == 0
4969 && (value >> 31 == -1 || value >> 31 == 0))
4970 return 1;
4972 else if (TARGET_POWERPC64)
4974 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
4975 HOST_WIDE_INT high = value >> 31;
4977 if (high == 0 || high == -1)
4978 return 2;
4980 high >>= 1;
4982 if (low == 0)
4983 return num_insns_constant_wide (high) + 1;
4984 else if (high == 0)
4985 return num_insns_constant_wide (low) + 1;
4986 else
4987 return (num_insns_constant_wide (high)
4988 + num_insns_constant_wide (low) + 1);
4991 else
4992 return 2;
4995 int
4996 num_insns_constant (rtx op, enum machine_mode mode)
4998 HOST_WIDE_INT low, high;
5000 switch (GET_CODE (op))
5002 case CONST_INT:
5003 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5004 && mask64_operand (op, mode))
5005 return 2;
5006 else
5007 return num_insns_constant_wide (INTVAL (op));
5009 case CONST_DOUBLE:
5010 if (mode == SFmode || mode == SDmode)
5012 long l;
5013 REAL_VALUE_TYPE rv;
5015 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5016 if (DECIMAL_FLOAT_MODE_P (mode))
5017 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
5018 else
5019 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
5020 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5023 long l[2];
5024 REAL_VALUE_TYPE rv;
5026 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5027 if (DECIMAL_FLOAT_MODE_P (mode))
5028 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
5029 else
5030 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
5031 high = l[WORDS_BIG_ENDIAN == 0];
5032 low = l[WORDS_BIG_ENDIAN != 0];
5034 if (TARGET_32BIT)
5035 return (num_insns_constant_wide (low)
5036 + num_insns_constant_wide (high));
5037 else
5039 if ((high == 0 && low >= 0)
5040 || (high == -1 && low < 0))
5041 return num_insns_constant_wide (low);
5043 else if (mask64_operand (op, mode))
5044 return 2;
5046 else if (low == 0)
5047 return num_insns_constant_wide (high) + 1;
5049 else
5050 return (num_insns_constant_wide (high)
5051 + num_insns_constant_wide (low) + 1);
5054 default:
5055 gcc_unreachable ();
5059 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5060 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5061 corresponding element of the vector, but for V4SFmode and V2SFmode,
5062 the corresponding "float" is interpreted as an SImode integer. */
5064 HOST_WIDE_INT
5065 const_vector_elt_as_int (rtx op, unsigned int elt)
5067 rtx tmp;
5069 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5070 gcc_assert (GET_MODE (op) != V2DImode
5071 && GET_MODE (op) != V2DFmode);
5073 tmp = CONST_VECTOR_ELT (op, elt);
5074 if (GET_MODE (op) == V4SFmode
5075 || GET_MODE (op) == V2SFmode)
5076 tmp = gen_lowpart (SImode, tmp);
5077 return INTVAL (tmp);
5080 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5081 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5082 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5083 all items are set to the same value and contain COPIES replicas of the
5084 vsplt's operand; if STEP > 1, one in every STEP elements is set to the
5085 vsplt's operand and the others are set to the value of the operand's MSB. */
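/* Illustration (added comment): for a V8HImode constant of all 1s, the
   caller first tries STEP 2 (the vspltisw attempt), which fails because
   the in-between halfwords would have to equal the MSB value 0, and then
   succeeds with STEP 1 and COPIES 1, i.e. vspltish 1.  */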
5087 static bool
5088 vspltis_constant (rtx op, unsigned step, unsigned copies)
5090 enum machine_mode mode = GET_MODE (op);
5091 enum machine_mode inner = GET_MODE_INNER (mode);
5093 unsigned i;
5094 unsigned nunits;
5095 unsigned bitsize;
5096 unsigned mask;
5098 HOST_WIDE_INT val;
5099 HOST_WIDE_INT splat_val;
5100 HOST_WIDE_INT msb_val;
5102 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5103 return false;
5105 nunits = GET_MODE_NUNITS (mode);
5106 bitsize = GET_MODE_BITSIZE (inner);
5107 mask = GET_MODE_MASK (inner);
5109 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5110 splat_val = val;
5111 msb_val = val >= 0 ? 0 : -1;
5113 /* Construct the value to be splatted, if possible. If not, return 0. */
5114 for (i = 2; i <= copies; i *= 2)
5116 HOST_WIDE_INT small_val;
5117 bitsize /= 2;
5118 small_val = splat_val >> bitsize;
5119 mask >>= bitsize;
5120 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5121 return false;
5122 splat_val = small_val;
5125 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5126 if (EASY_VECTOR_15 (splat_val))
5129 /* Also check if we can splat, and then add the result to itself. Do so if
5130 the value is positive, or if the splat instruction is using OP's mode;
5131 for splat_val < 0, the splat and the add should use the same mode. */
5132 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5133 && (splat_val >= 0 || (step == 1 && copies == 1)))
5136 /* Also check if we are loading up the most significant bit which can be done by
5137 loading up -1 and shifting the value left by -1. */
5138 else if (EASY_VECTOR_MSB (splat_val, inner))
5141 else
5142 return false;
5144 /* Check if VAL is present in every STEP-th element, and the
5145 other elements are filled with its most significant bit. */
5146 for (i = 1; i < nunits; ++i)
5148 HOST_WIDE_INT desired_val;
5149 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5150 if ((i & (step - 1)) == 0)
5151 desired_val = val;
5152 else
5153 desired_val = msb_val;
5155 if (desired_val != const_vector_elt_as_int (op, elt))
5156 return false;
5159 return true;
5163 /* Return true if OP is of the given MODE and can be synthesized
5164 with a vspltisb, vspltish or vspltisw. */
5166 bool
5167 easy_altivec_constant (rtx op, enum machine_mode mode)
5169 unsigned step, copies;
5171 if (mode == VOIDmode)
5172 mode = GET_MODE (op);
5173 else if (mode != GET_MODE (op))
5174 return false;
5176 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5177 constants. */
5178 if (mode == V2DFmode)
5179 return zero_constant (op, mode);
5181 else if (mode == V2DImode)
5183 /* In case the compiler is built 32-bit, CONST_DOUBLE constants are not
5184 easy. */
5185 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5186 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5187 return false;
5189 if (zero_constant (op, mode))
5190 return true;
5192 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5193 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5194 return true;
5196 return false;
5199 /* V1TImode is a special container for TImode. Ignore for now. */
5200 else if (mode == V1TImode)
5201 return false;
5203 /* Start with a vspltisw. */
5204 step = GET_MODE_NUNITS (mode) / 4;
5205 copies = 1;
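/* For V16QImode this probes step 4 (vspltisw), then step 2 (vspltish),
   then step 1 (vspltisb), per the halving steps below.  */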
5207 if (vspltis_constant (op, step, copies))
5208 return true;
5210 /* Then try with a vspltish. */
5211 if (step == 1)
5212 copies <<= 1;
5213 else
5214 step >>= 1;
5216 if (vspltis_constant (op, step, copies))
5217 return true;
5219 /* And finally a vspltisb. */
5220 if (step == 1)
5221 copies <<= 1;
5222 else
5223 step >>= 1;
5225 if (vspltis_constant (op, step, copies))
5226 return true;
5228 return false;
5231 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5232 result is OP. Abort if it is not possible. */
5234 rtx
5235 gen_easy_altivec_constant (rtx op)
5237 enum machine_mode mode = GET_MODE (op);
5238 int nunits = GET_MODE_NUNITS (mode);
5239 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5240 unsigned step = nunits / 4;
5241 unsigned copies = 1;
5243 /* Start with a vspltisw. */
5244 if (vspltis_constant (op, step, copies))
5245 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5247 /* Then try with a vspltish. */
5248 if (step == 1)
5249 copies <<= 1;
5250 else
5251 step >>= 1;
5253 if (vspltis_constant (op, step, copies))
5254 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5256 /* And finally a vspltisb. */
5257 if (step == 1)
5258 copies <<= 1;
5259 else
5260 step >>= 1;
5262 if (vspltis_constant (op, step, copies))
5263 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
5265 gcc_unreachable ();
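/* Return the assembler template for moving the easy vector constant
   OPERANDS[1] into vector register OPERANDS[0].  */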
5268 const char *
5269 output_vec_const_move (rtx *operands)
5271 int cst, cst2;
5272 enum machine_mode mode;
5273 rtx dest, vec;
5275 dest = operands[0];
5276 vec = operands[1];
5277 mode = GET_MODE (dest);
5279 if (TARGET_VSX)
5281 if (zero_constant (vec, mode))
5282 return "xxlxor %x0,%x0,%x0";
5284 if ((mode == V2DImode || mode == V1TImode)
5285 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
5286 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
5287 return "vspltisw %0,-1";
5290 if (TARGET_ALTIVEC)
5292 rtx splat_vec;
5293 if (zero_constant (vec, mode))
5294 return "vxor %0,%0,%0";
5296 splat_vec = gen_easy_altivec_constant (vec);
5297 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
5298 operands[1] = XEXP (splat_vec, 0);
5299 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
5300 return "#";
5302 switch (GET_MODE (splat_vec))
5304 case V4SImode:
5305 return "vspltisw %0,%1";
5307 case V8HImode:
5308 return "vspltish %0,%1";
5310 case V16QImode:
5311 return "vspltisb %0,%1";
5313 default:
5314 gcc_unreachable ();
5318 gcc_assert (TARGET_SPE);
5320 /* Vector constant 0 is handled as a splitter of V2SI, and in the
5321 pattern of V1DI, V4HI, and V2SF.
5323 FIXME: We should probably return # and add post reload
5324 splitters for these, but this way is so easy ;-). */
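   /* E.g. the V2SImode constant { 7, 7 } is emitted as "li %0,7" plus
      "evmergelo %0,%0,%0"; when the two elements differ, a trailing
      "li %0,%2" then rewrites the low word.  */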
5325 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
5326 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
5327 operands[1] = CONST_VECTOR_ELT (vec, 0);
5328 operands[2] = CONST_VECTOR_ELT (vec, 1);
5329 if (cst == cst2)
5330 return "li %0,%1\n\tevmergelo %0,%0,%0";
5331 else
5332 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
5335 /* Initialize the paired-float vector TARGET to VALS. */
5337 void
5338 paired_expand_vector_init (rtx target, rtx vals)
5340 enum machine_mode mode = GET_MODE (target);
5341 int n_elts = GET_MODE_NUNITS (mode);
5342 int n_var = 0;
5343 rtx x, new_rtx, tmp, constant_op, op1, op2;
5344 int i;
5346 for (i = 0; i < n_elts; ++i)
5348 x = XVECEXP (vals, 0, i);
5349 if (!(CONST_INT_P (x)
5350 || GET_CODE (x) == CONST_DOUBLE
5351 || GET_CODE (x) == CONST_FIXED))
5352 ++n_var;
5354 if (n_var == 0)
5356 /* Load from constant pool. */
5357 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5358 return;
5361 if (n_var == 2)
5363 /* The vector is initialized only with non-constants. */
5364 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
5365 XVECEXP (vals, 0, 1));
5367 emit_move_insn (target, new_rtx);
5368 return;
5371 /* One field is non-constant and the other one is a constant. Load the
5372 constant from the constant pool and use ps_merge instruction to
5373 construct the whole vector. */
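   /* E.g. for { 2.0f, x }, 2.0f is loaded into a temporary register from
      the constant pool and then concatenated with x by the V2SF
      VEC_CONCAT below.  */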
5374 op1 = XVECEXP (vals, 0, 0);
5375 op2 = XVECEXP (vals, 0, 1);
5377 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
5379 tmp = gen_reg_rtx (GET_MODE (constant_op));
5380 emit_move_insn (tmp, constant_op);
5382 if (CONSTANT_P (op1))
5383 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
5384 else
5385 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
5387 emit_move_insn (target, new_rtx);
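/* Move paired-float (V2SF) vector OPERANDS[1] into OPERANDS[0].  */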
5390 void
5391 paired_expand_vector_move (rtx operands[])
5393 rtx op0 = operands[0], op1 = operands[1];
5395 emit_move_insn (op0, op1);
5398 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
5399 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
5400 operands for the relation operation COND. This is a recursive
5401 function. */
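/* For example, LT recurses as GE with the select operands OP0/OP1
   swapped, LE recurses as GE with the comparison operands swapped, and
   GT recurses as LE; only the GE and EQ cases emit instructions
   directly, and NE reuses EQ with the select operands reversed.  */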
5403 static void
5404 paired_emit_vector_compare (enum rtx_code rcode,
5405 rtx dest, rtx op0, rtx op1,
5406 rtx cc_op0, rtx cc_op1)
5408 rtx tmp = gen_reg_rtx (V2SFmode);
5409 rtx tmp1, max, min;
5411 gcc_assert (TARGET_PAIRED_FLOAT);
5412 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5414 switch (rcode)
5416 case LT:
5417 case LTU:
5418 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5419 return;
5420 case GE:
5421 case GEU:
5422 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5423 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
5424 return;
5425 case LE:
5426 case LEU:
5427 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
5428 return;
5429 case GT:
5430 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5431 return;
5432 case EQ:
5433 tmp1 = gen_reg_rtx (V2SFmode);
5434 max = gen_reg_rtx (V2SFmode);
5435 min = gen_reg_rtx (V2SFmode);
5438 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5439 emit_insn (gen_selv2sf4
5440 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5441 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
5442 emit_insn (gen_selv2sf4
5443 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5444 emit_insn (gen_subv2sf3 (tmp1, min, max));
5445 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
5446 return;
5447 case NE:
5448 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
5449 return;
5450 case UNLE:
5451 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5452 return;
5453 case UNLT:
5454 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
5455 return;
5456 case UNGE:
5457 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5458 return;
5459 case UNGT:
5460 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
5461 return;
5462 default:
5463 gcc_unreachable ();
5466 return;
5469 /* Emit vector conditional expression.
5470 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5471 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5473 int
5474 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5475 rtx cond, rtx cc_op0, rtx cc_op1)
5477 enum rtx_code rcode = GET_CODE (cond);
5479 if (!TARGET_PAIRED_FLOAT)
5480 return 0;
5482 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
5484 return 1;
5487 /* Initialize vector TARGET to VALS. */
5489 void
5490 rs6000_expand_vector_init (rtx target, rtx vals)
5492 enum machine_mode mode = GET_MODE (target);
5493 enum machine_mode inner_mode = GET_MODE_INNER (mode);
5494 int n_elts = GET_MODE_NUNITS (mode);
5495 int n_var = 0, one_var = -1;
5496 bool all_same = true, all_const_zero = true;
5497 rtx x, mem;
5498 int i;
5500 for (i = 0; i < n_elts; ++i)
5502 x = XVECEXP (vals, 0, i);
5503 if (!(CONST_INT_P (x)
5504 || GET_CODE (x) == CONST_DOUBLE
5505 || GET_CODE (x) == CONST_FIXED))
5506 ++n_var, one_var = i;
5507 else if (x != CONST0_RTX (inner_mode))
5508 all_const_zero = false;
5510 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
5511 all_same = false;
5514 if (n_var == 0)
5516 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
5517 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
5518 if ((int_vector_p || TARGET_VSX) && all_const_zero)
5520 /* Zero register. */
5521 emit_insn (gen_rtx_SET (VOIDmode, target,
5522 gen_rtx_XOR (mode, target, target)));
5523 return;
5525 else if (int_vector_p && easy_vector_constant (const_vec, mode))
5527 /* Splat immediate. */
5528 emit_insn (gen_rtx_SET (VOIDmode, target, const_vec));
5529 return;
5531 else
5533 /* Load from constant pool. */
5534 emit_move_insn (target, const_vec);
5535 return;
5539 /* Double word values on VSX can use xxpermdi or lxvdsx. */
5540 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5542 rtx op0 = XVECEXP (vals, 0, 0);
5543 rtx op1 = XVECEXP (vals, 0, 1);
5544 if (all_same)
5546 if (!MEM_P (op0) && !REG_P (op0))
5547 op0 = force_reg (inner_mode, op0);
5548 if (mode == V2DFmode)
5549 emit_insn (gen_vsx_splat_v2df (target, op0));
5550 else
5551 emit_insn (gen_vsx_splat_v2di (target, op0));
5553 else
5555 op0 = force_reg (inner_mode, op0);
5556 op1 = force_reg (inner_mode, op1);
5557 if (mode == V2DFmode)
5558 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
5559 else
5560 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
5562 return;
5565 /* With single precision floating point on VSX, note that internally single
5566 precision is actually represented as a double. Either make 2 V2DF
5567 vectors and convert those vectors to single precision, or do one
5568 conversion and splat the result to the other elements. */
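/* A sketch of the non-uniform path below: { a, b, c, d } builds V2DF
   pairs { a, b } and { c, d }, converts each to single precision with
   xvcvdpsp, and extracts the even (original) lanes into the V4SF
   target.  */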
5569 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
5571 if (all_same)
5573 rtx freg = gen_reg_rtx (V4SFmode);
5574 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
5575 rtx cvt = ((TARGET_XSCVDPSPN)
5576 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
5577 : gen_vsx_xscvdpsp_scalar (freg, sreg));
5579 emit_insn (cvt);
5580 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
5582 else
5584 rtx dbl_even = gen_reg_rtx (V2DFmode);
5585 rtx dbl_odd = gen_reg_rtx (V2DFmode);
5586 rtx flt_even = gen_reg_rtx (V4SFmode);
5587 rtx flt_odd = gen_reg_rtx (V4SFmode);
5588 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
5589 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
5590 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
5591 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
5593 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
5594 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
5595 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
5596 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
5597 rs6000_expand_extract_even (target, flt_even, flt_odd);
5599 return;
5602 /* Store value to stack temp. Load vector element. Splat. However, splat
5603 of 64-bit items is not supported on AltiVec. */
5604 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
5606 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5607 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
5608 XVECEXP (vals, 0, 0));
5609 x = gen_rtx_UNSPEC (VOIDmode,
5610 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5611 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5612 gen_rtvec (2,
5613 gen_rtx_SET (VOIDmode,
5614 target, mem),
5615 x)));
5616 x = gen_rtx_VEC_SELECT (inner_mode, target,
5617 gen_rtx_PARALLEL (VOIDmode,
5618 gen_rtvec (1, const0_rtx)));
5619 emit_insn (gen_rtx_SET (VOIDmode, target,
5620 gen_rtx_VEC_DUPLICATE (mode, x)));
5621 return;
5624 /* One field is non-constant. Load constant then overwrite
5625 varying field. */
5626 if (n_var == 1)
5628 rtx copy = copy_rtx (vals);
5630 /* Load constant part of vector, substitute neighboring value for
5631 varying element. */
5632 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
5633 rs6000_expand_vector_init (target, copy);
5635 /* Insert variable. */
5636 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
5637 return;
5640 /* Construct the vector in memory one field at a time
5641 and load the whole vector. */
5642 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5643 for (i = 0; i < n_elts; i++)
5644 emit_move_insn (adjust_address_nv (mem, inner_mode,
5645 i * GET_MODE_SIZE (inner_mode)),
5646 XVECEXP (vals, 0, i));
5647 emit_move_insn (target, mem);
5650 /* Set field ELT of TARGET to VAL. */
5652 void
5653 rs6000_expand_vector_set (rtx target, rtx val, int elt)
5655 enum machine_mode mode = GET_MODE (target);
5656 enum machine_mode inner_mode = GET_MODE_INNER (mode);
5657 rtx reg = gen_reg_rtx (mode);
5658 rtx mask, mem, x;
5659 int width = GET_MODE_SIZE (inner_mode);
5660 int i;
5662 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5664 rtx (*set_func) (rtx, rtx, rtx, rtx)
5665 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
5666 emit_insn (set_func (target, target, val, GEN_INT (elt)));
5667 return;
5670 /* Simplify setting single element vectors like V1TImode. */
5671 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
5673 emit_move_insn (target, gen_lowpart (mode, val));
5674 return;
5677 /* Load single variable value. */
5678 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5679 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
5680 x = gen_rtx_UNSPEC (VOIDmode,
5681 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5682 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5683 gen_rtvec (2,
5684 gen_rtx_SET (VOIDmode,
5685 reg, mem),
5686 x)));
5688 /* Linear sequence. */
5689 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
5690 for (i = 0; i < 16; ++i)
5691 XVECEXP (mask, 0, i) = GEN_INT (i);
5693 /* Set permute mask to insert element into target. */
5694 for (i = 0; i < width; ++i)
5695 XVECEXP (mask, 0, elt*width + i)
5696 = GEN_INT (i + 0x10);
5697 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
5699 if (BYTES_BIG_ENDIAN)
5700 x = gen_rtx_UNSPEC (mode,
5701 gen_rtvec (3, target, reg,
5702 force_reg (V16QImode, x)),
5703 UNSPEC_VPERM);
5704 else
5706 /* Invert selector. We prefer to generate VNAND on P8 so
5707 that future fusion opportunities can kick in, but must
5708 generate VNOR elsewhere. */
5709 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
5710 rtx iorx = (TARGET_P8_VECTOR
5711 ? gen_rtx_IOR (V16QImode, notx, notx)
5712 : gen_rtx_AND (V16QImode, notx, notx));
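	  /* By De Morgan, (ior (not x) (not x)) matches the vnand pattern
	     and (and (not x) (not x)) matches vnor; both compute ~x,
	     differing only in which instruction implements it.  */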
5713 rtx tmp = gen_reg_rtx (V16QImode);
5714 emit_insn (gen_rtx_SET (VOIDmode, tmp, iorx));
5716 /* Permute with operands reversed and adjusted selector. */
5717 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
5718 UNSPEC_VPERM);
5721 emit_insn (gen_rtx_SET (VOIDmode, target, x));
5724 /* Extract field ELT from VEC into TARGET. */
5726 void
5727 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
5729 enum machine_mode mode = GET_MODE (vec);
5730 enum machine_mode inner_mode = GET_MODE_INNER (mode);
5731 rtx mem;
5733 if (VECTOR_MEM_VSX_P (mode))
5735 switch (mode)
5737 default:
5738 break;
5739 case V1TImode:
5740 gcc_assert (elt == 0 && inner_mode == TImode);
5741 emit_move_insn (target, gen_lowpart (TImode, vec));
5742 break;
5743 case V2DFmode:
5744 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
5745 return;
5746 case V2DImode:
5747 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
5748 return;
5749 case V4SFmode:
5750 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
5751 return;
5755 /* Allocate mode-sized buffer. */
5756 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5758 emit_move_insn (mem, vec);
5760 /* Add offset to field within buffer matching vector element. */
5761 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
5763 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
5766 /* Generates shifts and masks for a pair of rldicl or rldicr insns to
5767 implement ANDing by the mask IN. */
5768 void
5769 build_mask64_2_operands (rtx in, rtx *out)
5771 unsigned HOST_WIDE_INT c, lsb, m1, m2;
5772 int shift;
5774 gcc_assert (GET_CODE (in) == CONST_INT);
5776 c = INTVAL (in);
5777 if (c & 1)
5779 /* Assume c initially something like 0x00fff000000fffff. The idea
5780 is to rotate the word so that the middle ^^^^^^ group of zeros
5781 is at the MS end and can be cleared with an rldicl mask. We then
5782 rotate back and clear off the MS ^^ group of zeros with a
5783 second rldicl. */
5784 c = ~c; /* c == 0xff000ffffff00000 */
5785 lsb = c & -c; /* lsb == 0x0000000000100000 */
5786 m1 = -lsb; /* m1 == 0xfffffffffff00000 */
5787 c = ~c; /* c == 0x00fff000000fffff */
5788 c &= -lsb; /* c == 0x00fff00000000000 */
5789 lsb = c & -c; /* lsb == 0x0000100000000000 */
5790 c = ~c; /* c == 0xff000fffffffffff */
5791 c &= -lsb; /* c == 0xff00000000000000 */
5792 shift = 0;
5793 while ((lsb >>= 1) != 0)
5794 shift++; /* shift == 44 on exit from loop */
5795 m1 <<= 64 - shift; /* m1 == 0xffffff0000000000 */
5796 m1 = ~m1; /* m1 == 0x000000ffffffffff */
5797 m2 = ~c; /* m2 == 0x00ffffffffffffff */
5799 else
5801 /* Assume c initially something like 0xff000f0000000000. The idea
5802 is to rotate the word so that the ^^^ middle group of zeros
5803 is at the LS end and can be cleared with an rldicr mask. We then
5804 rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
5805 a second rldicr. */
5806 lsb = c & -c; /* lsb == 0x0000010000000000 */
5807 m2 = -lsb; /* m2 == 0xffffff0000000000 */
5808 c = ~c; /* c == 0x00fff0ffffffffff */
5809 c &= -lsb; /* c == 0x00fff00000000000 */
5810 lsb = c & -c; /* lsb == 0x0000100000000000 */
5811 c = ~c; /* c == 0xff000fffffffffff */
5812 c &= -lsb; /* c == 0xff00000000000000 */
5813 shift = 0;
5814 while ((lsb >>= 1) != 0)
5815 shift++; /* shift == 44 on exit from loop */
5816 m1 = ~c; /* m1 == 0x00ffffffffffffff */
5817 m1 >>= shift; /* m1 == 0x0000000000000fff */
5818 m1 = ~m1; /* m1 == 0xfffffffffffff000 */
5821 /* Note that when we only have two 0->1 and 1->0 transitions, one of the
5822 masks will be all 1's. We are guaranteed more than one transition. */
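   /* For the 0x00fff000000fffff example above this yields out[0] = 20
      (i.e. 64 - 44), out[1] = 0x000000ffffffffff, out[2] = 44 and
      out[3] = 0x00ffffffffffffff.  */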
5823 out[0] = GEN_INT (64 - shift);
5824 out[1] = GEN_INT (m1);
5825 out[2] = GEN_INT (shift);
5826 out[3] = GEN_INT (m2);
5829 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
5831 bool
5832 invalid_e500_subreg (rtx op, enum machine_mode mode)
5834 if (TARGET_E500_DOUBLE)
5836 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
5837 subreg:TI and reg:TF. Decimal float modes are like integer
5838 modes (only low part of each register used) for this
5839 purpose. */
5840 if (GET_CODE (op) == SUBREG
5841 && (mode == SImode || mode == DImode || mode == TImode
5842 || mode == DDmode || mode == TDmode || mode == PTImode)
5843 && REG_P (SUBREG_REG (op))
5844 && (GET_MODE (SUBREG_REG (op)) == DFmode
5845 || GET_MODE (SUBREG_REG (op)) == TFmode))
5846 return true;
5848 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
5849 reg:TI. */
5850 if (GET_CODE (op) == SUBREG
5851 && (mode == DFmode || mode == TFmode)
5852 && REG_P (SUBREG_REG (op))
5853 && (GET_MODE (SUBREG_REG (op)) == DImode
5854 || GET_MODE (SUBREG_REG (op)) == TImode
5855 || GET_MODE (SUBREG_REG (op)) == PTImode
5856 || GET_MODE (SUBREG_REG (op)) == DDmode
5857 || GET_MODE (SUBREG_REG (op)) == TDmode))
5858 return true;
5861 if (TARGET_SPE
5862 && GET_CODE (op) == SUBREG
5863 && mode == SImode
5864 && REG_P (SUBREG_REG (op))
5865 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
5866 return true;
5868 return false;
5871 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
5872 selects whether the alignment is abi mandated, optional, or
5873 both abi and optional alignment. */
5875 unsigned int
5876 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
5878 if (how != align_opt)
5880 if (TREE_CODE (type) == VECTOR_TYPE)
5882 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
5883 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
5885 if (align < 64)
5886 align = 64;
5888 else if (align < 128)
5889 align = 128;
5891 else if (TARGET_E500_DOUBLE
5892 && TREE_CODE (type) == REAL_TYPE
5893 && TYPE_MODE (type) == DFmode)
5895 if (align < 64)
5896 align = 64;
5900 if (how != align_abi)
5902 if (TREE_CODE (type) == ARRAY_TYPE
5903 && TYPE_MODE (TREE_TYPE (type)) == QImode)
5905 if (align < BITS_PER_WORD)
5906 align = BITS_PER_WORD;
5910 return align;
5913 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
5915 bool
5916 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
5918 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5920 if (computed != 128)
5922 static bool warned;
5923 if (!warned && warn_psabi)
5925 warned = true;
5926 inform (input_location,
5927 "the layout of aggregates containing vectors with"
5928 " %d-byte alignment will change in a future GCC release",
5929 computed / BITS_PER_UNIT);
5932 /* GCC 4.8/4.9 Note: To avoid any ABI change on a release branch, we
5933 keep the special treatment of vector types, but warn if there will
5934 be differences in future GCC releases. */
5935 return true;
5938 return false;
5941 /* AIX increases natural record alignment to doubleword if the first
5942 field is an FP double while the FP fields remain word aligned. */
5944 unsigned int
5945 rs6000_special_round_type_align (tree type, unsigned int computed,
5946 unsigned int specified)
5948 unsigned int align = MAX (computed, specified);
5949 tree field = TYPE_FIELDS (type);
5951 /* Skip all non-field decls. */
5952 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
5953 field = DECL_CHAIN (field);
5955 if (field != NULL && field != type)
5957 type = TREE_TYPE (field);
5958 while (TREE_CODE (type) == ARRAY_TYPE)
5959 type = TREE_TYPE (type);
5961 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
5962 align = MAX (align, 64);
5965 return align;
5968 /* Darwin increases record alignment to the natural alignment of
5969 the first field. */
5971 unsigned int
5972 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
5973 unsigned int specified)
5975 unsigned int align = MAX (computed, specified);
5977 if (TYPE_PACKED (type))
5978 return align;
5980 /* Find the first field, looking down into aggregates. */
5981 do {
5982 tree field = TYPE_FIELDS (type);
5983 /* Skip all non-field decls. */
5984 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
5985 field = DECL_CHAIN (field);
5986 if (! field)
5987 break;
5988 /* A packed field does not contribute any extra alignment. */
5989 if (DECL_PACKED (field))
5990 return align;
5991 type = TREE_TYPE (field);
5992 while (TREE_CODE (type) == ARRAY_TYPE)
5993 type = TREE_TYPE (type);
5994 } while (AGGREGATE_TYPE_P (type));
5996 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
5997 align = MAX (align, TYPE_ALIGN (type));
5999 return align;
6002 /* Return 1 for an operand in small memory on V.4/eabi. */
6004 int
6005 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6006 enum machine_mode mode ATTRIBUTE_UNUSED)
6008 #if TARGET_ELF
6009 rtx sym_ref;
6011 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6012 return 0;
6014 if (DEFAULT_ABI != ABI_V4)
6015 return 0;
6017 /* Vector and float memory instructions have a limited offset on the
6018 SPE, so using a vector or float variable directly as an operand is
6019 not useful. */
6020 if (TARGET_SPE
6021 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6022 return 0;
6024 if (GET_CODE (op) == SYMBOL_REF)
6025 sym_ref = op;
6027 else if (GET_CODE (op) != CONST
6028 || GET_CODE (XEXP (op, 0)) != PLUS
6029 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6030 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6031 return 0;
6033 else
6035 rtx sum = XEXP (op, 0);
6036 HOST_WIDE_INT summand;
6038 /* We have to be careful here, because it is the referenced address
6039 that must be 32k from _SDA_BASE_, not just the symbol. */
6040 summand = INTVAL (XEXP (sum, 1));
6041 if (summand < 0 || summand > g_switch_value)
6042 return 0;
6044 sym_ref = XEXP (sum, 0);
6047 return SYMBOL_REF_SMALL_P (sym_ref);
6048 #else
6049 return 0;
6050 #endif
6053 /* Return true if either operand is a general purpose register. */
6055 bool
6056 gpr_or_gpr_p (rtx op0, rtx op1)
6058 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6059 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6062 /* Return true if this is a move direct operation between GPR registers and
6063 floating point/VSX registers. */
6065 bool
6066 direct_move_p (rtx op0, rtx op1)
6068 int regno0, regno1;
6070 if (!REG_P (op0) || !REG_P (op1))
6071 return false;
6073 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6074 return false;
6076 regno0 = REGNO (op0);
6077 regno1 = REGNO (op1);
6078 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6079 return false;
6081 if (INT_REGNO_P (regno0))
6082 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6084 else if (INT_REGNO_P (regno1))
6086 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6087 return true;
6089 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6090 return true;
6093 return false;
6096 /* Return true if this is a load or store quad operation. This function does
6097 not handle the atomic quad memory instructions. */
6099 bool
6100 quad_load_store_p (rtx op0, rtx op1)
6102 bool ret;
6104 if (!TARGET_QUAD_MEMORY)
6105 ret = false;
6107 else if (REG_P (op0) && MEM_P (op1))
6108 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6109 && quad_memory_operand (op1, GET_MODE (op1))
6110 && !reg_overlap_mentioned_p (op0, op1));
6112 else if (MEM_P (op0) && REG_P (op1))
6113 ret = (quad_memory_operand (op0, GET_MODE (op0))
6114 && quad_int_reg_operand (op1, GET_MODE (op1)));
6116 else
6117 ret = false;
6119 if (TARGET_DEBUG_ADDR)
6121 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6122 ret ? "true" : "false");
6123 debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
6126 return ret;
6129 /* Given an address, return a constant offset term if one exists. */
6131 static rtx
6132 address_offset (rtx op)
6134 if (GET_CODE (op) == PRE_INC
6135 || GET_CODE (op) == PRE_DEC)
6136 op = XEXP (op, 0);
6137 else if (GET_CODE (op) == PRE_MODIFY
6138 || GET_CODE (op) == LO_SUM)
6139 op = XEXP (op, 1);
6141 if (GET_CODE (op) == CONST)
6142 op = XEXP (op, 0);
6144 if (GET_CODE (op) == PLUS)
6145 op = XEXP (op, 1);
6147 if (CONST_INT_P (op))
6148 return op;
6150 return NULL_RTX;
6153 /* Return true if the MEM operand is a memory operand suitable for use
6154 with a (full width, possibly multiple) gpr load/store. On
6155 powerpc64 this means the offset must be divisible by 4.
6156 Implements 'Y' constraint.
6158 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6159 a constraint function we know the operand has satisfied a suitable
6160 memory predicate. Also accept some odd rtl generated by reload
6161 (see rs6000_legitimize_reload_address for various forms). It is
6162 important that reload rtl be accepted by appropriate constraints
6163 but not by the operand predicate.
6165 Offsetting a lo_sum should not be allowed, except where we know by
6166 alignment that a 32k boundary is not crossed, but see the ???
6167 comment in rs6000_legitimize_reload_address. Note that by
6168 "offsetting" here we mean a further offset to access parts of the
6169 MEM. It's fine to have a lo_sum where the inner address is offset
6170 from a sym, since the same sym+offset will appear in the high part
6171 of the address calculation. */
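/* For example, with TImode (extra = 8) an offset of 32760 is rejected:
   the offset itself encodes in 16 bits, but the trailing doubleword at
   offset+8 would step past the 32k displacement range, while an offset
   of 32752 is accepted.  */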
6173 bool
6174 mem_operand_gpr (rtx op, enum machine_mode mode)
6176 unsigned HOST_WIDE_INT offset;
6177 int extra;
6178 rtx addr = XEXP (op, 0);
6180 op = address_offset (addr);
6181 if (op == NULL_RTX)
6182 return true;
6184 offset = INTVAL (op);
6185 if (TARGET_POWERPC64 && (offset & 3) != 0)
6186 return false;
6188 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
6189 if (extra < 0)
6190 extra = 0;
6192 if (GET_CODE (addr) == LO_SUM)
6193 /* For lo_sum addresses, we must allow any offset except one that
6194 causes a wrap, so test only the low 16 bits. */
6195 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
6197 return offset + 0x8000 < 0x10000u - extra;
6200 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
6202 static bool
6203 reg_offset_addressing_ok_p (enum machine_mode mode)
6205 switch (mode)
6207 case V16QImode:
6208 case V8HImode:
6209 case V4SFmode:
6210 case V4SImode:
6211 case V2DFmode:
6212 case V2DImode:
6213 case V1TImode:
6214 case TImode:
6215 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6216 TImode is not a vector mode, if we want to use the VSX registers to
6217 move it around, we need to restrict ourselves to reg+reg
6218 addressing. */
6219 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6220 return false;
6221 break;
6223 case V4HImode:
6224 case V2SImode:
6225 case V1DImode:
6226 case V2SFmode:
6227 /* Paired vector modes. Only reg+reg addressing is valid. */
6228 if (TARGET_PAIRED_FLOAT)
6229 return false;
6230 break;
6232 case SDmode:
6233 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6234 addressing for the LFIWZX and STFIWX instructions. */
6235 if (TARGET_NO_SDMODE_STACK)
6236 return false;
6237 break;
6239 default:
6240 break;
6243 return true;
6246 static bool
6247 virtual_stack_registers_memory_p (rtx op)
6249 int regnum;
6251 if (GET_CODE (op) == REG)
6252 regnum = REGNO (op);
6254 else if (GET_CODE (op) == PLUS
6255 && GET_CODE (XEXP (op, 0)) == REG
6256 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6257 regnum = REGNO (XEXP (op, 0));
6259 else
6260 return false;
6262 return (regnum >= FIRST_VIRTUAL_REGISTER
6263 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
6266 /* Return true if a MODE sized memory accesses to OP plus OFFSET
6267 is known to not straddle a 32k boundary. */
6269 static bool
6270 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
6271 enum machine_mode mode)
6273 tree decl, type;
6274 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
6276 if (GET_CODE (op) != SYMBOL_REF)
6277 return false;
6279 dsize = GET_MODE_SIZE (mode);
6280 decl = SYMBOL_REF_DECL (op);
6281 if (!decl)
6283 if (dsize == 0)
6284 return false;
6286 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
6287 replacing memory addresses with an anchor plus offset. We
6288 could find the decl by rummaging around in the block->objects
6289 VEC for the given offset but that seems like too much work. */
6290 dalign = BITS_PER_UNIT;
6291 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
6292 && SYMBOL_REF_ANCHOR_P (op)
6293 && SYMBOL_REF_BLOCK (op) != NULL)
6295 struct object_block *block = SYMBOL_REF_BLOCK (op);
6297 dalign = block->alignment;
6298 offset += SYMBOL_REF_BLOCK_OFFSET (op);
6300 else if (CONSTANT_POOL_ADDRESS_P (op))
6302 /* It would be nice to have get_pool_align (). */
6303 enum machine_mode cmode = get_pool_mode (op);
6305 dalign = GET_MODE_ALIGNMENT (cmode);
6308 else if (DECL_P (decl))
6310 dalign = DECL_ALIGN (decl);
6312 if (dsize == 0)
6314 /* Allow BLKmode when the entire object is known to not
6315 cross a 32k boundary. */
6316 if (!DECL_SIZE_UNIT (decl))
6317 return false;
6319 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
6320 return false;
6322 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
6323 if (dsize > 32768)
6324 return false;
6326 return dalign / BITS_PER_UNIT >= dsize;
6329 else
6331 type = TREE_TYPE (decl);
6333 dalign = TYPE_ALIGN (type);
6334 if (CONSTANT_CLASS_P (decl))
6335 dalign = CONSTANT_ALIGNMENT (decl, dalign);
6336 else
6337 dalign = DATA_ALIGNMENT (decl, dalign);
6339 if (dsize == 0)
6341 /* BLKmode, check the entire object. */
6342 if (TREE_CODE (decl) == STRING_CST)
6343 dsize = TREE_STRING_LENGTH (decl);
6344 else if (TYPE_SIZE_UNIT (type)
6345 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
6346 dsize = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6347 else
6348 return false;
6349 if (dsize > 32768)
6350 return false;
6352 return dalign / BITS_PER_UNIT >= dsize;
6356 /* Find how many bits of the alignment we know for this access. */
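      /* E.g. a 16-byte-aligned object accessed at offset 4: lsb is 4,
	 the known alignment drops to 4 bytes, and only accesses of at
	 most 4 bytes are known not to straddle a 32k boundary.  */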
6357 mask = dalign / BITS_PER_UNIT - 1;
6358 lsb = offset & -offset;
6359 mask &= lsb - 1;
6360 dalign = mask + 1;
6362 return dalign >= dsize;
6365 static bool
6366 constant_pool_expr_p (rtx op)
6368 rtx base, offset;
6370 split_const (op, &base, &offset);
6371 return (GET_CODE (base) == SYMBOL_REF
6372 && CONSTANT_POOL_ADDRESS_P (base)
6373 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
6376 static const_rtx tocrel_base, tocrel_offset;
6378 /* Return true if OP is a toc pointer relative address (the output
6379 of create_TOC_reference). If STRICT, do not match high part or
6380 non-split -mcmodel=large/medium toc pointer relative addresses. */
6382 bool
6383 toc_relative_expr_p (const_rtx op, bool strict)
6385 if (!TARGET_TOC)
6386 return false;
6388 if (TARGET_CMODEL != CMODEL_SMALL)
6390 /* Only match the low part. */
6391 if (GET_CODE (op) == LO_SUM
6392 && REG_P (XEXP (op, 0))
6393 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
6394 op = XEXP (op, 1);
6395 else if (strict)
6396 return false;
6399 tocrel_base = op;
6400 tocrel_offset = const0_rtx;
6401 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
6403 tocrel_base = XEXP (op, 0);
6404 tocrel_offset = XEXP (op, 1);
6407 return (GET_CODE (tocrel_base) == UNSPEC
6408 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
6411 /* Return true if X is a constant pool address, and also for cmodel=medium
6412 if X is a toc-relative address known to be offsettable within MODE. */
6414 bool
6415 legitimate_constant_pool_address_p (const_rtx x, enum machine_mode mode,
6416 bool strict)
6418 return (toc_relative_expr_p (x, strict)
6419 && (TARGET_CMODEL != CMODEL_MEDIUM
6420 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
6421 || mode == QImode
6422 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
6423 INTVAL (tocrel_offset), mode)));
6426 static bool
6427 legitimate_small_data_p (enum machine_mode mode, rtx x)
6429 return (DEFAULT_ABI == ABI_V4
6430 && !flag_pic && !TARGET_TOC
6431 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
6432 && small_data_operand (x, mode));
6435 /* SPE offset addressing is limited to 5 bits' worth of doublewords. */
6436 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
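/* I.e. the offset must be a multiple of 8 between 0 and 0xf8 (31
   doublewords) inclusive.  */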
6438 bool
6439 rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x,
6440 bool strict, bool worst_case)
6442 unsigned HOST_WIDE_INT offset;
6443 unsigned int extra;
6445 if (GET_CODE (x) != PLUS)
6446 return false;
6447 if (!REG_P (XEXP (x, 0)))
6448 return false;
6449 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6450 return false;
6451 if (!reg_offset_addressing_ok_p (mode))
6452 return virtual_stack_registers_memory_p (x);
6453 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
6454 return true;
6455 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6456 return false;
6458 offset = INTVAL (XEXP (x, 1));
6459 extra = 0;
6460 switch (mode)
6462 case V4HImode:
6463 case V2SImode:
6464 case V1DImode:
6465 case V2SFmode:
6466 /* SPE vector modes. */
6467 return SPE_CONST_OFFSET_OK (offset);
6469 case DFmode:
6470 case DDmode:
6471 case DImode:
6472 /* On e500v2, we may have:
6474 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
6476 Which gets addressed with evldd instructions. */
6477 if (TARGET_E500_DOUBLE)
6478 return SPE_CONST_OFFSET_OK (offset);
6480 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
6481 addressing. */
6482 if (VECTOR_MEM_VSX_P (mode))
6483 return false;
6485 if (!worst_case)
6486 break;
6487 if (!TARGET_POWERPC64)
6488 extra = 4;
6489 else if (offset & 3)
6490 return false;
6491 break;
6493 case TFmode:
6494 if (TARGET_E500_DOUBLE)
6495 return (SPE_CONST_OFFSET_OK (offset)
6496 && SPE_CONST_OFFSET_OK (offset + 8));
6497 /* fall through */
6499 case TDmode:
6500 case TImode:
6501 case PTImode:
6502 extra = 8;
6503 if (!worst_case)
6504 break;
6505 if (!TARGET_POWERPC64)
6506 extra = 12;
6507 else if (offset & 3)
6508 return false;
6509 break;
6511 default:
6512 break;
6515 offset += 0x8000;
6516 return offset < 0x10000 - extra;
6519 bool
6520 legitimate_indexed_address_p (rtx x, int strict)
6522 rtx op0, op1;
6524 if (GET_CODE (x) != PLUS)
6525 return false;
6527 op0 = XEXP (x, 0);
6528 op1 = XEXP (x, 1);
6530 /* Recognize the rtl generated by reload which we know will later be
6531 replaced with proper base and index regs. */
6532 if (!strict
6533 && reload_in_progress
6534 && (REG_P (op0) || GET_CODE (op0) == PLUS)
6535 && REG_P (op1))
6536 return true;
6538 return (REG_P (op0) && REG_P (op1)
6539 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
6540 && INT_REG_OK_FOR_INDEX_P (op1, strict))
6541 || (INT_REG_OK_FOR_BASE_P (op1, strict)
6542 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
6545 bool
6546 avoiding_indexed_address_p (enum machine_mode mode)
6548 /* Avoid indexed addressing for modes that have non-indexed
6549 load/store instruction forms. */
6550 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
6553 bool
6554 legitimate_indirect_address_p (rtx x, int strict)
6556 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
6559 bool
6560 macho_lo_sum_memory_operand (rtx x, enum machine_mode mode)
6562 if (!TARGET_MACHO || !flag_pic
6563 || mode != SImode || GET_CODE (x) != MEM)
6564 return false;
6565 x = XEXP (x, 0);
6567 if (GET_CODE (x) != LO_SUM)
6568 return false;
6569 if (GET_CODE (XEXP (x, 0)) != REG)
6570 return false;
6571 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
6572 return false;
6573 x = XEXP (x, 1);
6575 return CONSTANT_P (x);
6578 static bool
6579 legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict)
6581 if (GET_CODE (x) != LO_SUM)
6582 return false;
6583 if (GET_CODE (XEXP (x, 0)) != REG)
6584 return false;
6585 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6586 return false;
6587 /* Restrict addressing for DI because of our SUBREG hackery. */
6588 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
6589 return false;
6590 x = XEXP (x, 1);
6592 if (TARGET_ELF || TARGET_MACHO)
6594 bool large_toc_ok;
6596 if (DEFAULT_ABI == ABI_V4 && flag_pic)
6597 return false;
6598 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, which usually calls
6599 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
6600 recognizes some LO_SUM addresses as valid although this
6601 function says the opposite. In most cases LRA can generate
6602 correct code for address reloads through various transformations,
6603 but it cannot manage some LO_SUM cases. So we need to add code
6604 here, analogous to that in rs6000_legitimize_reload_address for
6605 LO_SUM, saying that some addresses are still valid. */
6606 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
6607 && small_toc_ref (x, VOIDmode));
6608 if (TARGET_TOC && ! large_toc_ok)
6609 return false;
6610 if (GET_MODE_NUNITS (mode) != 1)
6611 return false;
6612 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6613 && !(/* ??? Assume floating point reg based on mode? */
6614 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
6615 && (mode == DFmode || mode == DDmode)))
6616 return false;
6618 return CONSTANT_P (x) || large_toc_ok;
6621 return false;
6625 /* Try machine-dependent ways of modifying an illegitimate address
6626 to be legitimate. If we find one, return the new, valid address.
6627 This is used from only one place: `memory_address' in explow.c.
6629 OLDX is the address as it was before break_out_memory_refs was
6630 called. In some cases it is useful to look at this to decide what
6631 needs to be done.
6633 It is always safe for this function to do nothing. It exists to
6634 recognize opportunities to optimize the output.
6636 On RS/6000, first check for the sum of a register with a constant
6637 integer that is out of range. If so, generate code to add the
6638 constant with the low-order 16 bits masked to the register and force
6639 this result into another register (this can be done with `cau').
6640 Then generate an address of REG+(CONST&0xffff), allowing for the
6641 possibility of bit 16 being a one.
6643 Then check for the sum of a register and something not constant, try to
6644 load the other things into a register and return the sum. */
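/* As a rough example, legitimizing (plus (reg) (const_int 0x12345678))
   splits the constant into high_int = 0x12340000 and low_int = 0x5678,
   so the generated code is an addis of 0x1234 into a temporary followed
   by a memory access at 0x5678 off that temporary.  */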
6646 static rtx
6647 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
6648 enum machine_mode mode)
6650 unsigned int extra;
6652 if (!reg_offset_addressing_ok_p (mode))
6654 if (virtual_stack_registers_memory_p (x))
6655 return x;
6657 /* In theory we should not be seeing addresses of the form reg+0,
6658 but just in case it is generated, optimize it away. */
6659 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
6660 return force_reg (Pmode, XEXP (x, 0));
6662 /* For TImode with load/store quad, restrict addresses to just a single
6663 pointer, so it works with both GPRs and VSX registers. */
6664 /* Make sure both operands are registers. */
6665 else if (GET_CODE (x) == PLUS
6666 && (mode != TImode || !TARGET_QUAD_MEMORY))
6667 return gen_rtx_PLUS (Pmode,
6668 force_reg (Pmode, XEXP (x, 0)),
6669 force_reg (Pmode, XEXP (x, 1)));
6670 else
6671 return force_reg (Pmode, x);
6673 if (GET_CODE (x) == SYMBOL_REF)
6675 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
6676 if (model != 0)
6677 return rs6000_legitimize_tls_address (x, model);
6680 extra = 0;
6681 switch (mode)
6683 case TFmode:
6684 case TDmode:
6685 case TImode:
6686 case PTImode:
6687 /* As in legitimate_offset_address_p we do not assume
6688 worst-case. The mode here is just a hint as to the registers
6689 used. A TImode is usually in gprs, but may actually be in
6690 fprs. Leave worst-case scenario for reload to handle via
6691 insn constraints. PTImode is only GPRs. */
6692 extra = 8;
6693 break;
6694 default:
6695 break;
6698 if (GET_CODE (x) == PLUS
6699 && GET_CODE (XEXP (x, 0)) == REG
6700 && GET_CODE (XEXP (x, 1)) == CONST_INT
6701 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
6702 >= 0x10000 - extra)
6703 && !(SPE_VECTOR_MODE (mode)
6704 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
6706 HOST_WIDE_INT high_int, low_int;
6707 rtx sum;
6708 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
6709 if (low_int >= 0x8000 - extra)
6710 low_int = 0;
6711 high_int = INTVAL (XEXP (x, 1)) - low_int;
6712 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
6713 GEN_INT (high_int)), 0);
6714 return plus_constant (Pmode, sum, low_int);
6716 else if (GET_CODE (x) == PLUS
6717 && GET_CODE (XEXP (x, 0)) == REG
6718 && GET_CODE (XEXP (x, 1)) != CONST_INT
6719 && GET_MODE_NUNITS (mode) == 1
6720 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6721 || (/* ??? Assume floating point reg based on mode? */
6722 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6723 && (mode == DFmode || mode == DDmode)))
6724 && !avoiding_indexed_address_p (mode))
6726 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
6727 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
6729 else if (SPE_VECTOR_MODE (mode)
6730 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
6732 if (mode == DImode)
6733 return x;
6734 /* We accept [reg + reg] and [reg + OFFSET]. */
6736 if (GET_CODE (x) == PLUS)
6738 rtx op1 = XEXP (x, 0);
6739 rtx op2 = XEXP (x, 1);
6740 rtx y;
6742 op1 = force_reg (Pmode, op1);
6744 if (GET_CODE (op2) != REG
6745 && (GET_CODE (op2) != CONST_INT
6746 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
6747 || (GET_MODE_SIZE (mode) > 8
6748 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
6749 op2 = force_reg (Pmode, op2);
6751 /* We can't always do [reg + reg] for these, because [reg +
6752 reg + offset] is not a legitimate addressing mode. */
6753 y = gen_rtx_PLUS (Pmode, op1, op2);
6755 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
6756 return force_reg (Pmode, y);
6757 else
6758 return y;
6761 return force_reg (Pmode, x);
6763 else if ((TARGET_ELF
6764 #if TARGET_MACHO
6765 || !MACHO_DYNAMIC_NO_PIC_P
6766 #endif
6768 && TARGET_32BIT
6769 && TARGET_NO_TOC
6770 && ! flag_pic
6771 && GET_CODE (x) != CONST_INT
6772 && GET_CODE (x) != CONST_DOUBLE
6773 && CONSTANT_P (x)
6774 && GET_MODE_NUNITS (mode) == 1
6775 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6776 || (/* ??? Assume floating point reg based on mode? */
6777 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6778 && (mode == DFmode || mode == DDmode))))
6780 rtx reg = gen_reg_rtx (Pmode);
6781 if (TARGET_ELF)
6782 emit_insn (gen_elf_high (reg, x));
6783 else
6784 emit_insn (gen_macho_high (reg, x));
6785 return gen_rtx_LO_SUM (Pmode, reg, x);
6787 else if (TARGET_TOC
6788 && GET_CODE (x) == SYMBOL_REF
6789 && constant_pool_expr_p (x)
6790 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
6791 return create_TOC_reference (x, NULL_RTX);
6792 else
6793 return x;
6796 /* Debug version of rs6000_legitimize_address. */
6797 static rtx
6798 rs6000_debug_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
6800 rtx ret;
6801 rtx insns;
6803 start_sequence ();
6804 ret = rs6000_legitimize_address (x, oldx, mode);
6805 insns = get_insns ();
6806 end_sequence ();
6808 if (ret != x)
6810 fprintf (stderr,
6811 "\nrs6000_legitimize_address: mode %s, old code %s, "
6812 "new code %s, modified\n",
6813 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
6814 GET_RTX_NAME (GET_CODE (ret)));
6816 fprintf (stderr, "Original address:\n");
6817 debug_rtx (x);
6819 fprintf (stderr, "oldx:\n");
6820 debug_rtx (oldx);
6822 fprintf (stderr, "New address:\n");
6823 debug_rtx (ret);
6825 if (insns)
6827 fprintf (stderr, "Insns added:\n");
6828 debug_rtx_list (insns, 20);
6831 else
6833 fprintf (stderr,
6834 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
6835 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
6837 debug_rtx (x);
6840 if (insns)
6841 emit_insn (insns);
6843 return ret;
6846 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6847 We need to emit DTP-relative relocations. */
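/* For a 4-byte entry this emits, for example, ".long sym@dtprel+0x8000";
   8-byte entries use DOUBLE_INT_ASM_OP instead of ".long".  */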
6849 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6850 static void
6851 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
6853 switch (size)
6855 case 4:
6856 fputs ("\t.long\t", file);
6857 break;
6858 case 8:
6859 fputs (DOUBLE_INT_ASM_OP, file);
6860 break;
6861 default:
6862 gcc_unreachable ();
6864 output_addr_const (file, x);
6865 fputs ("@dtprel+0x8000", file);
6868 /* In the name of slightly smaller debug output, and to cater to
6869 general assembler lossage, recognize various UNSPEC sequences
6870 and turn them back into a direct symbol reference. */
6872 static rtx
6873 rs6000_delegitimize_address (rtx orig_x)
6875 rtx x, y, offset;
6877 orig_x = delegitimize_mem_from_attrs (orig_x);
6878 x = orig_x;
6879 if (MEM_P (x))
6880 x = XEXP (x, 0);
6882 y = x;
6883 if (TARGET_CMODEL != CMODEL_SMALL
6884 && GET_CODE (y) == LO_SUM)
6885 y = XEXP (y, 1);
6887 offset = NULL_RTX;
6888 if (GET_CODE (y) == PLUS
6889 && GET_MODE (y) == Pmode
6890 && CONST_INT_P (XEXP (y, 1)))
6892 offset = XEXP (y, 1);
6893 y = XEXP (y, 0);
6896 if (GET_CODE (y) == UNSPEC
6897 && XINT (y, 1) == UNSPEC_TOCREL)
6899 #ifdef ENABLE_CHECKING
6900 if (REG_P (XVECEXP (y, 0, 1))
6901 && REGNO (XVECEXP (y, 0, 1)) == TOC_REGISTER)
6903 /* All good. */
6905 else if (GET_CODE (XVECEXP (y, 0, 1)) == DEBUG_EXPR)
6907 /* Weirdness alert. df_note_compute can replace r2 with a
6908 debug_expr when this unspec is in a debug_insn.
6909 Seen in gcc.dg/pr51957-1.c */
6911 else
6913 debug_rtx (orig_x);
6914 abort ();
6916 #endif
6917 y = XVECEXP (y, 0, 0);
6919 #ifdef HAVE_AS_TLS
6920 /* Do not associate thread-local symbols with the original
6921 constant pool symbol. */
6922 if (TARGET_XCOFF
6923 && GET_CODE (y) == SYMBOL_REF
6924 && CONSTANT_POOL_ADDRESS_P (y)
6925 && SYMBOL_REF_TLS_MODEL (get_pool_constant (y)) >= TLS_MODEL_REAL)
6926 return orig_x;
6927 #endif
6929 if (offset != NULL_RTX)
6930 y = gen_rtx_PLUS (Pmode, y, offset);
6931 if (!MEM_P (orig_x))
6932 return y;
6933 else
6934 return replace_equiv_address_nv (orig_x, y);
6937 if (TARGET_MACHO
6938 && GET_CODE (orig_x) == LO_SUM
6939 && GET_CODE (XEXP (orig_x, 1)) == CONST)
6941 y = XEXP (XEXP (orig_x, 1), 0);
6942 if (GET_CODE (y) == UNSPEC
6943 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
6944 return XVECEXP (y, 0, 0);
6947 return orig_x;
6950 /* Return true if X shouldn't be emitted into the debug info.
6951 The linker doesn't like .toc section references from
6952 .debug_* sections, so reject .toc section symbols. */
6954 static bool
6955 rs6000_const_not_ok_for_debug_p (rtx x)
6957 if (GET_CODE (x) == SYMBOL_REF
6958 && CONSTANT_POOL_ADDRESS_P (x))
6960 rtx c = get_pool_constant (x);
6961 enum machine_mode cmode = get_pool_mode (x);
6962 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
6963 return true;
6966 return false;
6969 /* Construct the SYMBOL_REF for the tls_get_addr function. */
6971 static GTY(()) rtx rs6000_tls_symbol;
6972 static rtx
6973 rs6000_tls_get_addr (void)
6975 if (!rs6000_tls_symbol)
6976 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
6978 return rs6000_tls_symbol;
6981 /* Construct the SYMBOL_REF for TLS GOT references. */
6983 static GTY(()) rtx rs6000_got_symbol;
6984 static rtx
6985 rs6000_got_sym (void)
6987 if (!rs6000_got_symbol)
6989 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
6990 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
6991 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
6994 return rs6000_got_symbol;
6997 /* AIX Thread-Local Address support. */
6999 static rtx
7000 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7002 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7003 const char *name;
7004 char *tlsname;
7006 name = XSTR (addr, 0);
7007 /* Append the TLS CSECT qualifier, unless the symbol already is qualified
7008 or the symbol will be placed in the TLS private data section. */
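  /* E.g. a public symbol "foo" becomes "foo[TL]", or "foo[UL]" when it
     is a bss-style initializer.  */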
7009 if (name[strlen (name) - 1] != ']'
7010 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7011 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7013 tlsname = XALLOCAVEC (char, strlen (name) + 4);
7014 strcpy (tlsname, name);
7015 strcat (tlsname,
7016 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7017 tlsaddr = copy_rtx (addr);
7018 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7020 else
7021 tlsaddr = addr;
7023 /* Place addr into TOC constant pool. */
7024 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7026 /* Output the TOC entry and create the MEM referencing the value. */
7027 if (constant_pool_expr_p (XEXP (sym, 0))
7028 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7030 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7031 mem = gen_const_mem (Pmode, tocref);
7032 set_mem_alias_set (mem, get_TOC_alias_set ());
7034 else
7035 return sym;
7037 /* Use global-dynamic for local-dynamic. */
7038 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7039 || model == TLS_MODEL_LOCAL_DYNAMIC)
7041 /* Create new TOC reference for @m symbol. */
7042 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7043 tlsname = XALLOCAVEC (char, strlen (name) + 1);
7044 strcpy (tlsname, "*LCM");
7045 strcat (tlsname, name + 3);
7046 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7047 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7048 tocref = create_TOC_reference (modaddr, NULL_RTX);
7049 rtx modmem = gen_const_mem (Pmode, tocref);
7050 set_mem_alias_set (modmem, get_TOC_alias_set ());
7052 rtx modreg = gen_reg_rtx (Pmode);
7053 emit_insn (gen_rtx_SET (VOIDmode, modreg, modmem));
7055 tmpreg = gen_reg_rtx (Pmode);
7056 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7058 dest = gen_reg_rtx (Pmode);
7059 if (TARGET_32BIT)
7060 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7061 else
7062 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7063 return dest;
7065 /* Obtain the TLS pointer: via a call on 32-bit, from GPR 13 on 64-bit. */
7066 else if (TARGET_32BIT)
7068 tlsreg = gen_reg_rtx (SImode);
7069 emit_insn (gen_tls_get_tpointer (tlsreg));
7071 else
7072 tlsreg = gen_rtx_REG (DImode, 13);
7074 /* Load the TOC value into temporary register. */
7075 tmpreg = gen_reg_rtx (Pmode);
7076 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7077 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7078 gen_rtx_MINUS (Pmode, addr, tlsreg));
7080 /* Add TOC symbol value to TLS pointer. */
7081 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7083 return dest;
7086 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7087 this (thread-local) address. */
7089 static rtx
7090 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7092 rtx dest, insn;
7094 if (TARGET_XCOFF)
7095 return rs6000_legitimize_tls_address_aix (addr, model);
7097 dest = gen_reg_rtx (Pmode);
7098 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7100 rtx tlsreg;
7102 if (TARGET_64BIT)
7104 tlsreg = gen_rtx_REG (Pmode, 13);
7105 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7107 else
7109 tlsreg = gen_rtx_REG (Pmode, 2);
7110 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7112 emit_insn (insn);
7114 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7116 rtx tlsreg, tmp;
7118 tmp = gen_reg_rtx (Pmode);
7119 if (TARGET_64BIT)
7121 tlsreg = gen_rtx_REG (Pmode, 13);
7122 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7124 else
7126 tlsreg = gen_rtx_REG (Pmode, 2);
7127 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7129 emit_insn (insn);
7130 if (TARGET_64BIT)
7131 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7132 else
7133 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7134 emit_insn (insn);
7136 else
7138 rtx r3, got, tga, tmp1, tmp2, call_insn;
7140 /* We currently use relocations like @got@tlsgd for tls, which
7141 means the linker will handle allocation of tls entries, placing
7142 them in the .got section. So use a pointer to the .got section,
7143 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7144 or to secondary GOT sections used by 32-bit -fPIC. */
7145 if (TARGET_64BIT)
7146 got = gen_rtx_REG (Pmode, 2);
7147 else
7149 if (flag_pic == 1)
7150 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7151 else
7153 rtx gsym = rs6000_got_sym ();
7154 got = gen_reg_rtx (Pmode);
7155 if (flag_pic == 0)
7156 rs6000_emit_move (got, gsym, Pmode);
7157 else
7159 rtx mem, lab, last;
7161 tmp1 = gen_reg_rtx (Pmode);
7162 tmp2 = gen_reg_rtx (Pmode);
7163 mem = gen_const_mem (Pmode, tmp1);
7164 lab = gen_label_rtx ();
7165 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7166 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7167 if (TARGET_LINK_STACK)
7168 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7169 emit_move_insn (tmp2, mem);
7170 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7171 set_unique_reg_note (last, REG_EQUAL, gsym);
7176 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7178 tga = rs6000_tls_get_addr ();
7179 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7180 1, const0_rtx, Pmode);
7182 r3 = gen_rtx_REG (Pmode, 3);
7183 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7185 if (TARGET_64BIT)
7186 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7187 else
7188 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7190 else if (DEFAULT_ABI == ABI_V4)
7191 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7192 else
7193 gcc_unreachable ();
7194 call_insn = last_call_insn ();
7195 PATTERN (call_insn) = insn;
7196 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7197 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7198 pic_offset_table_rtx);
7200 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7202 tga = rs6000_tls_get_addr ();
7203 tmp1 = gen_reg_rtx (Pmode);
7204 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7205 1, const0_rtx, Pmode);
7207 r3 = gen_rtx_REG (Pmode, 3);
7208 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7210 if (TARGET_64BIT)
7211 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7212 else
7213 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7215 else if (DEFAULT_ABI == ABI_V4)
7216 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7217 else
7218 gcc_unreachable ();
7219 call_insn = last_call_insn ();
7220 PATTERN (call_insn) = insn;
7221 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7222 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7223 pic_offset_table_rtx);
7225 if (rs6000_tls_size == 16)
7227 if (TARGET_64BIT)
7228 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7229 else
7230 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7232 else if (rs6000_tls_size == 32)
7234 tmp2 = gen_reg_rtx (Pmode);
7235 if (TARGET_64BIT)
7236 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7237 else
7238 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7239 emit_insn (insn);
7240 if (TARGET_64BIT)
7241 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7242 else
7243 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7245 else
7247 tmp2 = gen_reg_rtx (Pmode);
7248 if (TARGET_64BIT)
7249 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7250 else
7251 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7252 emit_insn (insn);
7253 insn = gen_rtx_SET (Pmode, dest,
7254 gen_rtx_PLUS (Pmode, tmp2, tmp1));
7256 emit_insn (insn);
7258 else
7260 /* IE, or 64-bit offset LE. */
7261 tmp2 = gen_reg_rtx (Pmode);
7262 if (TARGET_64BIT)
7263 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7264 else
7265 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7266 emit_insn (insn);
7267 if (TARGET_64BIT)
7268 insn = gen_tls_tls_64 (dest, tmp2, addr);
7269 else
7270 insn = gen_tls_tls_32 (dest, tmp2, addr);
7271 emit_insn (insn);
7275 return dest;
7278 /* Return 1 if X contains a thread-local symbol. */
7280 static bool
7281 rs6000_tls_referenced_p (rtx x)
7283 if (! TARGET_HAVE_TLS)
7284 return false;
7286 return for_each_rtx (&x, &rs6000_tls_symbol_ref_1, 0);
7289 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7291 static bool
7292 rs6000_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7294 if (GET_CODE (x) == HIGH
7295 && GET_CODE (XEXP (x, 0)) == UNSPEC)
7296 return true;
7298 /* A TLS symbol in the TOC cannot contain a sum. */
7299 if (GET_CODE (x) == CONST
7300 && GET_CODE (XEXP (x, 0)) == PLUS
7301 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7302 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
7303 return true;
7305 /* Do not place an ELF TLS symbol in the constant pool. */
7306 return TARGET_ELF && rs6000_tls_referenced_p (x);
7309 /* Return 1 if *X is a thread-local symbol. This is the same as
7310 rs6000_tls_symbol_ref except for the type of the unused argument. */
7312 static int
7313 rs6000_tls_symbol_ref_1 (rtx *x, void *data ATTRIBUTE_UNUSED)
7315 return RS6000_SYMBOL_REF_TLS_P (*x);
7318 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
7319 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
7320 can be addressed relative to the toc pointer. */
7322 static bool
7323 use_toc_relative_ref (rtx sym)
7325 return ((constant_pool_expr_p (sym)
7326 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
7327 get_pool_mode (sym)))
7328 || (TARGET_CMODEL == CMODEL_MEDIUM
7329 && SYMBOL_REF_LOCAL_P (sym)));
7332 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
7333 replace the input X, or the original X if no replacement is called for.
7334 The output parameter *WIN is 1 if the calling macro should goto WIN,
7335 0 if it should not.
7337 For RS/6000, we wish to handle large displacements off a base
7338 register by splitting the addend across an addi/addis pair and the mem insn.
7339 This cuts the number of extra insns needed from 3 to 1.
7341 On Darwin, we use this to generate code for floating point constants.
7342 A movsf_low is generated so we wind up with 2 instructions rather than 3.
7343 The Darwin code is inside #if TARGET_MACHO because only then are the
7344 machopic_* functions defined. */
7345 static rtx
7346 rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
7347 int opnum, int type,
7348 int ind_levels ATTRIBUTE_UNUSED, int *win)
7350 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7352 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
7353 DFmode/DImode MEM. */
7354 if (reg_offset_p
7355 && opnum == 1
7356 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
7357 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
7358 reg_offset_p = false;
7360 /* We must recognize output that we have already generated ourselves. */
7361 if (GET_CODE (x) == PLUS
7362 && GET_CODE (XEXP (x, 0)) == PLUS
7363 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7364 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7365 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7367 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7368 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7369 opnum, (enum reload_type) type);
7370 *win = 1;
7371 return x;
7374 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
7375 if (GET_CODE (x) == LO_SUM
7376 && GET_CODE (XEXP (x, 0)) == HIGH)
7378 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7379 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7380 opnum, (enum reload_type) type);
7381 *win = 1;
7382 return x;
7385 #if TARGET_MACHO
7386 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
7387 && GET_CODE (x) == LO_SUM
7388 && GET_CODE (XEXP (x, 0)) == PLUS
7389 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
7390 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
7391 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
7392 && machopic_operand_p (XEXP (x, 1)))
7394 /* Result of previous invocation of this function on Darwin
7395 floating point constant. */
7396 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7397 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7398 opnum, (enum reload_type) type);
7399 *win = 1;
7400 return x;
7402 #endif
7404 if (TARGET_CMODEL != CMODEL_SMALL
7405 && reg_offset_p
7406 && small_toc_ref (x, VOIDmode))
7408 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
7409 x = gen_rtx_LO_SUM (Pmode, hi, x);
7410 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7411 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7412 opnum, (enum reload_type) type);
7413 *win = 1;
7414 return x;
7417 if (GET_CODE (x) == PLUS
7418 && GET_CODE (XEXP (x, 0)) == REG
7419 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
7420 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
7421 && GET_CODE (XEXP (x, 1)) == CONST_INT
7422 && reg_offset_p
7423 && !SPE_VECTOR_MODE (mode)
7424 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7425 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
7427 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
7428 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
7429 HOST_WIDE_INT high
7430 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
7432 /* Check for 32-bit overflow. */
7433 if (high + low != val)
7435 *win = 0;
7436 return x;
7439 /* Reload the high part into a base reg; leave the low part
7440 in the mem directly. */
7442 x = gen_rtx_PLUS (GET_MODE (x),
7443 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
7444 GEN_INT (high)),
7445 GEN_INT (low));
7447 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7448 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7449 opnum, (enum reload_type) type);
7450 *win = 1;
7451 return x;
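/* Worked example of the split above: for
       (mem:SI (plus:SI (reg:SI 3) (const_int 0x12345)))
   we get low = 0x2345 and high = 0x10000, so reload materializes the
   high part once and the low part stays in the address, roughly:
       addis 9,3,0x1          # register numbers illustrative
       lwz   0,0x2345(9)  */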
7454 if (GET_CODE (x) == SYMBOL_REF
7455 && reg_offset_p
7456 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
7457 && !SPE_VECTOR_MODE (mode)
7458 #if TARGET_MACHO
7459 && DEFAULT_ABI == ABI_DARWIN
7460 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
7461 && machopic_symbol_defined_p (x)
7462 #else
7463 && DEFAULT_ABI == ABI_V4
7464 && !flag_pic
7465 #endif
7466 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
7467 The same goes for DImode without 64-bit gprs and DFmode and DDmode
7468 without fprs.
7469 ??? Assume floating point reg based on mode? This assumption is
7470 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
7471 where reload ends up doing a DFmode load of a constant from
7472 mem using two gprs. Unfortunately, at this point reload
7473 hasn't yet selected regs so poking around in reload data
7474 won't help and even if we could figure out the regs reliably,
7475 we'd still want to allow this transformation when the mem is
7476 naturally aligned. Since we say the address is good here, we
7477 can't disable offsets from LO_SUMs in mem_operand_gpr.
7478 FIXME: Allow offset from lo_sum for other modes too, when
7479 mem is sufficiently aligned. */
7480 && mode != TFmode
7481 && mode != TDmode
7482 && (mode != TImode || !TARGET_VSX_TIMODE)
7483 && mode != PTImode
7484 && (mode != DImode || TARGET_POWERPC64)
7485 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
7486 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
7488 #if TARGET_MACHO
7489 if (flag_pic)
7491 rtx offset = machopic_gen_offset (x);
7492 x = gen_rtx_LO_SUM (GET_MODE (x),
7493 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
7494 gen_rtx_HIGH (Pmode, offset)), offset);
7496 else
7497 #endif
7498 x = gen_rtx_LO_SUM (GET_MODE (x),
7499 gen_rtx_HIGH (Pmode, x), x);
7501 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7502 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7503 opnum, (enum reload_type) type);
7504 *win = 1;
7505 return x;
7508 /* Reload an offset address wrapped by an AND that represents the
7509 masking of the lower bits. Strip the outer AND and let reload
7510 convert the offset address into an indirect address. For VSX,
7511 force reload to create the address with an AND in a separate
7512 register, because we can't guarantee an altivec register will
7513 be used. */
7514 if (VECTOR_MEM_ALTIVEC_P (mode)
7515 && GET_CODE (x) == AND
7516 && GET_CODE (XEXP (x, 0)) == PLUS
7517 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7518 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7519 && GET_CODE (XEXP (x, 1)) == CONST_INT
7520 && INTVAL (XEXP (x, 1)) == -16)
7522 x = XEXP (x, 0);
7523 *win = 1;
7524 return x;
7527 if (TARGET_TOC
7528 && reg_offset_p
7529 && GET_CODE (x) == SYMBOL_REF
7530 && use_toc_relative_ref (x))
7532 x = create_TOC_reference (x, NULL_RTX);
7533 if (TARGET_CMODEL != CMODEL_SMALL)
7534 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7535 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7536 opnum, (enum reload_type) type);
7537 *win = 1;
7538 return x;
7540 *win = 0;
7541 return x;
7544 /* Debug version of rs6000_legitimize_reload_address. */
7545 static rtx
7546 rs6000_debug_legitimize_reload_address (rtx x, enum machine_mode mode,
7547 int opnum, int type,
7548 int ind_levels, int *win)
7550 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
7551 ind_levels, win);
7552 fprintf (stderr,
7553 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
7554 "type = %d, ind_levels = %d, win = %d, original addr:\n",
7555 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
7556 debug_rtx (x);
7558 if (x == ret)
7559 fprintf (stderr, "Same address returned\n");
7560 else if (!ret)
7561 fprintf (stderr, "NULL returned\n");
7562 else
7564 fprintf (stderr, "New address:\n");
7565 debug_rtx (ret);
7568 return ret;
7571 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
7572 that is a valid memory address for an instruction.
7573 The MODE argument is the machine mode for the MEM expression
7574 that wants to use this address.
7576 On the RS/6000, there are four valid addresses: a SYMBOL_REF that
7577 refers to a constant pool entry of an address (or the sum of it
7578 plus a constant), a short (16-bit signed) constant plus a register,
7579 the sum of two registers, or a register indirect, possibly with an
7580 auto-increment. For DFmode, DDmode and DImode with a constant plus
7581 register, we must ensure that both words are addressable, or on
7582 PowerPC64 that the offset is word aligned.
7584 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
7585 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
7586 because adjacent memory cells are accessed by adding word-sized offsets
7587 during assembly output. */
7588 static bool
7589 rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
7591 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7593 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
7594 if (VECTOR_MEM_ALTIVEC_P (mode)
7595 && GET_CODE (x) == AND
7596 && GET_CODE (XEXP (x, 1)) == CONST_INT
7597 && INTVAL (XEXP (x, 1)) == -16)
7598 x = XEXP (x, 0);
7600 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
7601 return 0;
7602 if (legitimate_indirect_address_p (x, reg_ok_strict))
7603 return 1;
7604 if (TARGET_UPDATE
7605 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
7606 && mode_supports_pre_incdec_p (mode)
7607 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
7608 return 1;
7609 if (virtual_stack_registers_memory_p (x))
7610 return 1;
7611 if (reg_offset_p && legitimate_small_data_p (mode, x))
7612 return 1;
7613 if (reg_offset_p
7614 && legitimate_constant_pool_address_p (x, mode,
7615 reg_ok_strict || lra_in_progress))
7616 return 1;
7617 /* For TImode, if we have load/store quad and TImode in VSX registers, only
7618 allow register indirect addresses. This will allow the values to go in
7619 either GPRs or VSX registers without reloading. The vector types would
7620 tend to go into VSX registers, so we allow REG+REG, while TImode seems
7621 somewhat split, in that some uses are GPR based, and some VSX based. */
7622 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
7623 return 0;
7624 /* If not REG_OK_STRICT (before reload) let pass any stack offset. */
7625 if (! reg_ok_strict
7626 && reg_offset_p
7627 && GET_CODE (x) == PLUS
7628 && GET_CODE (XEXP (x, 0)) == REG
7629 && (XEXP (x, 0) == virtual_stack_vars_rtx
7630 || XEXP (x, 0) == arg_pointer_rtx)
7631 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7632 return 1;
7633 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
7634 return 1;
7635 if (mode != TFmode
7636 && mode != TDmode
7637 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7638 || TARGET_POWERPC64
7639 || (mode != DFmode && mode != DDmode)
7640 || (TARGET_E500_DOUBLE && mode != DDmode))
7641 && (TARGET_POWERPC64 || mode != DImode)
7642 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
7643 && mode != PTImode
7644 && !avoiding_indexed_address_p (mode)
7645 && legitimate_indexed_address_p (x, reg_ok_strict))
7646 return 1;
7647 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
7648 && mode_supports_pre_modify_p (mode)
7649 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
7650 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
7651 reg_ok_strict, false)
7652 || (!avoiding_indexed_address_p (mode)
7653 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
7654 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7655 return 1;
7656 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
7657 return 1;
7658 return 0;
7661 /* Debug version of rs6000_legitimate_address_p. */
7662 static bool
7663 rs6000_debug_legitimate_address_p (enum machine_mode mode, rtx x,
7664 bool reg_ok_strict)
7666 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
7667 fprintf (stderr,
7668 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
7669 "strict = %d, reload = %s, code = %s\n",
7670 ret ? "true" : "false",
7671 GET_MODE_NAME (mode),
7672 reg_ok_strict,
7673 (reload_completed
7674 ? "after"
7675 : (reload_in_progress ? "progress" : "before")),
7676 GET_RTX_NAME (GET_CODE (x)));
7677 debug_rtx (x);
7679 return ret;
7682 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
7684 static bool
7685 rs6000_mode_dependent_address_p (const_rtx addr,
7686 addr_space_t as ATTRIBUTE_UNUSED)
7688 return rs6000_mode_dependent_address_ptr (addr);
7691 /* Go to LABEL if ADDR (a legitimate address expression)
7692 has an effect that depends on the machine mode it is used for.
7694 On the RS/6000 this is true of all integral offsets (since AltiVec
7695 and VSX modes don't allow them) or is a pre-increment or decrement.
7697 ??? Except that due to conceptual problems in offsettable_address_p
7698 we can't really report the problems of integral offsets. So leave
7699 this assuming that the adjustable offset must be valid for the
7700 sub-words of a TFmode operand, which is what we had before. */
7702 static bool
7703 rs6000_mode_dependent_address (const_rtx addr)
7705 switch (GET_CODE (addr))
7707 case PLUS:
7708 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
7709 is considered a legitimate address before reload, so there
7710 are no offset restrictions in that case. Note that this
7711 condition is safe in strict mode because any address involving
7712 virtual_stack_vars_rtx or arg_pointer_rtx would already have
7713 been rejected as illegitimate. */
7714 if (XEXP (addr, 0) != virtual_stack_vars_rtx
7715 && XEXP (addr, 0) != arg_pointer_rtx
7716 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
7718 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
7719 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
7721 break;
7723 case LO_SUM:
7724 /* Anything in the constant pool is sufficiently aligned that
7725 all bytes have the same high part address. */
7726 return !legitimate_constant_pool_address_p (addr, QImode, false);
7728 /* Auto-increment cases are now treated generically in recog.c. */
7729 case PRE_MODIFY:
7730 return TARGET_UPDATE;
7732 /* AND is only allowed in Altivec loads. */
7733 case AND:
7734 return true;
7736 default:
7737 break;
7740 return false;
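/* To make the PLUS bound concrete: on 32-bit, an offset of 32756
   (0x7ff4) trips it, since 0x7ff4 + 0x8000 == 0x10000 - 12, and the
   last word of a 16-byte TFmode access would then need offset 32768,
   which no longer fits the signed 16-bit displacement field.  */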
7743 /* Debug version of rs6000_mode_dependent_address. */
7744 static bool
7745 rs6000_debug_mode_dependent_address (const_rtx addr)
7747 bool ret = rs6000_mode_dependent_address (addr);
7749 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
7750 ret ? "true" : "false");
7751 debug_rtx (addr);
7753 return ret;
7756 /* Implement FIND_BASE_TERM. */
7758 rtx
7759 rs6000_find_base_term (rtx op)
7761 rtx base;
7763 base = op;
7764 if (GET_CODE (base) == CONST)
7765 base = XEXP (base, 0);
7766 if (GET_CODE (base) == PLUS)
7767 base = XEXP (base, 0);
7768 if (GET_CODE (base) == UNSPEC)
7769 switch (XINT (base, 1))
7771 case UNSPEC_TOCREL:
7772 case UNSPEC_MACHOPIC_OFFSET:
7773 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
7774 for aliasing purposes. */
7775 return XVECEXP (base, 0, 0);
7778 return op;
7781 /* More elaborate version of recog's offsettable_memref_p predicate
7782 that works around the ??? note of rs6000_mode_dependent_address.
7783 In particular it accepts
7785 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
7787 in 32-bit mode, which the recog predicate rejects. */
7789 static bool
7790 rs6000_offsettable_memref_p (rtx op, enum machine_mode reg_mode)
7792 bool worst_case;
7794 if (!MEM_P (op))
7795 return false;
7797 /* First mimic offsettable_memref_p. */
7798 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
7799 return true;
7801 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
7802 the latter predicate knows nothing about the mode of the memory
7803 reference and, therefore, assumes that it is the largest supported
7804 mode (TFmode). As a consequence, legitimate offsettable memory
7805 references are rejected. rs6000_legitimate_offset_address_p contains
7806 the correct logic for the PLUS case of rs6000_mode_dependent_address,
7807 at least with a little bit of help here given that we know the
7808 actual registers used. */
7809 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
7810 || GET_MODE_SIZE (reg_mode) == 4);
7811 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
7812 true, worst_case);
7815 /* Change register usage conditional on target flags. */
7816 static void
7817 rs6000_conditional_register_usage (void)
7819 int i;
7821 if (TARGET_DEBUG_TARGET)
7822 fprintf (stderr, "rs6000_conditional_register_usage called\n");
7824 /* Set MQ register fixed (already call_used) so that it will not be
7825 allocated. */
7826 fixed_regs[64] = 1;
7828 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
7829 if (TARGET_64BIT)
7830 fixed_regs[13] = call_used_regs[13]
7831 = call_really_used_regs[13] = 1;
7833 /* Conditionally disable FPRs. */
7834 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
7835 for (i = 32; i < 64; i++)
7836 fixed_regs[i] = call_used_regs[i]
7837 = call_really_used_regs[i] = 1;
7839 /* The TOC register is not killed across calls in a way that is
7840 visible to the compiler. */
7841 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7842 call_really_used_regs[2] = 0;
7844 if (DEFAULT_ABI == ABI_V4
7845 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7846 && flag_pic == 2)
7847 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7849 if (DEFAULT_ABI == ABI_V4
7850 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7851 && flag_pic == 1)
7852 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7853 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7854 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7856 if (DEFAULT_ABI == ABI_DARWIN
7857 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
7858 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7859 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7860 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7862 if (TARGET_TOC && TARGET_MINIMAL_TOC)
7863 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7864 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7866 if (TARGET_SPE)
7868 global_regs[SPEFSCR_REGNO] = 1;
7869 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
7870 registers in prologues and epilogues. We no longer use r14
7871 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
7872 pool for link-compatibility with older versions of GCC. Once
7873 "old" code has died out, we can return r14 to the allocation
7874 pool. */
7875 fixed_regs[14]
7876 = call_used_regs[14]
7877 = call_really_used_regs[14] = 1;
7880 if (!TARGET_ALTIVEC && !TARGET_VSX)
7882 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
7883 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
7884 call_really_used_regs[VRSAVE_REGNO] = 1;
7887 if (TARGET_ALTIVEC || TARGET_VSX)
7888 global_regs[VSCR_REGNO] = 1;
7890 if (TARGET_ALTIVEC_ABI)
7892 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
7893 call_used_regs[i] = call_really_used_regs[i] = 1;
7895 /* AIX reserves VR20:31 in non-extended ABI mode. */
7896 if (TARGET_XCOFF)
7897 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
7898 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
7903 /* Try to output insns to set DEST equal to the constant SOURCE if it
7904 can be done in less than N insns. Do all computations in MODE.
7905 Returns the place where the output has been placed if it can be
7906 done and the insns have been emitted. If it would take more than N
7907 insns, zero is returned and no insns are emitted. */
7909 rtx
7910 rs6000_emit_set_const (rtx dest, enum machine_mode mode,
7911 rtx source, int n ATTRIBUTE_UNUSED)
7913 rtx result, insn, set;
7914 HOST_WIDE_INT c0, c1;
7916 switch (mode)
7918 case QImode:
7919 case HImode:
7920 if (dest == NULL)
7921 dest = gen_reg_rtx (mode);
7922 emit_insn (gen_rtx_SET (VOIDmode, dest, source));
7923 return dest;
7925 case SImode:
7926 result = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
7928 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (result),
7929 GEN_INT (INTVAL (source)
7930 & (~ (HOST_WIDE_INT) 0xffff))));
7931 emit_insn (gen_rtx_SET (VOIDmode, dest,
7932 gen_rtx_IOR (SImode, copy_rtx (result),
7933 GEN_INT (INTVAL (source) & 0xffff))));
7934 result = dest;
7935 break;
7937 case DImode:
7938 switch (GET_CODE (source))
7940 case CONST_INT:
7941 c0 = INTVAL (source);
7942 c1 = -(c0 < 0);
7943 break;
7945 default:
7946 gcc_unreachable ();
7949 result = rs6000_emit_set_long_const (dest, c0, c1);
7950 break;
7952 default:
7953 gcc_unreachable ();
7956 insn = get_last_insn ();
7957 set = single_set (insn);
7958 if (! CONSTANT_P (SET_SRC (set)))
7959 set_unique_reg_note (insn, REG_EQUAL, source);
7961 return result;
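/* The SImode arm above is the classic two-insn materialization, e.g.
   0x12345678 becomes roughly:
       lis rD,0x1234       # high halfword
       ori rD,rD,0x5678    # low halfword  */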
7964 /* Having failed to find a 3 insn sequence in rs6000_emit_set_const,
7965 fall back to a straightforward decomposition. We do this to avoid
7966 exponential run times encountered when looking for longer sequences
7967 with rs6000_emit_set_const. */
7968 static rtx
7969 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c1, HOST_WIDE_INT c2)
7971 if (!TARGET_POWERPC64)
7973 rtx operand1, operand2;
7975 operand1 = operand_subword_force (dest, WORDS_BIG_ENDIAN == 0,
7976 DImode);
7977 operand2 = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN != 0,
7978 DImode);
7979 emit_move_insn (operand1, GEN_INT (c1));
7980 emit_move_insn (operand2, GEN_INT (c2));
7982 else
7984 HOST_WIDE_INT ud1, ud2, ud3, ud4;
7986 ud1 = c1 & 0xffff;
7987 ud2 = (c1 & 0xffff0000) >> 16;
7988 c2 = c1 >> 32;
7989 ud3 = c2 & 0xffff;
7990 ud4 = (c2 & 0xffff0000) >> 16;
7992 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
7993 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
7994 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
7996 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
7997 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
7999 emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000)
8000 - 0x80000000));
8001 if (ud1 != 0)
8002 emit_move_insn (copy_rtx (dest),
8003 gen_rtx_IOR (DImode, copy_rtx (dest),
8004 GEN_INT (ud1)));
8006 else if (ud3 == 0 && ud4 == 0)
8008 gcc_assert (ud2 & 0x8000);
8009 emit_move_insn (dest, GEN_INT (((ud2 << 16) ^ 0x80000000)
8010 - 0x80000000));
8011 if (ud1 != 0)
8012 emit_move_insn (copy_rtx (dest),
8013 gen_rtx_IOR (DImode, copy_rtx (dest),
8014 GEN_INT (ud1)));
8015 emit_move_insn (copy_rtx (dest),
8016 gen_rtx_ZERO_EXTEND (DImode,
8017 gen_lowpart (SImode,
8018 copy_rtx (dest))));
8020 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8021 || (ud4 == 0 && ! (ud3 & 0x8000)))
8023 emit_move_insn (dest, GEN_INT (((ud3 << 16) ^ 0x80000000)
8024 - 0x80000000));
8025 if (ud2 != 0)
8026 emit_move_insn (copy_rtx (dest),
8027 gen_rtx_IOR (DImode, copy_rtx (dest),
8028 GEN_INT (ud2)));
8029 emit_move_insn (copy_rtx (dest),
8030 gen_rtx_ASHIFT (DImode, copy_rtx (dest),
8031 GEN_INT (16)));
8032 if (ud1 != 0)
8033 emit_move_insn (copy_rtx (dest),
8034 gen_rtx_IOR (DImode, copy_rtx (dest),
8035 GEN_INT (ud1)));
8037 else
8039 emit_move_insn (dest, GEN_INT (((ud4 << 16) ^ 0x80000000)
8040 - 0x80000000));
8041 if (ud3 != 0)
8042 emit_move_insn (copy_rtx (dest),
8043 gen_rtx_IOR (DImode, copy_rtx (dest),
8044 GEN_INT (ud3)));
8046 emit_move_insn (copy_rtx (dest),
8047 gen_rtx_ASHIFT (DImode, copy_rtx (dest),
8048 GEN_INT (32)));
8049 if (ud2 != 0)
8050 emit_move_insn (copy_rtx (dest),
8051 gen_rtx_IOR (DImode, copy_rtx (dest),
8052 GEN_INT (ud2 << 16)));
8053 if (ud1 != 0)
8054 emit_move_insn (copy_rtx (dest),
8055 gen_rtx_IOR (DImode, copy_rtx (dest),
8056 GEN_INT (ud1)));
8059 return dest;
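/* Worst case above is the familiar five-insn 64-bit sequence, e.g. for
   a constant with all four halfwords (ud4..ud1) nonzero:
       lis   rD,ud4
       ori   rD,rD,ud3
       sldi  rD,rD,32
       oris  rD,rD,ud2
       ori   rD,rD,ud1
   (mnemonics shown for illustration; the function emits RTL, not asm).  */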
8062 /* Helper for the following. Get rid of [r+r] memory refs
8063 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
8065 static void
8066 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8068 if (reload_in_progress)
8069 return;
8071 if (GET_CODE (operands[0]) == MEM
8072 && GET_CODE (XEXP (operands[0], 0)) != REG
8073 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8074 GET_MODE (operands[0]), false))
8075 operands[0]
8076 = replace_equiv_address (operands[0],
8077 copy_addr_to_reg (XEXP (operands[0], 0)));
8079 if (GET_CODE (operands[1]) == MEM
8080 && GET_CODE (XEXP (operands[1], 0)) != REG
8081 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8082 GET_MODE (operands[1]), false))
8083 operands[1]
8084 = replace_equiv_address (operands[1],
8085 copy_addr_to_reg (XEXP (operands[1], 0)));
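/* E.g. (mem:TI (plus:SI (reg 3) (reg 4))) becomes (mem:TI (reg tmp))
   after tmp is loaded with the sum, since these multi-register moves
   are expanded with word-sized offsets that [r+r] addresses cannot
   take.  */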
8088 /* Generate a vector of constants to permute MODE for a little-endian
8089 storage operation by swapping the two halves of a vector. */
8090 static rtvec
8091 rs6000_const_vec (enum machine_mode mode)
8093 int i, subparts;
8094 rtvec v;
8096 switch (mode)
8098 case V1TImode:
8099 subparts = 1;
8100 break;
8101 case V2DFmode:
8102 case V2DImode:
8103 subparts = 2;
8104 break;
8105 case V4SFmode:
8106 case V4SImode:
8107 subparts = 4;
8108 break;
8109 case V8HImode:
8110 subparts = 8;
8111 break;
8112 case V16QImode:
8113 subparts = 16;
8114 break;
8115 default:
8116 gcc_unreachable();
8119 v = rtvec_alloc (subparts);
8121 for (i = 0; i < subparts / 2; ++i)
8122 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8123 for (i = subparts / 2; i < subparts; ++i)
8124 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8126 return v;
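/* For example, V4SImode yields the element order { 2, 3, 0, 1 } and
   V2DImode yields { 1, 0 }: each half of the vector is swapped with
   the other, which is exactly what lxvd2x/stxvd2x do to the data.  */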
8129 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8130 for a VSX load or store operation. */
8131 rtx
8132 rs6000_gen_le_vsx_permute (rtx source, enum machine_mode mode)
8134 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8135 return gen_rtx_VEC_SELECT (mode, source, par);
8138 /* Emit a little-endian load from vector memory location SOURCE to VSX
8139 register DEST in mode MODE. The load is done with two permuting
8140 insns that represent an lxvd2x and xxpermdi. */
8141 void
8142 rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode)
8144 rtx tmp, permute_mem, permute_reg;
8146 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8147 V1TImode). */
8148 if (mode == TImode || mode == V1TImode)
8150 mode = V2DImode;
8151 dest = gen_lowpart (V2DImode, dest);
8152 source = adjust_address (source, V2DImode, 0);
8155 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8156 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8157 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8158 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
8159 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
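/* The two sets above correspond to a sequence along the lines of
   (register numbers illustrative):
       lxvd2x   0,0,9        # load with the two doublewords swapped
       xxpermdi 34,0,0,2     # swap them back into element order  */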
8162 /* Emit a little-endian store to vector memory location DEST from VSX
8163 register SOURCE in mode MODE. The store is done with two permuting
8164 insns that represent an xxpermdi and an stxvd2x. */
8165 void
8166 rs6000_emit_le_vsx_store (rtx dest, rtx source, enum machine_mode mode)
8168 rtx tmp, permute_src, permute_tmp;
8170 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8171 V1TImode). */
8172 if (mode == TImode || mode == V1TImode)
8174 mode = V2DImode;
8175 dest = adjust_address (dest, V2DImode, 0);
8176 source = gen_lowpart (V2DImode, source);
8179 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8180 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8181 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8182 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
8183 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
8186 /* Emit a sequence representing a little-endian VSX load or store,
8187 moving data from SOURCE to DEST in mode MODE. This is done
8188 separately from rs6000_emit_move to ensure it is called only
8189 during expand. LE VSX loads and stores introduced later are
8190 handled with a split. The expand-time RTL generation allows
8191 us to optimize away redundant pairs of register-permutes. */
8192 void
8193 rs6000_emit_le_vsx_move (rtx dest, rtx source, enum machine_mode mode)
8195 gcc_assert (!BYTES_BIG_ENDIAN
8196 && VECTOR_MEM_VSX_P (mode)
8197 && !gpr_or_gpr_p (dest, source)
8198 && (MEM_P (source) ^ MEM_P (dest)));
8200 if (MEM_P (source))
8202 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8203 rs6000_emit_le_vsx_load (dest, source, mode);
8205 else
8207 if (!REG_P (source))
8208 source = force_reg (mode, source);
8209 rs6000_emit_le_vsx_store (dest, source, mode);
8213 /* Emit a move from SOURCE to DEST in mode MODE. */
8214 void
8215 rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
8217 rtx operands[2];
8218 operands[0] = dest;
8219 operands[1] = source;
8221 if (TARGET_DEBUG_ADDR)
8223 fprintf (stderr,
8224 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8225 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8226 GET_MODE_NAME (mode),
8227 reload_in_progress,
8228 reload_completed,
8229 can_create_pseudo_p ());
8230 debug_rtx (dest);
8231 fprintf (stderr, "source:\n");
8232 debug_rtx (source);
8235 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
8236 if (GET_CODE (operands[1]) == CONST_DOUBLE
8237 && ! FLOAT_MODE_P (mode)
8238 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8240 /* FIXME. This should never happen. */
8241 /* Since it seems that it does, do the safe thing and convert
8242 to a CONST_INT. */
8243 operands[1] = gen_int_mode (CONST_DOUBLE_LOW (operands[1]), mode);
8245 gcc_assert (GET_CODE (operands[1]) != CONST_DOUBLE
8246 || FLOAT_MODE_P (mode)
8247 || ((CONST_DOUBLE_HIGH (operands[1]) != 0
8248 || CONST_DOUBLE_LOW (operands[1]) < 0)
8249 && (CONST_DOUBLE_HIGH (operands[1]) != -1
8250 || CONST_DOUBLE_LOW (operands[1]) >= 0)));
8252 /* Check if GCC is setting up a block move that will end up using FP
8253 registers as temporaries. We must make sure this is acceptable. */
8254 if (GET_CODE (operands[0]) == MEM
8255 && GET_CODE (operands[1]) == MEM
8256 && mode == DImode
8257 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8258 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8259 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8260 ? 32 : MEM_ALIGN (operands[0])))
8261 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8262 ? 32
8263 : MEM_ALIGN (operands[1]))))
8264 && ! MEM_VOLATILE_P (operands [0])
8265 && ! MEM_VOLATILE_P (operands [1]))
8267 emit_move_insn (adjust_address (operands[0], SImode, 0),
8268 adjust_address (operands[1], SImode, 0));
8269 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8270 adjust_address (copy_rtx (operands[1]), SImode, 4));
8271 return;
8274 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8275 && !gpc_reg_operand (operands[1], mode))
8276 operands[1] = force_reg (mode, operands[1]);
8278 /* Recognize the case where operand[1] is a reference to thread-local
8279 data and load its address to a register. */
8280 if (rs6000_tls_referenced_p (operands[1]))
8282 enum tls_model model;
8283 rtx tmp = operands[1];
8284 rtx addend = NULL;
8286 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8288 addend = XEXP (XEXP (tmp, 0), 1);
8289 tmp = XEXP (XEXP (tmp, 0), 0);
8292 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8293 model = SYMBOL_REF_TLS_MODEL (tmp);
8294 gcc_assert (model != 0);
8296 tmp = rs6000_legitimize_tls_address (tmp, model);
8297 if (addend)
8299 tmp = gen_rtx_PLUS (mode, tmp, addend);
8300 tmp = force_operand (tmp, operands[0]);
8302 operands[1] = tmp;
8305 /* Handle the case where reload calls us with an invalid address. */
8306 if (reload_in_progress && mode == Pmode
8307 && (! general_operand (operands[1], mode)
8308 || ! nonimmediate_operand (operands[0], mode)))
8309 goto emit_set;
8311 /* 128-bit constant floating-point values on Darwin should really be
8312 loaded as two parts. */
8313 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
8314 && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
8316 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
8317 simplify_gen_subreg (DFmode, operands[1], mode, 0),
8318 DFmode);
8319 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
8320 GET_MODE_SIZE (DFmode)),
8321 simplify_gen_subreg (DFmode, operands[1], mode,
8322 GET_MODE_SIZE (DFmode)),
8323 DFmode);
8324 return;
8327 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
8328 cfun->machine->sdmode_stack_slot =
8329 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
8332 if (lra_in_progress
8333 && mode == SDmode
8334 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8335 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8336 && (REG_P (operands[1])
8337 || (GET_CODE (operands[1]) == SUBREG
8338 && REG_P (SUBREG_REG (operands[1])))))
8340 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
8341 ? SUBREG_REG (operands[1]) : operands[1]);
8342 enum reg_class cl;
8344 if (regno >= FIRST_PSEUDO_REGISTER)
8346 cl = reg_preferred_class (regno);
8347 gcc_assert (cl != NO_REGS);
8348 regno = ira_class_hard_regs[cl][0];
8350 if (FP_REGNO_P (regno))
8352 if (GET_MODE (operands[0]) != DDmode)
8353 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
8354 emit_insn (gen_movsd_store (operands[0], operands[1]));
8356 else if (INT_REGNO_P (regno))
8357 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8358 else
8359 gcc_unreachable();
8360 return;
8362 if (lra_in_progress
8363 && mode == SDmode
8364 && (REG_P (operands[0])
8365 || (GET_CODE (operands[0]) == SUBREG
8366 && REG_P (SUBREG_REG (operands[0]))))
8367 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8368 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8370 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
8371 ? SUBREG_REG (operands[0]) : operands[0]);
8372 enum reg_class cl;
8374 if (regno >= FIRST_PSEUDO_REGISTER)
8376 cl = reg_preferred_class (regno);
8377 gcc_assert (cl != NO_REGS);
8378 regno = ira_class_hard_regs[cl][0];
8380 if (FP_REGNO_P (regno))
8382 if (GET_MODE (operands[1]) != DDmode)
8383 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
8384 emit_insn (gen_movsd_load (operands[0], operands[1]));
8386 else if (INT_REGNO_P (regno))
8387 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8388 else
8389 gcc_unreachable();
8390 return;
8393 if (reload_in_progress
8394 && mode == SDmode
8395 && cfun->machine->sdmode_stack_slot != NULL_RTX
8396 && MEM_P (operands[0])
8397 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
8398 && REG_P (operands[1]))
8400 if (FP_REGNO_P (REGNO (operands[1])))
8402 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
8403 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8404 emit_insn (gen_movsd_store (mem, operands[1]));
8406 else if (INT_REGNO_P (REGNO (operands[1])))
8408 rtx mem = operands[0];
8409 if (BYTES_BIG_ENDIAN)
8410 mem = adjust_address_nv (mem, mode, 4);
8411 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8412 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
8414 else
8415 gcc_unreachable();
8416 return;
8418 if (reload_in_progress
8419 && mode == SDmode
8420 && REG_P (operands[0])
8421 && MEM_P (operands[1])
8422 && cfun->machine->sdmode_stack_slot != NULL_RTX
8423 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
8425 if (FP_REGNO_P (REGNO (operands[0])))
8427 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
8428 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8429 emit_insn (gen_movsd_load (operands[0], mem));
8431 else if (INT_REGNO_P (REGNO (operands[0])))
8433 rtx mem = operands[1];
8434 if (BYTES_BIG_ENDIAN)
8435 mem = adjust_address_nv (mem, mode, 4);
8436 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8437 emit_insn (gen_movsd_hardfloat (operands[0], mem));
8439 else
8440 gcc_unreachable();
8441 return;
8444 /* FIXME: In the long term, this switch statement should go away
8445 and be replaced by a sequence of tests based on things like
8446 mode == Pmode. */
8447 switch (mode)
8449 case HImode:
8450 case QImode:
8451 if (CONSTANT_P (operands[1])
8452 && GET_CODE (operands[1]) != CONST_INT)
8453 operands[1] = force_const_mem (mode, operands[1]);
8454 break;
8456 case TFmode:
8457 case TDmode:
8458 rs6000_eliminate_indexed_memrefs (operands);
8459 /* fall through */
8461 case DFmode:
8462 case DDmode:
8463 case SFmode:
8464 case SDmode:
8465 if (CONSTANT_P (operands[1])
8466 && ! easy_fp_constant (operands[1], mode))
8467 operands[1] = force_const_mem (mode, operands[1]);
8468 break;
8470 case V16QImode:
8471 case V8HImode:
8472 case V4SFmode:
8473 case V4SImode:
8474 case V4HImode:
8475 case V2SFmode:
8476 case V2SImode:
8477 case V1DImode:
8478 case V2DFmode:
8479 case V2DImode:
8480 case V1TImode:
8481 if (CONSTANT_P (operands[1])
8482 && !easy_vector_constant (operands[1], mode))
8483 operands[1] = force_const_mem (mode, operands[1]);
8484 break;
8486 case SImode:
8487 case DImode:
8488 /* Use the default pattern for the address of ELF small data. */
8489 if (TARGET_ELF
8490 && mode == Pmode
8491 && DEFAULT_ABI == ABI_V4
8492 && (GET_CODE (operands[1]) == SYMBOL_REF
8493 || GET_CODE (operands[1]) == CONST)
8494 && small_data_operand (operands[1], mode))
8496 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8497 return;
8500 if (DEFAULT_ABI == ABI_V4
8501 && mode == Pmode && mode == SImode
8502 && flag_pic == 1 && got_operand (operands[1], mode))
8504 emit_insn (gen_movsi_got (operands[0], operands[1]));
8505 return;
8508 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
8509 && TARGET_NO_TOC
8510 && ! flag_pic
8511 && mode == Pmode
8512 && CONSTANT_P (operands[1])
8513 && GET_CODE (operands[1]) != HIGH
8514 && GET_CODE (operands[1]) != CONST_INT)
8516 rtx target = (!can_create_pseudo_p ()
8517 ? operands[0]
8518 : gen_reg_rtx (mode));
8520 /* If this is a function address on -mcall-aixdesc,
8521 convert it to the address of the descriptor. */
8522 if (DEFAULT_ABI == ABI_AIX
8523 && GET_CODE (operands[1]) == SYMBOL_REF
8524 && XSTR (operands[1], 0)[0] == '.')
8526 const char *name = XSTR (operands[1], 0);
8527 rtx new_ref;
8528 while (*name == '.')
8529 name++;
8530 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
8531 CONSTANT_POOL_ADDRESS_P (new_ref)
8532 = CONSTANT_POOL_ADDRESS_P (operands[1]);
8533 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
8534 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
8535 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
8536 operands[1] = new_ref;
8539 if (DEFAULT_ABI == ABI_DARWIN)
8541 #if TARGET_MACHO
8542 if (MACHO_DYNAMIC_NO_PIC_P)
8544 /* Take care of any required data indirection. */
8545 operands[1] = rs6000_machopic_legitimize_pic_address (
8546 operands[1], mode, operands[0]);
8547 if (operands[0] != operands[1])
8548 emit_insn (gen_rtx_SET (VOIDmode,
8549 operands[0], operands[1]));
8550 return;
8552 #endif
8553 emit_insn (gen_macho_high (target, operands[1]));
8554 emit_insn (gen_macho_low (operands[0], target, operands[1]));
8555 return;
8558 emit_insn (gen_elf_high (target, operands[1]));
8559 emit_insn (gen_elf_low (operands[0], target, operands[1]));
8560 return;
8563 /* If this is a SYMBOL_REF that refers to a constant pool entry,
8564 and we have put it in the TOC, we just need to make a TOC-relative
8565 reference to it. */
8566 if (TARGET_TOC
8567 && GET_CODE (operands[1]) == SYMBOL_REF
8568 && use_toc_relative_ref (operands[1]))
8569 operands[1] = create_TOC_reference (operands[1], operands[0]);
8570 else if (mode == Pmode
8571 && CONSTANT_P (operands[1])
8572 && GET_CODE (operands[1]) != HIGH
8573 && ((GET_CODE (operands[1]) != CONST_INT
8574 && ! easy_fp_constant (operands[1], mode))
8575 || (GET_CODE (operands[1]) == CONST_INT
8576 && (num_insns_constant (operands[1], mode)
8577 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
8578 || (GET_CODE (operands[0]) == REG
8579 && FP_REGNO_P (REGNO (operands[0]))))
8580 && !toc_relative_expr_p (operands[1], false)
8581 && (TARGET_CMODEL == CMODEL_SMALL
8582 || can_create_pseudo_p ()
8583 || (REG_P (operands[0])
8584 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
8587 #if TARGET_MACHO
8588 /* Darwin uses a special PIC legitimizer. */
8589 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
8591 operands[1] =
8592 rs6000_machopic_legitimize_pic_address (operands[1], mode,
8593 operands[0]);
8594 if (operands[0] != operands[1])
8595 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8596 return;
8598 #endif
8600 /* If we are to limit the number of things we put in the TOC and
8601 this is a symbol plus a constant we can add in one insn,
8602 just put the symbol in the TOC and add the constant. Don't do
8603 this if reload is in progress. */
8604 if (GET_CODE (operands[1]) == CONST
8605 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
8606 && GET_CODE (XEXP (operands[1], 0)) == PLUS
8607 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
8608 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
8609 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
8610 && ! side_effects_p (operands[0]))
8612 rtx sym =
8613 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
8614 rtx other = XEXP (XEXP (operands[1], 0), 1);
8616 sym = force_reg (mode, sym);
8617 emit_insn (gen_add3_insn (operands[0], sym, other));
8618 return;
8621 operands[1] = force_const_mem (mode, operands[1]);
8623 if (TARGET_TOC
8624 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8625 && constant_pool_expr_p (XEXP (operands[1], 0))
8626 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
8627 get_pool_constant (XEXP (operands[1], 0)),
8628 get_pool_mode (XEXP (operands[1], 0))))
8630 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
8631 operands[0]);
8632 operands[1] = gen_const_mem (mode, tocref);
8633 set_mem_alias_set (operands[1], get_TOC_alias_set ());
8636 break;
8638 case TImode:
8639 if (!VECTOR_MEM_VSX_P (TImode))
8640 rs6000_eliminate_indexed_memrefs (operands);
8641 break;
8643 case PTImode:
8644 rs6000_eliminate_indexed_memrefs (operands);
8645 break;
8647 default:
8648 fatal_insn ("bad move", gen_rtx_SET (VOIDmode, dest, source));
8651 /* Above, we may have called force_const_mem which may have returned
8652 an invalid address. If we can, fix this up; otherwise, reload will
8653 have to deal with it. */
8654 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
8655 operands[1] = validize_mem (operands[1]);
8657 emit_set:
8658 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8661 /* Return true if a structure, union or array containing FIELD should be
8662 accessed using `BLKMODE'.
8664 For the SPE, simd types are V2SI, and gcc can be tempted to put the
8665 entire thing in a DI and use subregs to access the internals.
8666 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
8667 back-end. Because a single GPR can hold a V2SI, but not a DI, the
8668 best thing to do is set structs to BLKmode and avoid Severe Tire
8669 Damage.
8671 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
8672 fit into 1, whereas DI still needs two. */
8674 static bool
8675 rs6000_member_type_forces_blk (const_tree field, enum machine_mode mode)
8677 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
8678 || (TARGET_E500_DOUBLE && mode == DFmode));
8681 /* Nonzero if we can use a floating-point register to pass this arg. */
8682 #define USE_FP_FOR_ARG_P(CUM,MODE) \
8683 (SCALAR_FLOAT_MODE_P (MODE) \
8684 && (CUM)->fregno <= FP_ARG_MAX_REG \
8685 && TARGET_HARD_FLOAT && TARGET_FPRS)
8687 /* Nonzero if we can use an AltiVec register to pass this arg. */
8688 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
8689 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
8690 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
8691 && TARGET_ALTIVEC_ABI \
8692 && (NAMED))
8694 /* Walk down the type tree of TYPE counting consecutive base elements.
8695 If *MODEP is VOIDmode, then set it to the first valid floating point
8696 or vector type. If a non-floating point or vector type is found, or
8697 if a floating point or vector type that doesn't match a non-VOIDmode
8698 *MODEP is found, then return -1, otherwise return the count in the
8699 sub-tree. */
8701 static int
8702 rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
8704 enum machine_mode mode;
8705 HOST_WIDE_INT size;
8707 switch (TREE_CODE (type))
8709 case REAL_TYPE:
8710 mode = TYPE_MODE (type);
8711 if (!SCALAR_FLOAT_MODE_P (mode))
8712 return -1;
8714 if (*modep == VOIDmode)
8715 *modep = mode;
8717 if (*modep == mode)
8718 return 1;
8720 break;
8722 case COMPLEX_TYPE:
8723 mode = TYPE_MODE (TREE_TYPE (type));
8724 if (!SCALAR_FLOAT_MODE_P (mode))
8725 return -1;
8727 if (*modep == VOIDmode)
8728 *modep = mode;
8730 if (*modep == mode)
8731 return 2;
8733 break;
8735 case VECTOR_TYPE:
8736 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
8737 return -1;
8739 /* Use V4SImode as representative of all 128-bit vector types. */
8740 size = int_size_in_bytes (type);
8741 switch (size)
8743 case 16:
8744 mode = V4SImode;
8745 break;
8746 default:
8747 return -1;
8750 if (*modep == VOIDmode)
8751 *modep = mode;
8753 /* Vector modes are considered to be opaque: two vectors are
8754 equivalent for the purposes of being homogeneous aggregates
8755 if they are the same size. */
8756 if (*modep == mode)
8757 return 1;
8759 break;
8761 case ARRAY_TYPE:
8763 int count;
8764 tree index = TYPE_DOMAIN (type);
8766 /* Can't handle incomplete types. */
8767 if (!COMPLETE_TYPE_P (type))
8768 return -1;
8770 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
8771 if (count == -1
8772 || !index
8773 || !TYPE_MAX_VALUE (index)
8774 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
8775 || !TYPE_MIN_VALUE (index)
8776 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
8777 || count < 0)
8778 return -1;
8780 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
8781 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
8783 /* There must be no padding. */
8784 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
8785 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
8786 != count * GET_MODE_BITSIZE (*modep)))
8787 return -1;
8789 return count;
8792 case RECORD_TYPE:
8794 int count = 0;
8795 int sub_count;
8796 tree field;
8798 /* Can't handle incomplete types. */
8799 if (!COMPLETE_TYPE_P (type))
8800 return -1;
8802 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8804 if (TREE_CODE (field) != FIELD_DECL)
8805 continue;
8807 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
8808 if (sub_count < 0)
8809 return -1;
8810 count += sub_count;
8813 /* There must be no padding. */
8814 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
8815 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
8816 != count * GET_MODE_BITSIZE (*modep)))
8817 return -1;
8819 return count;
8822 case UNION_TYPE:
8823 case QUAL_UNION_TYPE:
8825 /* These aren't very interesting except in a degenerate case. */
8826 int count = 0;
8827 int sub_count;
8828 tree field;
8830 /* Can't handle incomplete types. */
8831 if (!COMPLETE_TYPE_P (type))
8832 return -1;
8834 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8836 if (TREE_CODE (field) != FIELD_DECL)
8837 continue;
8839 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
8840 if (sub_count < 0)
8841 return -1;
8842 count = count > sub_count ? count : sub_count;
8845 /* There must be no padding. */
8846 if (!tree_fits_uhwi_p (TYPE_SIZE (type))
8847 || ((HOST_WIDE_INT) tree_to_uhwi (TYPE_SIZE (type))
8848 != count * GET_MODE_BITSIZE (*modep)))
8849 return -1;
8851 return count;
8854 default:
8855 break;
8858 return -1;
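/* Worked examples for the walk above:
       struct { double re, im; }     -> 2 elements, *MODEP == DFmode
       struct { vector int a, b; }   -> 2 elements, *MODEP == V4SImode
       struct { double d; int i; }   -> -1 (mixed element types)  */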
8861 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
8862 float or vector aggregate that shall be passed in FP/vector registers
8863 according to the ELFv2 ABI, return the homogeneous element mode in
8864 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
8866 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
8868 static bool
8869 rs6000_discover_homogeneous_aggregate (enum machine_mode mode, const_tree type,
8870 enum machine_mode *elt_mode,
8871 int *n_elts)
8873 /* Note that we do not accept complex types at the top level as
8874 homogeneous aggregates; these types are handled via the
8875 targetm.calls.split_complex_arg mechanism. Complex types
8876 can be elements of homogeneous aggregates, however. */
8877 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
8879 enum machine_mode field_mode = VOIDmode;
8880 int field_count = rs6000_aggregate_candidate (type, &field_mode);
8882 if (field_count > 0)
8884 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode) ?
8885 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
8887 /* The ELFv2 ABI allows homogeneous aggregates to occupy
8888 up to AGGR_ARG_NUM_REG registers. */
8889 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
8891 if (elt_mode)
8892 *elt_mode = field_mode;
8893 if (n_elts)
8894 *n_elts = field_count;
8895 return true;
8900 if (elt_mode)
8901 *elt_mode = mode;
8902 if (n_elts)
8903 *n_elts = 1;
8904 return false;
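/* A sketch of the size bound: with AGGR_ARG_NUM_REG == 8, a
   "struct { double d[8]; }" is still homogeneous (8 elements, one
   register each), while "struct { double d[9]; }" overflows the limit
   and falls back to the ordinary argument-passing rules.  */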
8907 /* Return a nonzero value to say to return the function value in
8908 memory, just as large structures are always returned. TYPE will be
8909 the data type of the value, and FNTYPE will be the type of the
8910 function doing the returning, or @code{NULL} for libcalls.
8912 The AIX ABI for the RS/6000 specifies that all structures are
8913 returned in memory. The Darwin ABI does the same.
8915 For the Darwin 64 Bit ABI, a function result can be returned in
8916 registers or in memory, depending on the size of the return data
8917 type. If it is returned in registers, the value occupies the same
8918 registers as it would if it were the first and only function
8919 argument. Otherwise, the function places its result in memory at
8920 the location pointed to by GPR3.
8922 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
8923 but a draft put them in memory, and GCC used to implement the draft
8924 instead of the final standard. Therefore, aix_struct_return
8925 controls this instead of DEFAULT_ABI; V.4 targets needing backward
8926 compatibility can change DRAFT_V4_STRUCT_RET to override the
8927 default, and -m switches get the final word. See
8928 rs6000_option_override_internal for more details.
8930 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
8931 long double support is enabled. These values are returned in memory.
8933 int_size_in_bytes returns -1 for variable size objects, which go in
8934 memory always. The cast to unsigned makes -1 > 8. */
8936 static bool
8937 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8939 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
8940 if (TARGET_MACHO
8941 && rs6000_darwin64_abi
8942 && TREE_CODE (type) == RECORD_TYPE
8943 && int_size_in_bytes (type) > 0)
8945 CUMULATIVE_ARGS valcum;
8946 rtx valret;
8948 valcum.words = 0;
8949 valcum.fregno = FP_ARG_MIN_REG;
8950 valcum.vregno = ALTIVEC_ARG_MIN_REG;
8951 /* Do a trial code generation as if this were going to be passed
8952 as an argument; if any part goes in memory, we return NULL. */
8953 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
8954 if (valret)
8955 return false;
8956 /* Otherwise fall through to more conventional ABI rules. */
8959 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
8960 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
8961 NULL, NULL))
8962 return false;
8964 /* The ELFv2 ABI returns aggregates up to 16 bytes in registers. */
8965 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
8966 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
8967 return false;
8969 if (AGGREGATE_TYPE_P (type)
8970 && (aix_struct_return
8971 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
8972 return true;
8974 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
8975 modes only exist for GCC vector types if -maltivec. */
8976 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
8977 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
8978 return false;
8980 /* Return synthetic vectors in memory. */
8981 if (TREE_CODE (type) == VECTOR_TYPE
8982 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
8984 static bool warned_for_return_big_vectors = false;
8985 if (!warned_for_return_big_vectors)
8987 warning (0, "GCC vector returned by reference: "
8988 "non-standard ABI extension with no compatibility guarantee");
8989 warned_for_return_big_vectors = true;
8991 return true;
8994 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && TYPE_MODE (type) == TFmode)
8995 return true;
8997 return false;
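/* Rough summary of the cases above: "struct { int a, b; }" (8 bytes)
   comes back in r3/r4 under the final SVR4 rules but in memory under
   AIX; ELFv2 keeps any aggregate up to 16 bytes in registers, and a
   homogeneous aggregate may use FP/vector registers instead.  */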
9000 /* Specify whether values returned in registers should be at the most
9001 significant end of a register. We want aggregates returned by
9002 value to match the way aggregates are passed to functions. */
9004 static bool
9005 rs6000_return_in_msb (const_tree valtype)
9007 return (DEFAULT_ABI == ABI_ELFv2
9008 && BYTES_BIG_ENDIAN
9009 && AGGREGATE_TYPE_P (valtype)
9010 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9013 #ifdef HAVE_AS_GNU_ATTRIBUTE
9014 /* Return TRUE if a call to function FNDECL may be one that
9015 potentially affects the function calling ABI of the object file. */
9017 static bool
9018 call_ABI_of_interest (tree fndecl)
9020 if (cgraph_state == CGRAPH_STATE_EXPANSION)
9022 struct cgraph_node *c_node;
9024 /* Libcalls are always interesting. */
9025 if (fndecl == NULL_TREE)
9026 return true;
9028 /* Any call to an external function is interesting. */
9029 if (DECL_EXTERNAL (fndecl))
9030 return true;
9032 /* Interesting functions that we are emitting in this object file. */
9033 c_node = cgraph_get_node (fndecl);
9034 c_node = cgraph_function_or_thunk_node (c_node, NULL);
9035 return !cgraph_only_called_directly_p (c_node);
9037 return false;
9039 #endif
9041 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9042 for a call to a function whose data type is FNTYPE.
9043 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
9045 For incoming args we set the number of arguments in the prototype large
9046 so we never return a PARALLEL. */
9048 void
9049 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9050 rtx libname ATTRIBUTE_UNUSED, int incoming,
9051 int libcall, int n_named_args,
9052 tree fndecl ATTRIBUTE_UNUSED,
9053 enum machine_mode return_mode ATTRIBUTE_UNUSED)
9055 static CUMULATIVE_ARGS zero_cumulative;
9057 *cum = zero_cumulative;
9058 cum->words = 0;
9059 cum->fregno = FP_ARG_MIN_REG;
9060 cum->vregno = ALTIVEC_ARG_MIN_REG;
9061 cum->prototype = (fntype && prototype_p (fntype));
9062 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9063 ? CALL_LIBCALL : CALL_NORMAL);
9064 cum->sysv_gregno = GP_ARG_MIN_REG;
9065 cum->stdarg = stdarg_p (fntype);
9067 cum->nargs_prototype = 0;
9068 if (incoming || cum->prototype)
9069 cum->nargs_prototype = n_named_args;
9071 /* Check for a longcall attribute. */
9072 if ((!fntype && rs6000_default_long_calls)
9073 || (fntype
9074 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9075 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9076 cum->call_cookie |= CALL_LONG;
9078 if (TARGET_DEBUG_ARG)
9080 fprintf (stderr, "\ninit_cumulative_args:");
9081 if (fntype)
9083 tree ret_type = TREE_TYPE (fntype);
9084 fprintf (stderr, " ret code = %s,",
9085 get_tree_code_name (TREE_CODE (ret_type)));
9088 if (cum->call_cookie & CALL_LONG)
9089 fprintf (stderr, " longcall,");
9091 fprintf (stderr, " proto = %d, nargs = %d\n",
9092 cum->prototype, cum->nargs_prototype);
9095 #ifdef HAVE_AS_GNU_ATTRIBUTE
9096 if (DEFAULT_ABI == ABI_V4)
9098 cum->escapes = call_ABI_of_interest (fndecl);
9099 if (cum->escapes)
9101 tree return_type;
9103 if (fntype)
9105 return_type = TREE_TYPE (fntype);
9106 return_mode = TYPE_MODE (return_type);
9108 else
9109 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9111 if (return_type != NULL)
9113 if (TREE_CODE (return_type) == RECORD_TYPE
9114 && TYPE_TRANSPARENT_AGGR (return_type))
9116 return_type = TREE_TYPE (first_field (return_type));
9117 return_mode = TYPE_MODE (return_type);
9119 if (AGGREGATE_TYPE_P (return_type)
9120 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9121 <= 8))
9122 rs6000_returns_struct = true;
9124 if (SCALAR_FLOAT_MODE_P (return_mode))
9125 rs6000_passes_float = true;
9126 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9127 || SPE_VECTOR_MODE (return_mode))
9128 rs6000_passes_vector = true;
9131 #endif
9133 if (fntype
9134 && !TARGET_ALTIVEC
9135 && TARGET_ALTIVEC_ABI
9136 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9138 error ("cannot return value in vector register because"
9139 " altivec instructions are disabled, use -maltivec"
9140 " to enable them");
9144 /* Return true if TYPE must be passed on the stack and not in registers. */
9146 static bool
9147 rs6000_must_pass_in_stack (enum machine_mode mode, const_tree type)
9149 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9150 return must_pass_in_stack_var_size (mode, type);
9151 else
9152 return must_pass_in_stack_var_size_or_pad (mode, type);
9155 /* If defined, a C expression which determines whether, and in which
9156 direction, to pad out an argument with extra space. The value
9157 should be of type `enum direction': either `upward' to pad above
9158 the argument, `downward' to pad below, or `none' to inhibit
9159 padding.
9161 For the AIX ABI structs are always stored left shifted in their
9162 argument slot. */
9164 enum direction
9165 function_arg_padding (enum machine_mode mode, const_tree type)
9167 #ifndef AGGREGATE_PADDING_FIXED
9168 #define AGGREGATE_PADDING_FIXED 0
9169 #endif
9170 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9171 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9172 #endif
9174 if (!AGGREGATE_PADDING_FIXED)
9176 /* GCC used to pass structures of the same size as integer types as
9177 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
9178 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9179 passed padded downward, except that -mstrict-align further
9180 muddied the water in that multi-component structures of 2 and 4
9181 bytes in size were passed padded upward.
9183 The following arranges for best compatibility with previous
9184 versions of gcc, but removes the -mstrict-align dependency. */
9185 if (BYTES_BIG_ENDIAN)
9187 HOST_WIDE_INT size = 0;
9189 if (mode == BLKmode)
9191 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
9192 size = int_size_in_bytes (type);
9194 else
9195 size = GET_MODE_SIZE (mode);
9197 if (size == 1 || size == 2 || size == 4)
9198 return downward;
9200 return upward;
9203 if (AGGREGATES_PAD_UPWARD_ALWAYS)
9205 if (type != 0 && AGGREGATE_TYPE_P (type))
9206 return upward;
9209 /* Fall back to the default. */
9210 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
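#if 0
/* Illustrative examples, not part of the upstream source; the struct
   tags are hypothetical.  On a big-endian target with neither
   AGGREGATE_PADDING_FIXED nor AGGREGATES_PAD_UPWARD_ALWAYS:  */
struct s2 { char a, b; };      /* size 2: padded downward, the way a
                                  2-byte integer would be.  */
struct s3 { char a, b, c; };   /* size 3: padded upward.  */
struct s8 { int a, b; };       /* size 8: padded upward; only sizes
                                  1, 2 and 4 pad downward.  */
#endif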
9213 /* If defined, a C expression that gives the alignment boundary, in bits,
9214 of an argument with the specified mode and type. If it is not defined,
9215 PARM_BOUNDARY is used for all arguments.
9217 V.4 wants long longs and doubles to be double word aligned. Just
9218 testing the mode size is a boneheaded way to do this as it means
9219 that other types such as complex int are also double word aligned.
9220 However, we're stuck with this because changing the ABI might break
9221 existing library interfaces.
9223 Doubleword align SPE vectors.
9224 Quadword align Altivec/VSX vectors.
9225 Quadword align large synthetic vector types. */
9227 static unsigned int
9228 rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
9230 enum machine_mode elt_mode;
9231 int n_elts;
9233 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9235 if (DEFAULT_ABI == ABI_V4
9236 && (GET_MODE_SIZE (mode) == 8
9237 || (TARGET_HARD_FLOAT
9238 && TARGET_FPRS
9239 && (mode == TFmode || mode == TDmode))))
9240 return 64;
9241 else if (SPE_VECTOR_MODE (mode)
9242 || (type && TREE_CODE (type) == VECTOR_TYPE
9243 && int_size_in_bytes (type) >= 8
9244 && int_size_in_bytes (type) < 16))
9245 return 64;
9246 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9247 || (type && TREE_CODE (type) == VECTOR_TYPE
9248 && int_size_in_bytes (type) >= 16))
9249 return 128;
9251 /* Aggregate types that need > 8 byte alignment are quadword-aligned
9252 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
9253 -mcompat-align-parm is used. */
9254 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
9255 || DEFAULT_ABI == ABI_ELFv2)
9256 && type && TYPE_ALIGN (type) > 64)
9258 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
9259 or homogeneous float/vector aggregates here. We already handled
9260 vector aggregates above, but still need to check for float here. */
9261 bool aggregate_p = (AGGREGATE_TYPE_P (type)
9262 && !SCALAR_FLOAT_MODE_P (elt_mode));
9264 /* We used to check for BLKmode instead of the above aggregate type
9265 check. Warn when this results in any difference to the ABI. */
9266 if (aggregate_p != (mode == BLKmode))
9268 static bool warned;
9269 if (!warned && warn_psabi)
9271 warned = true;
9272 inform (input_location,
9273 "the ABI of passing aggregates with %d-byte alignment"
9274 " will change in a future GCC release",
9275 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
9279 /* GCC 4.8/4.9 Note: To avoid any ABI change on a release branch, we
9280 keep using the BLKmode check, but warn if there will be differences
9281 in future GCC releases. */
9282 if (mode == BLKmode)
9283 return 128;
9286 /* Similar for the Darwin64 ABI. Note that for historical reasons we
9287 implement the "aggregate type" check as a BLKmode check here; this
9288 means certain aggregate types are in fact not aligned. */
9289 if (TARGET_MACHO && rs6000_darwin64_abi
9290 && mode == BLKmode
9291 && type && TYPE_ALIGN (type) > 64)
9292 return 128;
9294 return PARM_BOUNDARY;
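/* Illustrative results of the function above, not part of the
   upstream source, all in bits:

     double under the 32-bit SVR4 ABI (mode size 8)       ->  64
     SPE vector, or 8..15 byte synthetic vector type      ->  64
     AltiVec/VSX vector, or 16+ byte synthetic vector     -> 128
     BLKmode aggregate aligned > 8 bytes (ELFv2, new AIX) -> 128
     anything else                                        ->  PARM_BOUNDARY  */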
9297 /* The offset in words to the start of the parameter save area. */
9299 static unsigned int
9300 rs6000_parm_offset (void)
9302 return (DEFAULT_ABI == ABI_V4 ? 2
9303 : DEFAULT_ABI == ABI_ELFv2 ? 4
9304 : 6);
9307 /* For a function parm of MODE and TYPE, return the starting word in
9308 the parameter area. NWORDS of the parameter area are already used. */
9310 static unsigned int
9311 rs6000_parm_start (enum machine_mode mode, const_tree type,
9312 unsigned int nwords)
9314 unsigned int align;
9316 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
9317 return nwords + (-(rs6000_parm_offset () + nwords) & align);
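/* Worked example, not part of the upstream source: under the 32-bit
   SVR4 ABI rs6000_parm_offset () is 2 and PARM_BOUNDARY is 32, so a
   doubleword-aligned argument has ALIGN == 64/32 - 1 == 1.  With
   NWORDS == 3 words already used:

     3 + (-(2 + 3) & 1) == 3 + 1 == 4

   the argument starts at word 4; 2 + 4 is even, so its slot really
   is doubleword-aligned within the frame.  */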
9320 /* Compute the size (in words) of a function argument. */
9322 static unsigned long
9323 rs6000_arg_size (enum machine_mode mode, const_tree type)
9325 unsigned long size;
9327 if (mode != BLKmode)
9328 size = GET_MODE_SIZE (mode);
9329 else
9330 size = int_size_in_bytes (type);
9332 if (TARGET_32BIT)
9333 return (size + 3) >> 2;
9334 else
9335 return (size + 7) >> 3;
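/* Worked example, not part of the upstream source: a 10-byte BLKmode
   struct occupies (10 + 3) >> 2 == 3 words on a 32-bit target and
   (10 + 7) >> 3 == 2 doublewords on a 64-bit target.  */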
9338 /* Use this to flush pending int fields. */
9340 static void
9341 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
9342 HOST_WIDE_INT bitpos, int final)
9344 unsigned int startbit, endbit;
9345 int intregs, intoffset;
9346 enum machine_mode mode;
9348 /* Handle the situations where a float is taking up the first half
9349 of the GPR, and the other half is empty (typically due to
9350 alignment restrictions). We can detect this by an 8-byte-aligned
9351 int field, or by seeing that this is the final flush for this
9352 argument. Count the word and continue on. */
9353 if (cum->floats_in_gpr == 1
9354 && (cum->intoffset % 64 == 0
9355 || (cum->intoffset == -1 && final)))
9357 cum->words++;
9358 cum->floats_in_gpr = 0;
9361 if (cum->intoffset == -1)
9362 return;
9364 intoffset = cum->intoffset;
9365 cum->intoffset = -1;
9366 cum->floats_in_gpr = 0;
9368 if (intoffset % BITS_PER_WORD != 0)
9370 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9371 MODE_INT, 0);
9372 if (mode == BLKmode)
9374 /* We couldn't find an appropriate mode, which happens,
9375 e.g., in packed structs when there are 3 bytes to load.
9376 Force intoffset back to the beginning of the word in this
9377 case. */
9378 intoffset = intoffset & -BITS_PER_WORD;
9382 startbit = intoffset & -BITS_PER_WORD;
9383 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9384 intregs = (endbit - startbit) / BITS_PER_WORD;
9385 cum->words += intregs;
9386 /* words should be unsigned. */
9387 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
9389 int pad = (endbit/BITS_PER_WORD) - cum->words;
9390 cum->words += pad;
9394 /* The darwin64 ABI calls for us to recurse down through structs,
9395 looking for elements passed in registers. Unfortunately, we have
9396 to track int register count here also because of misalignments
9397 in powerpc alignment mode. */
9399 static void
9400 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
9401 const_tree type,
9402 HOST_WIDE_INT startbitpos)
9404 tree f;
9406 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9407 if (TREE_CODE (f) == FIELD_DECL)
9409 HOST_WIDE_INT bitpos = startbitpos;
9410 tree ftype = TREE_TYPE (f);
9411 enum machine_mode mode;
9412 if (ftype == error_mark_node)
9413 continue;
9414 mode = TYPE_MODE (ftype);
9416 if (DECL_SIZE (f) != 0
9417 && tree_fits_uhwi_p (bit_position (f)))
9418 bitpos += int_bit_position (f);
9420 /* ??? FIXME: else assume zero offset. */
9422 if (TREE_CODE (ftype) == RECORD_TYPE)
9423 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
9424 else if (USE_FP_FOR_ARG_P (cum, mode))
9426 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
9427 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9428 cum->fregno += n_fpregs;
9429 /* Single-precision floats present a special problem for
9430 us, because they are smaller than an 8-byte GPR, and so
9431 the structure-packing rules combined with the standard
9432 varargs behavior mean that we want to pack float/float
9433 and float/int combinations into a single register's
9434 space. This is complicated by the arg advance flushing,
9435 which works on arbitrarily large groups of int-type
9436 fields. */
9437 if (mode == SFmode)
9439 if (cum->floats_in_gpr == 1)
9441 /* Two floats in a word; count the word and reset
9442 the float count. */
9443 cum->words++;
9444 cum->floats_in_gpr = 0;
9446 else if (bitpos % 64 == 0)
9448 /* A float at the beginning of an 8-byte word;
9449 count it and put off adjusting cum->words until
9450 we see if an arg advance flush is going to do it
9451 for us. */
9452 cum->floats_in_gpr++;
9454 else
9456 /* The float is at the end of a word, preceded
9457 by integer fields, so the arg advance flush
9458 just above has already set cum->words and
9459 everything is taken care of. */
9462 else
9463 cum->words += n_fpregs;
9465 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9467 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9468 cum->vregno++;
9469 cum->words += 2;
9471 else if (cum->intoffset == -1)
9472 cum->intoffset = bitpos;
9476 /* Check for an item that needs to be considered specially under the darwin 64
9477 bit ABI. These are record types where the mode is BLK or the structure is
9478 8 bytes in size. */
9479 static int
9480 rs6000_darwin64_struct_check_p (enum machine_mode mode, const_tree type)
9482 return rs6000_darwin64_abi
9483 && ((mode == BLKmode
9484 && TREE_CODE (type) == RECORD_TYPE
9485 && int_size_in_bytes (type) > 0)
9486 || (type && TREE_CODE (type) == RECORD_TYPE
9487 && int_size_in_bytes (type) == 8)) ? 1 : 0;
9490 /* Update the data in CUM to advance over an argument
9491 of mode MODE and data type TYPE.
9492 (TYPE is null for libcalls where that information may not be available.)
9494 Note that for args passed by reference, function_arg will be called
9495 with MODE and TYPE set to that of the pointer to the arg, not the arg
9496 itself. */
9498 static void
9499 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9500 const_tree type, bool named, int depth)
9502 enum machine_mode elt_mode;
9503 int n_elts;
9505 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9507 /* Only tick off an argument if we're not recursing. */
9508 if (depth == 0)
9509 cum->nargs_prototype--;
9511 #ifdef HAVE_AS_GNU_ATTRIBUTE
9512 if (DEFAULT_ABI == ABI_V4
9513 && cum->escapes)
9515 if (SCALAR_FLOAT_MODE_P (mode))
9516 rs6000_passes_float = true;
9517 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
9518 rs6000_passes_vector = true;
9519 else if (SPE_VECTOR_MODE (mode)
9520 && !cum->stdarg
9521 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9522 rs6000_passes_vector = true;
9524 #endif
9526 if (TARGET_ALTIVEC_ABI
9527 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9528 || (type && TREE_CODE (type) == VECTOR_TYPE
9529 && int_size_in_bytes (type) == 16)))
9531 bool stack = false;
9533 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
9535 cum->vregno += n_elts;
9537 if (!TARGET_ALTIVEC)
9538 error ("cannot pass argument in vector register because"
9539 " altivec instructions are disabled, use -maltivec"
9540 " to enable them");
9542 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
9543 even if it is going to be passed in a vector register.
9544 Darwin does the same for variable-argument functions. */
9545 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9546 && TARGET_64BIT)
9547 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
9548 stack = true;
9550 else
9551 stack = true;
9553 if (stack)
9555 int align;
9557 /* Vector parameters must be 16-byte aligned. In 32-bit
9558 mode this means we need to take into account the offset
9559 to the parameter save area. In 64-bit mode, they just
9560 have to start on an even word, since the parameter save
9561 area is 16-byte aligned. */
9562 if (TARGET_32BIT)
9563 align = -(rs6000_parm_offset () + cum->words) & 3;
9564 else
9565 align = cum->words & 1;
9566 cum->words += align + rs6000_arg_size (mode, type);
9568 if (TARGET_DEBUG_ARG)
9570 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
9571 cum->words, align);
9572 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
9573 cum->nargs_prototype, cum->prototype,
9574 GET_MODE_NAME (mode));
9578 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
9579 && !cum->stdarg
9580 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9581 cum->sysv_gregno++;
9583 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9585 int size = int_size_in_bytes (type);
9586 /* Variable sized types have size == -1 and are
9587 treated as if consisting entirely of ints.
9588 Pad to 16 byte boundary if needed. */
9589 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9590 && (cum->words % 2) != 0)
9591 cum->words++;
9592 /* For varargs, we can just go up by the size of the struct. */
9593 if (!named)
9594 cum->words += (size + 7) / 8;
9595 else
9597 /* It is tempting to say int register count just goes up by
9598 sizeof(type)/8, but this is wrong in a case such as
9599 { int; double; int; } [powerpc alignment]. We have to
9600 grovel through the fields for these too. */
9601 cum->intoffset = 0;
9602 cum->floats_in_gpr = 0;
9603 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
9604 rs6000_darwin64_record_arg_advance_flush (cum,
9605 size * BITS_PER_UNIT, 1);
9607 if (TARGET_DEBUG_ARG)
9609 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
9610 cum->words, TYPE_ALIGN (type), size);
9611 fprintf (stderr,
9612 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
9613 cum->nargs_prototype, cum->prototype,
9614 GET_MODE_NAME (mode));
9617 else if (DEFAULT_ABI == ABI_V4)
9619 if (TARGET_HARD_FLOAT && TARGET_FPRS
9620 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
9621 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
9622 || (mode == TFmode && !TARGET_IEEEQUAD)
9623 || mode == SDmode || mode == DDmode || mode == TDmode))
9625 /* _Decimal128 must use an even/odd register pair. This assumes
9626 that the register number is odd when fregno is odd. */
9627 if (mode == TDmode && (cum->fregno % 2) == 1)
9628 cum->fregno++;
9630 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
9631 <= FP_ARG_V4_MAX_REG)
9632 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
9633 else
9635 cum->fregno = FP_ARG_V4_MAX_REG + 1;
9636 if (mode == DFmode || mode == TFmode
9637 || mode == DDmode || mode == TDmode)
9638 cum->words += cum->words & 1;
9639 cum->words += rs6000_arg_size (mode, type);
9642 else
9644 int n_words = rs6000_arg_size (mode, type);
9645 int gregno = cum->sysv_gregno;
9647 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
9648 (r7,r8) or (r9,r10). As is any other 2-word item such
9649 as complex int due to a historical mistake. */
9650 if (n_words == 2)
9651 gregno += (1 - gregno) & 1;
9653 /* Multi-reg args are not split between registers and stack. */
9654 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9656 /* Long long and SPE vectors are aligned on the stack.
9657 So are other 2 word items such as complex int due to
9658 a historical mistake. */
9659 if (n_words == 2)
9660 cum->words += cum->words & 1;
9661 cum->words += n_words;
9664 /* Note: we keep accumulating gregno even after we have started
9665 spilling to the stack; this is how expand_builtin_saveregs
9666 can tell that spilling has begun. */
9667 cum->sysv_gregno = gregno + n_words;
9670 if (TARGET_DEBUG_ARG)
9672 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9673 cum->words, cum->fregno);
9674 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
9675 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
9676 fprintf (stderr, "mode = %4s, named = %d\n",
9677 GET_MODE_NAME (mode), named);
9680 else
9682 int n_words = rs6000_arg_size (mode, type);
9683 int start_words = cum->words;
9684 int align_words = rs6000_parm_start (mode, type, start_words);
9686 cum->words = align_words + n_words;
9688 if (SCALAR_FLOAT_MODE_P (elt_mode)
9689 && TARGET_HARD_FLOAT && TARGET_FPRS)
9691 /* _Decimal128 must be passed in an even/odd float register pair.
9692 This assumes that the register number is odd when fregno is
9693 odd. */
9694 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
9695 cum->fregno++;
9696 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
9699 if (TARGET_DEBUG_ARG)
9701 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9702 cum->words, cum->fregno);
9703 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
9704 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
9705 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
9706 named, align_words - start_words, depth);
9711 static void
9712 rs6000_function_arg_advance (cumulative_args_t cum, enum machine_mode mode,
9713 const_tree type, bool named)
9715 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
9716 0);
9719 static rtx
9720 spe_build_register_parallel (enum machine_mode mode, int gregno)
9722 rtx r1, r3, r5, r7;
9724 switch (mode)
9726 case DFmode:
9727 r1 = gen_rtx_REG (DImode, gregno);
9728 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9729 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
9731 case DCmode:
9732 case TFmode:
9733 r1 = gen_rtx_REG (DImode, gregno);
9734 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9735 r3 = gen_rtx_REG (DImode, gregno + 2);
9736 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9737 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
9739 case TCmode:
9740 r1 = gen_rtx_REG (DImode, gregno);
9741 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9742 r3 = gen_rtx_REG (DImode, gregno + 2);
9743 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9744 r5 = gen_rtx_REG (DImode, gregno + 4);
9745 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
9746 r7 = gen_rtx_REG (DImode, gregno + 6);
9747 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
9748 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
9750 default:
9751 gcc_unreachable ();
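/* Illustrative sketch, not part of the upstream source: for DCmode
   with gregno == 5 the function above builds roughly

     (parallel:DC [(expr_list (reg:DI 5) (const_int 0))
                   (expr_list (reg:DI 7) (const_int 8))])

   i.e. the real part in r5/r6 at offset 0 and the imaginary part in
   r7/r8 at offset 8.  */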
9755 /* Determine where to put a SIMD argument on the SPE. */
9756 static rtx
9757 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
9758 const_tree type)
9760 int gregno = cum->sysv_gregno;
9762 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
9763 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
9764 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
9765 || mode == DCmode || mode == TCmode))
9767 int n_words = rs6000_arg_size (mode, type);
9769 /* Doubles go in an odd/even register pair (r5/r6, etc). */
9770 if (mode == DFmode)
9771 gregno += (1 - gregno) & 1;
9773 /* Multi-reg args are not split between registers and stack. */
9774 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9775 return NULL_RTX;
9777 return spe_build_register_parallel (mode, gregno);
9779 if (cum->stdarg)
9781 int n_words = rs6000_arg_size (mode, type);
9783 /* SPE vectors are put in odd registers. */
9784 if (n_words == 2 && (gregno & 1) == 0)
9785 gregno += 1;
9787 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
9789 rtx r1, r2;
9790 enum machine_mode m = SImode;
9792 r1 = gen_rtx_REG (m, gregno);
9793 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
9794 r2 = gen_rtx_REG (m, gregno + 1);
9795 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
9796 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
9798 else
9799 return NULL_RTX;
9801 else
9803 if (gregno <= GP_ARG_MAX_REG)
9804 return gen_rtx_REG (mode, gregno);
9805 else
9806 return NULL_RTX;
9810 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
9811 structure between cum->intoffset and bitpos to integer registers. */
9813 static void
9814 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
9815 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
9817 enum machine_mode mode;
9818 unsigned int regno;
9819 unsigned int startbit, endbit;
9820 int this_regno, intregs, intoffset;
9821 rtx reg;
9823 if (cum->intoffset == -1)
9824 return;
9826 intoffset = cum->intoffset;
9827 cum->intoffset = -1;
9829 /* If this is the trailing part of a word, try to only load that
9830 much into the register. Otherwise load the whole register. Note
9831 that in the latter case we may pick up unwanted bits. It's not a
9832 problem at the moment but we may wish to revisit. */
9834 if (intoffset % BITS_PER_WORD != 0)
9836 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9837 MODE_INT, 0);
9838 if (mode == BLKmode)
9840 /* We couldn't find an appropriate mode, which happens,
9841 e.g., in packed structs when there are 3 bytes to load.
9842 Force intoffset back to the beginning of the word in this
9843 case. */
9844 intoffset = intoffset & -BITS_PER_WORD;
9845 mode = word_mode;
9848 else
9849 mode = word_mode;
9851 startbit = intoffset & -BITS_PER_WORD;
9852 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9853 intregs = (endbit - startbit) / BITS_PER_WORD;
9854 this_regno = cum->words + intoffset / BITS_PER_WORD;
9856 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
9857 cum->use_stack = 1;
9859 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
9860 if (intregs <= 0)
9861 return;
9863 intoffset /= BITS_PER_UNIT;
9866 regno = GP_ARG_MIN_REG + this_regno;
9867 reg = gen_rtx_REG (mode, regno);
9868 rvec[(*k)++] =
9869 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
9871 this_regno += 1;
9872 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
9873 mode = word_mode;
9874 intregs -= 1;
9876 while (intregs > 0);
9879 /* Recursive workhorse for the following. */
9881 static void
9882 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
9883 HOST_WIDE_INT startbitpos, rtx rvec[],
9884 int *k)
9886 tree f;
9888 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9889 if (TREE_CODE (f) == FIELD_DECL)
9891 HOST_WIDE_INT bitpos = startbitpos;
9892 tree ftype = TREE_TYPE (f);
9893 enum machine_mode mode;
9894 if (ftype == error_mark_node)
9895 continue;
9896 mode = TYPE_MODE (ftype);
9898 if (DECL_SIZE (f) != 0
9899 && tree_fits_uhwi_p (bit_position (f)))
9900 bitpos += int_bit_position (f);
9902 /* ??? FIXME: else assume zero offset. */
9904 if (TREE_CODE (ftype) == RECORD_TYPE)
9905 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
9906 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
9908 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
9909 #if 0
9910 switch (mode)
9912 case SCmode: mode = SFmode; break;
9913 case DCmode: mode = DFmode; break;
9914 case TCmode: mode = TFmode; break;
9915 default: break;
9917 #endif
9918 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
9919 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
9921 gcc_assert (cum->fregno == FP_ARG_MAX_REG
9922 && (mode == TFmode || mode == TDmode));
9923 /* Long double or _Decimal128 split over regs and memory. */
9924 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
9925 cum->use_stack = 1;
9927 rvec[(*k)++]
9928 = gen_rtx_EXPR_LIST (VOIDmode,
9929 gen_rtx_REG (mode, cum->fregno++),
9930 GEN_INT (bitpos / BITS_PER_UNIT));
9931 if (mode == TFmode || mode == TDmode)
9932 cum->fregno++;
9934 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9936 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
9937 rvec[(*k)++]
9938 = gen_rtx_EXPR_LIST (VOIDmode,
9939 gen_rtx_REG (mode, cum->vregno++),
9940 GEN_INT (bitpos / BITS_PER_UNIT));
9942 else if (cum->intoffset == -1)
9943 cum->intoffset = bitpos;
9947 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
9948 the register(s) to be used for each field and subfield of a struct
9949 being passed by value, along with the offset of where the
9950 register's value may be found in the block. FP fields go in FP
9951 register, vector fields go in vector registers, and everything
9952 else goes in int registers, packed as in memory.
9954 This code is also used for function return values. RETVAL indicates
9955 whether this is the case.
9957 Much of this is taken from the SPARC V9 port, which has a similar
9958 calling convention. */
9960 static rtx
9961 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
9962 bool named, bool retval)
9964 rtx rvec[FIRST_PSEUDO_REGISTER];
9965 int k = 1, kbase = 1;
9966 HOST_WIDE_INT typesize = int_size_in_bytes (type);
9967 /* This is a copy; modifications are not visible to our caller. */
9968 CUMULATIVE_ARGS copy_cum = *orig_cum;
9969 CUMULATIVE_ARGS *cum = &copy_cum;
9971 /* Pad to 16 byte boundary if needed. */
9972 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9973 && (cum->words % 2) != 0)
9974 cum->words++;
9976 cum->intoffset = 0;
9977 cum->use_stack = 0;
9978 cum->named = named;
9980 /* Put entries into rvec[] for individual FP and vector fields, and
9981 for the chunks of memory that go in int regs. Note we start at
9982 element 1; 0 is reserved for an indication of using memory, and
9983 may or may not be filled in below. */
9984 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
9985 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
9987 /* If any part of the struct went on the stack put all of it there.
9988 This hack is because the generic code for
9989 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
9990 parts of the struct are not at the beginning. */
9991 if (cum->use_stack)
9993 if (retval)
9994 return NULL_RTX; /* doesn't go in registers at all */
9995 kbase = 0;
9996 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
9998 if (k > 1 || cum->use_stack)
9999 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10000 else
10001 return NULL_RTX;
10004 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10006 static rtx
10007 rs6000_mixed_function_arg (enum machine_mode mode, const_tree type,
10008 int align_words)
10010 int n_units;
10011 int i, k;
10012 rtx rvec[GP_ARG_NUM_REG + 1];
10014 if (align_words >= GP_ARG_NUM_REG)
10015 return NULL_RTX;
10017 n_units = rs6000_arg_size (mode, type);
10019 /* Optimize the simple case where the arg fits in one gpr, except in
10020 the case of BLKmode due to assign_parms assuming that registers are
10021 BITS_PER_WORD wide. */
10022 if (n_units == 0
10023 || (n_units == 1 && mode != BLKmode))
10024 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10026 k = 0;
10027 if (align_words + n_units > GP_ARG_NUM_REG)
10028 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10029 using a magic NULL_RTX component.
10030 This is not strictly correct. Only some of the arg belongs in
10031 memory, not all of it. However, the normal scheme using
10032 function_arg_partial_nregs can result in unusual subregs, e.g.
10033 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10034 store the whole arg to memory is often more efficient than code
10035 to store pieces, and we know that space is available in the right
10036 place for the whole arg. */
10037 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10039 i = 0;
10042 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10043 rtx off = GEN_INT (i++ * 4);
10044 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10046 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10048 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
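/* Illustrative sketch, not part of the upstream source: with
   -m32 -mpowerpc64, a DImode argument at align_words == 7 has
   n_units == 2 but only r10 left, so the function above returns

     (parallel:DI [(expr_list (nil) (const_int 0))
                   (expr_list (reg:SI 10) (const_int 0))])

   where the magic (nil) element says part of the argument also goes
   to memory and the (reg:SI 10) element covers the first 4 bytes.  */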
10051 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10052 but must also be copied into the parameter save area starting at
10053 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10054 to the GPRs and/or memory. Return the number of elements used. */
10056 static int
10057 rs6000_psave_function_arg (enum machine_mode mode, const_tree type,
10058 int align_words, rtx *rvec)
10060 int k = 0;
10062 if (align_words < GP_ARG_NUM_REG)
10064 int n_words = rs6000_arg_size (mode, type);
10066 if (align_words + n_words > GP_ARG_NUM_REG
10067 || mode == BLKmode
10068 || (TARGET_32BIT && TARGET_POWERPC64))
10070 /* If this is partially on the stack, then we only
10071 include the portion actually in registers here. */
10072 enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10073 int i = 0;
10075 if (align_words + n_words > GP_ARG_NUM_REG)
10077 /* Not all of the arg fits in gprs. Say that it goes in memory
10078 too, using a magic NULL_RTX component. Also see comment in
10079 rs6000_mixed_function_arg for why the normal
10080 function_arg_partial_nregs scheme doesn't work in this case. */
10081 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10086 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10087 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10088 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10090 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10092 else
10094 /* The whole arg fits in gprs. */
10095 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10096 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10099 else
10101 /* It's entirely in memory. */
10102 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10105 return k;
10108 /* RVEC is a vector of K components of an argument of mode MODE.
10109 Construct the final function_arg return value from it. */
10111 static rtx
10112 rs6000_finish_function_arg (enum machine_mode mode, rtx *rvec, int k)
10114 gcc_assert (k >= 1);
10116 /* Avoid returning a PARALLEL in the trivial cases. */
10117 if (k == 1)
10119 if (XEXP (rvec[0], 0) == NULL_RTX)
10120 return NULL_RTX;
10122 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10123 return XEXP (rvec[0], 0);
10126 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10129 /* Determine where to put an argument to a function.
10130 Value is zero to push the argument on the stack,
10131 or a hard register in which to store the argument.
10133 MODE is the argument's machine mode.
10134 TYPE is the data type of the argument (as a tree).
10135 This is null for libcalls where that information may
10136 not be available.
10137 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10138 the preceding args and about the function being called. It is
10139 not modified in this routine.
10140 NAMED is nonzero if this argument is a named parameter
10141 (otherwise it is an extra parameter matching an ellipsis).
10143 On RS/6000 the first eight words of non-FP are normally in registers
10144 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10145 Under V.4, the first 8 FP args are in registers.
10147 If this is floating-point and no prototype is specified, we use
10148 both an FP and integer register (or possibly FP reg and stack). Library
10149 functions (when CALL_LIBCALL is set) always have the proper types for args,
10150 so we can pass the FP value just in one register. emit_library_function
10151 doesn't support PARALLEL anyway.
10153 Note that for args passed by reference, function_arg will be called
10154 with MODE and TYPE set to that of the pointer to the arg, not the arg
10155 itself. */
10157 static rtx
10158 rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
10159 const_tree type, bool named)
10161 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10162 enum rs6000_abi abi = DEFAULT_ABI;
10163 enum machine_mode elt_mode;
10164 int n_elts;
10166 /* Return a marker to indicate whether CR1 needs to set or clear the
10167 bit that V.4 uses to say fp args were passed in registers.
10168 Assume that we don't need the marker for software floating point,
10169 or compiler generated library calls. */
10170 if (mode == VOIDmode)
10172 if (abi == ABI_V4
10173 && (cum->call_cookie & CALL_LIBCALL) == 0
10174 && (cum->stdarg
10175 || (cum->nargs_prototype < 0
10176 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10178 /* For the SPE, we need to crxor CR6 always. */
10179 if (TARGET_SPE_ABI)
10180 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10181 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10182 return GEN_INT (cum->call_cookie
10183 | ((cum->fregno == FP_ARG_MIN_REG)
10184 ? CALL_V4_SET_FP_ARGS
10185 : CALL_V4_CLEAR_FP_ARGS));
10188 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
10191 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10193 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10195 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
10196 if (rslt != NULL_RTX)
10197 return rslt;
10198 /* Else fall through to usual handling. */
10201 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10203 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10204 rtx r, off;
10205 int i, k = 0;
10207 /* Do we also need to pass this argument in the parameter
10208 save area? */
10209 if (TARGET_64BIT && ! cum->prototype)
10211 int align_words = (cum->words + 1) & ~1;
10212 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10215 /* Describe where this argument goes in the vector registers. */
10216 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
10218 r = gen_rtx_REG (elt_mode, cum->vregno + i);
10219 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10220 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10223 return rs6000_finish_function_arg (mode, rvec, k);
10225 else if (TARGET_ALTIVEC_ABI
10226 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
10227 || (type && TREE_CODE (type) == VECTOR_TYPE
10228 && int_size_in_bytes (type) == 16)))
10230 if (named || abi == ABI_V4)
10231 return NULL_RTX;
10232 else
10234 /* Vector parameters to varargs functions under AIX or Darwin
10235 get passed in memory and possibly also in GPRs. */
10236 int align, align_words, n_words;
10237 enum machine_mode part_mode;
10239 /* Vector parameters must be 16-byte aligned. In 32-bit
10240 mode this means we need to take into account the offset
10241 to the parameter save area. In 64-bit mode, they just
10242 have to start on an even word, since the parameter save
10243 area is 16-byte aligned. */
10244 if (TARGET_32BIT)
10245 align = -(rs6000_parm_offset () + cum->words) & 3;
10246 else
10247 align = cum->words & 1;
10248 align_words = cum->words + align;
10250 /* Out of registers? Memory, then. */
10251 if (align_words >= GP_ARG_NUM_REG)
10252 return NULL_RTX;
10254 if (TARGET_32BIT && TARGET_POWERPC64)
10255 return rs6000_mixed_function_arg (mode, type, align_words);
10257 /* The vector value goes in GPRs. Only the part of the
10258 value in GPRs is reported here. */
10259 part_mode = mode;
10260 n_words = rs6000_arg_size (mode, type);
10261 if (align_words + n_words > GP_ARG_NUM_REG)
10262 /* Fortunately, there are only two possibilities, the value
10263 is either wholly in GPRs or half in GPRs and half not. */
10264 part_mode = DImode;
10266 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
10269 else if (TARGET_SPE_ABI && TARGET_SPE
10270 && (SPE_VECTOR_MODE (mode)
10271 || (TARGET_E500_DOUBLE && (mode == DFmode
10272 || mode == DCmode
10273 || mode == TFmode
10274 || mode == TCmode))))
10275 return rs6000_spe_function_arg (cum, mode, type);
10277 else if (abi == ABI_V4)
10279 if (TARGET_HARD_FLOAT && TARGET_FPRS
10280 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10281 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10282 || (mode == TFmode && !TARGET_IEEEQUAD)
10283 || mode == SDmode || mode == DDmode || mode == TDmode))
10285 /* _Decimal128 must use an even/odd register pair. This assumes
10286 that the register number is odd when fregno is odd. */
10287 if (mode == TDmode && (cum->fregno % 2) == 1)
10288 cum->fregno++;
10290 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
10291 <= FP_ARG_V4_MAX_REG)
10292 return gen_rtx_REG (mode, cum->fregno);
10293 else
10294 return NULL_RTX;
10296 else
10298 int n_words = rs6000_arg_size (mode, type);
10299 int gregno = cum->sysv_gregno;
10301 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10302 (r7,r8) or (r9,r10). As is any other 2-word item such
10303 as complex int due to a historical mistake. */
10304 if (n_words == 2)
10305 gregno += (1 - gregno) & 1;
10307 /* Multi-reg args are not split between registers and stack. */
10308 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10309 return NULL_RTX;
10311 if (TARGET_32BIT && TARGET_POWERPC64)
10312 return rs6000_mixed_function_arg (mode, type,
10313 gregno - GP_ARG_MIN_REG);
10314 return gen_rtx_REG (mode, gregno);
10317 else
10319 int align_words = rs6000_parm_start (mode, type, cum->words);
10321 /* _Decimal128 must be passed in an even/odd float register pair.
10322 This assumes that the register number is odd when fregno is odd. */
10323 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10324 cum->fregno++;
10326 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10328 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10329 rtx r, off;
10330 int i, k = 0;
10331 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10332 int fpr_words;
10334 /* Do we also need to pass this argument in the parameter
10335 save area? */
10336 if (type && (cum->nargs_prototype <= 0
10337 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10338 && TARGET_XL_COMPAT
10339 && align_words >= GP_ARG_NUM_REG)))
10340 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10342 /* Describe where this argument goes in the fprs. */
10343 for (i = 0; i < n_elts
10344 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
10346 /* Check if the argument is split over registers and memory.
10347 This can only ever happen for long double or _Decimal128;
10348 complex types are handled via split_complex_arg. */
10349 enum machine_mode fmode = elt_mode;
10350 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
10352 gcc_assert (fmode == TFmode || fmode == TDmode);
10353 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
10356 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
10357 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10358 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10361 /* If there were not enough FPRs to hold the argument, the rest
10362 usually goes into memory. However, if the current position
10363 is still within the register parameter area, a portion may
10364 actually have to go into GPRs.
10366 Note that it may happen that the portion of the argument
10367 passed in the first "half" of the first GPR was already
10368 passed in the last FPR as well.
10370 For unnamed arguments, we already set up GPRs to cover the
10371 whole argument in rs6000_psave_function_arg, so there is
10372 nothing further to do at this point.
10374 GCC 4.8/4.9 Note: This was implemented incorrectly in earlier
10375 GCC releases. To avoid any ABI change on the release branch,
10376 we retain that original implementation here, but warn if we
10377 encounter a case where the ABI will change in the future. */
10378 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
10379 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
10380 && cum->nargs_prototype > 0)
10382 static bool warned;
10383 if (!warned && warn_psabi)
10385 warned = true;
10386 inform (input_location,
10387 "the ABI of passing homogeneous float aggregates"
10388 " will change in a future GCC release");
10392 return rs6000_finish_function_arg (mode, rvec, k);
10394 else if (align_words < GP_ARG_NUM_REG)
10396 if (TARGET_32BIT && TARGET_POWERPC64)
10397 return rs6000_mixed_function_arg (mode, type, align_words);
10399 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10401 else
10402 return NULL_RTX;
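/* Illustrative sketch, not part of the upstream source: under the
   ELFv2 ABI a homogeneous aggregate such as the hypothetical
     struct point { double x; double y; };
   is discovered with elt_mode == DFmode and n_elts == 2, and (with
   FPRs available and a prototype in scope) the FPR branch above
   returns a PARALLEL of two DFmode registers at offsets 0 and 8.  */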
10406 /* For an arg passed partly in registers and partly in memory, this is
10407 the number of bytes passed in registers. For args passed entirely in
10408 registers or entirely in memory, zero. When an arg is described by a
10409 PARALLEL, perhaps using more than one register type, this function
10410 returns the number of bytes used by the first element of the PARALLEL. */
10412 static int
10413 rs6000_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
10414 tree type, bool named)
10416 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10417 bool passed_in_gprs = true;
10418 int ret = 0;
10419 int align_words;
10420 enum machine_mode elt_mode;
10421 int n_elts;
10423 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10425 if (DEFAULT_ABI == ABI_V4)
10426 return 0;
10428 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10430 /* If we are passing this arg in the fixed parameter save area
10431 (gprs or memory) as well as VRs, we do not use the partial
10432 bytes mechanism; instead, rs6000_function_arg will return a
10433 PARALLEL including a memory element as necessary. */
10434 if (TARGET_64BIT && ! cum->prototype)
10435 return 0;
10437 /* Otherwise, we pass in VRs only. Check for partial copies. */
10438 passed_in_gprs = false;
10439 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
10440 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
10443 /* In this complicated case we just disable the partial_nregs code. */
10444 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10445 return 0;
10447 align_words = rs6000_parm_start (mode, type, cum->words);
10449 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10451 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10453 /* If we are passing this arg in the fixed parameter save area
10454 (gprs or memory) as well as FPRs, we do not use the partial
10455 bytes mechanism; instead, rs6000_function_arg will return a
10456 PARALLEL including a memory element as necessary. */
10457 if (type
10458 && (cum->nargs_prototype <= 0
10459 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10460 && TARGET_XL_COMPAT
10461 && align_words >= GP_ARG_NUM_REG)))
10462 return 0;
10464 /* Otherwise, we pass in FPRs only. Check for partial copies. */
10465 passed_in_gprs = false;
10466 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
10467 ret = ((FP_ARG_MAX_REG + 1 - cum->fregno)
10468 * MIN (8, GET_MODE_SIZE (elt_mode)));
10471 if (passed_in_gprs
10472 && align_words < GP_ARG_NUM_REG
10473 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
10474 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
10476 if (ret != 0 && TARGET_DEBUG_ARG)
10477 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
10479 return ret;
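/* Worked example, not part of the upstream source: on 32-bit AIX a
   12-byte BLKmode struct starting at align_words == 7 needs
   rs6000_arg_size == 3 words, but only one GPR (r10) is left, so

     ret = (GP_ARG_NUM_REG - align_words) * 4 = (8 - 7) * 4 = 4

   i.e. 4 bytes travel in r10 and the remaining 8 go on the stack.  */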
10482 /* A C expression that indicates when an argument must be passed by
10483 reference. If nonzero for an argument, a copy of that argument is
10484 made in memory and a pointer to the argument is passed instead of
10485 the argument itself. The pointer is passed in whatever way is
10486 appropriate for passing a pointer to that type.
10488 Under V.4, aggregates and long double are passed by reference.
10490 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
10491 reference unless the AltiVec vector extension ABI is in force.
10493 As an extension to all ABIs, variable sized types are passed by
10494 reference. */
10496 static bool
10497 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
10498 enum machine_mode mode, const_tree type,
10499 bool named ATTRIBUTE_UNUSED)
10501 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
10503 if (TARGET_DEBUG_ARG)
10504 fprintf (stderr, "function_arg_pass_by_reference: V4 long double\n");
10505 return 1;
10508 if (!type)
10509 return 0;
10511 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
10513 if (TARGET_DEBUG_ARG)
10514 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
10515 return 1;
10518 if (int_size_in_bytes (type) < 0)
10520 if (TARGET_DEBUG_ARG)
10521 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
10522 return 1;
10525 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10526 modes only exist for GCC vector types if -maltivec. */
10527 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
10529 if (TARGET_DEBUG_ARG)
10530 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
10531 return 1;
10534 /* Pass synthetic vectors in memory. */
10535 if (TREE_CODE (type) == VECTOR_TYPE
10536 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10538 static bool warned_for_pass_big_vectors = false;
10539 if (TARGET_DEBUG_ARG)
10540 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
10541 if (!warned_for_pass_big_vectors)
10543 warning (0, "GCC vector passed by reference: "
10544 "non-standard ABI extension with no compatibility guarantee");
10545 warned_for_pass_big_vectors = true;
10547 return 1;
10550 return 0;
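#if 0
/* Illustrative example, not part of the upstream source: a synthetic
   GCC vector wider than the hardware supports.  */
typedef int v8si __attribute__ ((vector_size (32)));
/* sizeof (v8si) == 32 exceeds the 16-byte AltiVec limit, so the
   final test above passes v8si arguments by reference, after the
   one-time "no compatibility guarantee" warning.  */
#endif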
10553 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
10554 already processed. Return true if the parameter must be passed
10555 (fully or partially) on the stack. */
10557 static bool
10558 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
10560 enum machine_mode mode;
10561 int unsignedp;
10562 rtx entry_parm;
10564 /* Catch errors. */
10565 if (type == NULL || type == error_mark_node)
10566 return true;
10568 /* Handle types with no storage requirement. */
10569 if (TYPE_MODE (type) == VOIDmode)
10570 return false;
10572 /* Handle complex types. */
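/* Both components of a complex value have the same type, so the
   second call below looks redundant; each call, however, also
   advances ARGS_SO_FAR past one component, so two calls are
   needed.  */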
10573 if (TREE_CODE (type) == COMPLEX_TYPE)
10574 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
10575 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
10577 /* Handle transparent aggregates. */
10578 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
10579 && TYPE_TRANSPARENT_AGGR (type))
10580 type = TREE_TYPE (first_field (type));
10582 /* See if this arg was passed by invisible reference. */
10583 if (pass_by_reference (get_cumulative_args (args_so_far),
10584 TYPE_MODE (type), type, true))
10585 type = build_pointer_type (type);
10587 /* Find mode as it is passed by the ABI. */
10588 unsignedp = TYPE_UNSIGNED (type);
10589 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
10591 /* If we must pass in stack, we need a stack. */
10592 if (rs6000_must_pass_in_stack (mode, type))
10593 return true;
10595 /* If there is no incoming register, we need a stack. */
10596 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
10597 if (entry_parm == NULL)
10598 return true;
10600 /* Likewise if we need to pass both in registers and on the stack. */
10601 if (GET_CODE (entry_parm) == PARALLEL
10602 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
10603 return true;
10605 /* Also true if we're partially in registers and partially not. */
10606 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
10607 return true;
10609 /* Update info on where next arg arrives in registers. */
10610 rs6000_function_arg_advance (args_so_far, mode, type, true);
10611 return false;
10614 /* Return true if FUN has no prototype, has a variable argument
10615 list, or passes any parameter in memory. */
10617 static bool
10618 rs6000_function_parms_need_stack (tree fun, bool incoming)
10620 tree fntype, result;
10621 CUMULATIVE_ARGS args_so_far_v;
10622 cumulative_args_t args_so_far;
10624 if (!fun)
10625 /* Must be a libcall, all of which only use reg parms. */
10626 return false;
10628 fntype = fun;
10629 if (!TYPE_P (fun))
10630 fntype = TREE_TYPE (fun);
10632 /* Varargs functions need the parameter save area. */
10633 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
10634 return true;
10636 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
10637 args_so_far = pack_cumulative_args (&args_so_far_v);
10639 /* When incoming, we will have been passed the function decl.
10640 It is necessary to use the decl to handle K&R style functions,
10641 where TYPE_ARG_TYPES may not be available. */
10642 if (incoming)
10644 gcc_assert (DECL_P (fun));
10645 result = DECL_RESULT (fun);
10647 else
10648 result = TREE_TYPE (fntype);
10650 if (result && aggregate_value_p (result, fntype))
10652 if (!TYPE_P (result))
10653 result = TREE_TYPE (result);
10654 result = build_pointer_type (result);
10655 rs6000_parm_needs_stack (args_so_far, result);
10658 if (incoming)
10660 tree parm;
10662 for (parm = DECL_ARGUMENTS (fun);
10663 parm && parm != void_list_node;
10664 parm = TREE_CHAIN (parm))
10665 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
10666 return true;
10668 else
10670 function_args_iterator args_iter;
10671 tree arg_type;
10673 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
10674 if (rs6000_parm_needs_stack (args_so_far, arg_type))
10675 return true;
10678 return false;
10681 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
10682 usually a constant depending on the ABI. However, in the ELFv2 ABI
10683 the register parameter area is optional when calling a function that
10684 has a prototype in scope, has no variable argument list, and passes
10685 all parameters in registers. */
10687 int
10688 rs6000_reg_parm_stack_space (tree fun, bool incoming)
10690 int reg_parm_stack_space;
10692 switch (DEFAULT_ABI)
10694 default:
10695 reg_parm_stack_space = 0;
10696 break;
10698 case ABI_AIX:
10699 case ABI_DARWIN:
10700 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10701 break;
10703 case ABI_ELFv2:
10704 /* ??? Recomputing this every time is a bit expensive. Is there
10705 a place to cache this information? */
10706 if (rs6000_function_parms_need_stack (fun, incoming))
10707 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10708 else
10709 reg_parm_stack_space = 0;
10710 break;
10713 return reg_parm_stack_space;
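/* Worked examples, not part of the upstream source: AIX and Darwin
   reserve 64 bytes (8 doublewords) when 64-bit and 32 bytes when
   32-bit; ELFv2 reserves the same space only when the function is
   varargs, lacks a prototype, or passes some parameter (partly) in
   memory, and 0 bytes otherwise; all other ABIs reserve nothing.  */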
10716 static void
10717 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
10719 int i;
10720 enum machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
10722 if (nregs == 0)
10723 return;
10725 for (i = 0; i < nregs; i++)
10727 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
10728 if (reload_completed)
10730 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
10731 tem = NULL_RTX;
10732 else
10733 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
10734 i * GET_MODE_SIZE (reg_mode));
10736 else
10737 tem = replace_equiv_address (tem, XEXP (tem, 0));
10739 gcc_assert (tem);
10741 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
10745 /* Perform any actions needed for a function that is receiving a
10746 variable number of arguments.
10748 CUM is as above.
10750 MODE and TYPE are the mode and type of the current parameter.
10752 PRETEND_SIZE is a variable that should be set to the amount of stack
10753 that must be pushed by the prolog to pretend that our caller pushed it.
10756 Normally, this macro will push all remaining incoming registers on the
10757 stack and set PRETEND_SIZE to the length of the registers pushed. */
10759 static void
10760 setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
10761 tree type, int *pretend_size ATTRIBUTE_UNUSED,
10762 int no_rtl)
10764 CUMULATIVE_ARGS next_cum;
10765 int reg_size = TARGET_32BIT ? 4 : 8;
10766 rtx save_area = NULL_RTX, mem;
10767 int first_reg_offset;
10768 alias_set_type set;
10770 /* Skip the last named argument. */
10771 next_cum = *get_cumulative_args (cum);
10772 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
10774 if (DEFAULT_ABI == ABI_V4)
10776 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
10778 if (! no_rtl)
10780 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
10781 HOST_WIDE_INT offset = 0;
10783 /* Try to optimize the size of the varargs save area.
10784 The ABI requires that ap.reg_save_area is doubleword
10785 aligned, but we don't need to allocate space for all
10786 the bytes, only those into which we will actually save
10787 anything. */
10788 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
10789 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
10790 if (TARGET_HARD_FLOAT && TARGET_FPRS
10791 && next_cum.fregno <= FP_ARG_V4_MAX_REG
10792 && cfun->va_list_fpr_size)
10794 if (gpr_reg_num)
10795 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
10796 * UNITS_PER_FP_WORD;
10797 if (cfun->va_list_fpr_size
10798 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10799 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
10800 else
10801 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10802 * UNITS_PER_FP_WORD;
10804 if (gpr_reg_num)
10806 offset = -((first_reg_offset * reg_size) & ~7);
10807 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
10809 gpr_reg_num = cfun->va_list_gpr_size;
10810 if (reg_size == 4 && (first_reg_offset & 1))
10811 gpr_reg_num++;
10813 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
10815 else if (fpr_size)
10816 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
10817 * UNITS_PER_FP_WORD
10818 - (int) (GP_ARG_NUM_REG * reg_size);
10820 if (gpr_size + fpr_size)
10822 rtx reg_save_area
10823 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
10824 gcc_assert (GET_CODE (reg_save_area) == MEM);
10825 reg_save_area = XEXP (reg_save_area, 0);
10826 if (GET_CODE (reg_save_area) == PLUS)
10828 gcc_assert (XEXP (reg_save_area, 0)
10829 == virtual_stack_vars_rtx);
10830 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
10831 offset += INTVAL (XEXP (reg_save_area, 1));
10833 else
10834 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
10837 cfun->machine->varargs_save_offset = offset;
10838 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
10841 else
10843 first_reg_offset = next_cum.words;
10844 save_area = virtual_incoming_args_rtx;
10846 if (targetm.calls.must_pass_in_stack (mode, type))
10847 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
10850 set = get_varargs_alias_set ();
10851 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
10852 && cfun->va_list_gpr_size)
10854 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
10856 if (va_list_gpr_counter_field)
10857 /* V4 va_list_gpr_size counts number of registers needed. */
10858 n_gpr = cfun->va_list_gpr_size;
10859 else
10860 /* char * va_list instead counts number of bytes needed. */
10861 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
10863 if (nregs > n_gpr)
10864 nregs = n_gpr;
10866 mem = gen_rtx_MEM (BLKmode,
10867 plus_constant (Pmode, save_area,
10868 first_reg_offset * reg_size));
10869 MEM_NOTRAP_P (mem) = 1;
10870 set_mem_alias_set (mem, set);
10871 set_mem_align (mem, BITS_PER_WORD);
10873 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
10874 nregs);
10877 /* Save FP registers if needed. */
10878 if (DEFAULT_ABI == ABI_V4
10879 && TARGET_HARD_FLOAT && TARGET_FPRS
10880 && ! no_rtl
10881 && next_cum.fregno <= FP_ARG_V4_MAX_REG
10882 && cfun->va_list_fpr_size)
10884 int fregno = next_cum.fregno, nregs;
10885 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
10886 rtx lab = gen_label_rtx ();
10887 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
10888 * UNITS_PER_FP_WORD);
10890 emit_jump_insn
10891 (gen_rtx_SET (VOIDmode,
10892 pc_rtx,
10893 gen_rtx_IF_THEN_ELSE (VOIDmode,
10894 gen_rtx_NE (VOIDmode, cr1,
10895 const0_rtx),
10896 gen_rtx_LABEL_REF (VOIDmode, lab),
10897 pc_rtx)));
10899 for (nregs = 0;
10900 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
10901 fregno++, off += UNITS_PER_FP_WORD, nregs++)
10903 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
10904 ? DFmode : SFmode,
10905 plus_constant (Pmode, save_area, off));
10906 MEM_NOTRAP_P (mem) = 1;
10907 set_mem_alias_set (mem, set);
10908 set_mem_align (mem, GET_MODE_ALIGNMENT (
10909 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
10910 ? DFmode : SFmode));
10911 emit_move_insn (mem, gen_rtx_REG (
10912 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
10913 ? DFmode : SFmode, fregno));
10916 emit_label (lab);
10920 /* Create the va_list data type. */
10922 static tree
10923 rs6000_build_builtin_va_list (void)
10925 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
10927 /* For AIX, prefer 'char *' because that's what the system
10928 header files like. */
10929 if (DEFAULT_ABI != ABI_V4)
10930 return build_pointer_type (char_type_node);
10932 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
10933 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
10934 get_identifier ("__va_list_tag"), record);
10936 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
10937 unsigned_char_type_node);
10938 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
10939 unsigned_char_type_node);
10940 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
10941 every user file. */
10942 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
10943 get_identifier ("reserved"), short_unsigned_type_node);
10944 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
10945 get_identifier ("overflow_arg_area"),
10946 ptr_type_node);
10947 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
10948 get_identifier ("reg_save_area"),
10949 ptr_type_node);
10951 va_list_gpr_counter_field = f_gpr;
10952 va_list_fpr_counter_field = f_fpr;
10954 DECL_FIELD_CONTEXT (f_gpr) = record;
10955 DECL_FIELD_CONTEXT (f_fpr) = record;
10956 DECL_FIELD_CONTEXT (f_res) = record;
10957 DECL_FIELD_CONTEXT (f_ovf) = record;
10958 DECL_FIELD_CONTEXT (f_sav) = record;
10960 TYPE_STUB_DECL (record) = type_decl;
10961 TYPE_NAME (record) = type_decl;
10962 TYPE_FIELDS (record) = f_gpr;
10963 DECL_CHAIN (f_gpr) = f_fpr;
10964 DECL_CHAIN (f_fpr) = f_res;
10965 DECL_CHAIN (f_res) = f_ovf;
10966 DECL_CHAIN (f_ovf) = f_sav;
10968 layout_type (record);
10970 /* The correct type is an array type of one element. */
10971 return build_array_type (record, build_index_type (size_zero_node));
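/* Illustration only (editor's sketch, not from any system header): the
   record built above corresponds to roughly the following user-visible
   type; the counter ranges are assumptions based on the eight GP and
   eight FP argument registers of the SVR4 calling convention.  */
#if 0
typedef struct __illustrative_va_list_tag
{
  unsigned char gpr;		/* index of next saved GP arg register, 0..8 */
  unsigned char fpr;		/* index of next saved FP arg register, 0..8 */
  unsigned short reserved;	/* padding, named to quiet -Wpadded */
  void *overflow_arg_area;	/* arguments that were passed on the stack */
  void *reg_save_area;		/* block of saved argument registers */
} __illustrative_va_list[1];	/* array of one element, as returned above */
#endif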
10974 /* Implement va_start. */
10976 static void
10977 rs6000_va_start (tree valist, rtx nextarg)
10979 HOST_WIDE_INT words, n_gpr, n_fpr;
10980 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
10981 tree gpr, fpr, ovf, sav, t;
10983 /* Only SVR4 needs something special. */
10984 if (DEFAULT_ABI != ABI_V4)
10986 std_expand_builtin_va_start (valist, nextarg);
10987 return;
10990 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
10991 f_fpr = DECL_CHAIN (f_gpr);
10992 f_res = DECL_CHAIN (f_fpr);
10993 f_ovf = DECL_CHAIN (f_res);
10994 f_sav = DECL_CHAIN (f_ovf);
10996 valist = build_simple_mem_ref (valist);
10997 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
10998 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
10999 f_fpr, NULL_TREE);
11000 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11001 f_ovf, NULL_TREE);
11002 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11003 f_sav, NULL_TREE);
11005 /* Count number of gp and fp argument registers used. */
11006 words = crtl->args.info.words;
11007 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11008 GP_ARG_NUM_REG);
11009 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11010 FP_ARG_NUM_REG);
11012 if (TARGET_DEBUG_ARG)
11013 fprintf (stderr, "va_start: words = "HOST_WIDE_INT_PRINT_DEC", n_gpr = "
11014 HOST_WIDE_INT_PRINT_DEC", n_fpr = "HOST_WIDE_INT_PRINT_DEC"\n",
11015 words, n_gpr, n_fpr);
11017 if (cfun->va_list_gpr_size)
11019 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11020 build_int_cst (NULL_TREE, n_gpr));
11021 TREE_SIDE_EFFECTS (t) = 1;
11022 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11025 if (cfun->va_list_fpr_size)
11027 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11028 build_int_cst (NULL_TREE, n_fpr));
11029 TREE_SIDE_EFFECTS (t) = 1;
11030 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11032 #ifdef HAVE_AS_GNU_ATTRIBUTE
11033 if (call_ABI_of_interest (cfun->decl))
11034 rs6000_passes_float = true;
11035 #endif
11038 /* Find the overflow area. */
11039 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11040 if (words != 0)
11041 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
11042 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11043 TREE_SIDE_EFFECTS (t) = 1;
11044 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11046 /* If there were no va_arg invocations, don't set up the register
11047 save area. */
11048 if (!cfun->va_list_gpr_size
11049 && !cfun->va_list_fpr_size
11050 && n_gpr < GP_ARG_NUM_REG
11051 && n_fpr < FP_ARG_V4_MAX_REG)
11052 return;
11054 /* Find the register save area. */
11055 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11056 if (cfun->machine->varargs_save_offset)
11057 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11058 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11059 TREE_SIDE_EFFECTS (t) = 1;
11060 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
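/* Worked example (a sketch): for `int f (int a, double b, ...)' under
   the 32-bit SVR4 ABI, `a' occupies r3 and `b' occupies f1, so va_start
   stores gpr = 1 and fpr = 1.  No named argument lands on the stack, so
   words == 0 and overflow_arg_area starts at the incoming argument
   pointer.  */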
11063 /* Implement va_arg. */
11065 static tree
11066 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11067 gimple_seq *post_p)
11069 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11070 tree gpr, fpr, ovf, sav, reg, t, u;
11071 int size, rsize, n_reg, sav_ofs, sav_scale;
11072 tree lab_false, lab_over, addr;
11073 int align;
11074 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11075 int regalign = 0;
11076 gimple stmt;
11078 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11080 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11081 return build_va_arg_indirect_ref (t);
11084 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11085 earlier version of gcc, with the property that it always applied alignment
11086 adjustments to the va-args (even for zero-sized types). The cheapest way
11087 to deal with this is to replicate the effect of the part of
11088 std_gimplify_va_arg_expr that carries out the align adjust, for the
11089 relevant case.
11090 We don't need to check for pass-by-reference because of the test above.
11091 We can return a simplified answer, since we know there's no offset to add.  */
11093 if (((TARGET_MACHO
11094 && rs6000_darwin64_abi)
11095 || DEFAULT_ABI == ABI_ELFv2
11096 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11097 && integer_zerop (TYPE_SIZE (type)))
11099 unsigned HOST_WIDE_INT align, boundary;
11100 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11101 align = PARM_BOUNDARY / BITS_PER_UNIT;
11102 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11103 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11104 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11105 boundary /= BITS_PER_UNIT;
11106 if (boundary > align)
11108 tree t;
11109 /* This updates arg ptr by the amount that would be necessary
11110 to align the zero-sized (but not zero-alignment) item. */
11111 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11112 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11113 gimplify_and_add (t, pre_p);
11115 t = fold_convert (sizetype, valist_tmp);
11116 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11117 fold_convert (TREE_TYPE (valist),
11118 fold_build2 (BIT_AND_EXPR, sizetype, t,
11119 size_int (-boundary))));
11120 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11121 gimplify_and_add (t, pre_p);
11123 /* Since it is zero-sized there's no increment for the item itself. */
11124 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11125 return build_va_arg_indirect_ref (valist_tmp);
11128 if (DEFAULT_ABI != ABI_V4)
11130 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11132 tree elem_type = TREE_TYPE (type);
11133 enum machine_mode elem_mode = TYPE_MODE (elem_type);
11134 int elem_size = GET_MODE_SIZE (elem_mode);
11136 if (elem_size < UNITS_PER_WORD)
11138 tree real_part, imag_part;
11139 gimple_seq post = NULL;
11141 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11142 &post);
11143 /* Copy the value into a temporary, lest the formal temporary
11144 be reused out from under us. */
11145 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11146 gimple_seq_add_seq (pre_p, post);
11148 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11149 post_p);
11151 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11155 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
11158 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11159 f_fpr = DECL_CHAIN (f_gpr);
11160 f_res = DECL_CHAIN (f_fpr);
11161 f_ovf = DECL_CHAIN (f_res);
11162 f_sav = DECL_CHAIN (f_ovf);
11164 valist = build_va_arg_indirect_ref (valist);
11165 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11166 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11167 f_fpr, NULL_TREE);
11168 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11169 f_ovf, NULL_TREE);
11170 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11171 f_sav, NULL_TREE);
11173 size = int_size_in_bytes (type);
11174 rsize = (size + 3) / 4;
11175 align = 1;
11177 if (TARGET_HARD_FLOAT && TARGET_FPRS
11178 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
11179 || (TARGET_DOUBLE_FLOAT
11180 && (TYPE_MODE (type) == DFmode
11181 || TYPE_MODE (type) == TFmode
11182 || TYPE_MODE (type) == SDmode
11183 || TYPE_MODE (type) == DDmode
11184 || TYPE_MODE (type) == TDmode))))
11186 /* FP args go in FP registers, if present. */
11187 reg = fpr;
11188 n_reg = (size + 7) / 8;
11189 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
11190 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
11191 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
11192 align = 8;
11194 else
11196 /* Otherwise into GP registers. */
11197 reg = gpr;
11198 n_reg = rsize;
11199 sav_ofs = 0;
11200 sav_scale = 4;
11201 if (n_reg == 2)
11202 align = 8;
11205 /* Pull the value out of the saved registers.... */
11207 lab_over = NULL;
11208 addr = create_tmp_var (ptr_type_node, "addr");
11210 /* AltiVec vectors never go in registers when -mabi=altivec. */
11211 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11212 align = 16;
11213 else
11215 lab_false = create_artificial_label (input_location);
11216 lab_over = create_artificial_label (input_location);
11218 /* Long long and SPE vectors are aligned in the registers.
11219 As is any other 2-gpr item, such as complex int, due to a
11220 historical mistake.
11221 u = reg;
11222 if (n_reg == 2 && reg == gpr)
11224 regalign = 1;
11225 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11226 build_int_cst (TREE_TYPE (reg), n_reg - 1));
11227 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
11228 unshare_expr (reg), u);
11230 /* _Decimal128 is passed in even/odd fpr pairs; the stored
11231 reg number is 0 for f1, so we want to make it odd. */
11232 else if (reg == fpr && TYPE_MODE (type) == TDmode)
11234 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11235 build_int_cst (TREE_TYPE (reg), 1));
11236 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
11239 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
11240 t = build2 (GE_EXPR, boolean_type_node, u, t);
11241 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11242 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11243 gimplify_and_add (t, pre_p);
11245 t = sav;
11246 if (sav_ofs)
11247 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11249 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11250 build_int_cst (TREE_TYPE (reg), n_reg));
11251 u = fold_convert (sizetype, u);
11252 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
11253 t = fold_build_pointer_plus (t, u);
11255 /* _Decimal32 varargs are located in the second word of the 64-bit
11256 FP register for 32-bit binaries. */
11257 if (!TARGET_POWERPC64
11258 && TARGET_HARD_FLOAT && TARGET_FPRS
11259 && TYPE_MODE (type) == SDmode)
11260 t = fold_build_pointer_plus_hwi (t, size);
11262 gimplify_assign (addr, t, pre_p);
11264 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11266 stmt = gimple_build_label (lab_false);
11267 gimple_seq_add_stmt (pre_p, stmt);
11269 if ((n_reg == 2 && !regalign) || n_reg > 2)
11271 /* Ensure that we don't find any more args in regs.
11272 Alignment has been taken care of for the special cases.  */
11273 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
11277 /* ... otherwise out of the overflow area. */
11279 /* Care for on-stack alignment if needed. */
11280 t = ovf;
11281 if (align != 1)
11283 t = fold_build_pointer_plus_hwi (t, align - 1);
11284 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
11285 build_int_cst (TREE_TYPE (t), -align));
11287 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11289 gimplify_assign (unshare_expr (addr), t, pre_p);
11291 t = fold_build_pointer_plus_hwi (t, size);
11292 gimplify_assign (unshare_expr (ovf), t, pre_p);
11294 if (lab_over)
11296 stmt = gimple_build_label (lab_over);
11297 gimple_seq_add_stmt (pre_p, stmt);
11300 if (STRICT_ALIGNMENT
11301 && (TYPE_ALIGN (type)
11302 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
11304 /* The value (of type complex double, for example) may not be
11305 aligned in memory in the saved registers, so copy via a
11306 temporary. (This is the same code as used for SPARC.) */
11307 tree tmp = create_tmp_var (type, "va_arg_tmp");
11308 tree dest_addr = build_fold_addr_expr (tmp);
11310 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
11311 3, dest_addr, addr, size_int (rsize * 4));
11313 gimplify_and_add (copy, pre_p);
11314 addr = dest_addr;
11317 addr = fold_convert (ptrtype, addr);
11318 return build_va_arg_indirect_ref (addr);
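/* Worked example (a sketch): for a `double' argument, n_reg == 1 and
   reg == fpr, so the GE test above branches to lab_false (the overflow
   path) once fpr >= 8 - 1 + 1 == 8, i.e. after f1..f8 are exhausted.
   For `long long', n_reg == 2 and reg == gpr: the counter is first
   rounded up to an even value, and the register path is used only while
   the pre-increment counter is below 8 - 2 + 1 == 7.  */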
11321 /* Builtins. */
11323 static void
11324 def_builtin (const char *name, tree type, enum rs6000_builtins code)
11326 tree t;
11327 unsigned classify = rs6000_builtin_info[(int)code].attr;
11328 const char *attr_string = "";
11330 gcc_assert (name != NULL);
11331 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
11333 if (rs6000_builtin_decls[(int)code])
11334 fatal_error ("internal error: builtin function %s already processed", name);
11336 rs6000_builtin_decls[(int)code] = t =
11337 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
11339 /* Set any special attributes. */
11340 if ((classify & RS6000_BTC_CONST) != 0)
11342 /* const function, function only depends on the inputs. */
11343 TREE_READONLY (t) = 1;
11344 TREE_NOTHROW (t) = 1;
11345 attr_string = ", const";
11347 else if ((classify & RS6000_BTC_PURE) != 0)
11349 /* pure function, function can read global memory, but does not set any
11350 external state. */
11351 DECL_PURE_P (t) = 1;
11352 TREE_NOTHROW (t) = 1;
11353 attr_string = ", pure";
11355 else if ((classify & RS6000_BTC_FP) != 0)
11357 /* Function is a math function. If rounding mode is on, then treat the
11358 function as not reading global memory, but it can have arbitrary side
11359 effects. If it is off, then assume the function is a const function.
11360 This mimics the ATTR_MATHFN_FPROUNDING attribute in
11361 builtin-attribute.def that is used for the math functions. */
11362 TREE_NOTHROW (t) = 1;
11363 if (flag_rounding_math)
11365 DECL_PURE_P (t) = 1;
11366 DECL_IS_NOVOPS (t) = 1;
11367 attr_string = ", fp, pure";
11369 else
11371 TREE_READONLY (t) = 1;
11372 attr_string = ", fp, const";
11375 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
11376 gcc_unreachable ();
11378 if (TARGET_DEBUG_BUILTIN)
11379 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
11380 (int)code, name, attr_string);
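/* Usage sketch (the builtin and type names here are assumed from
   elsewhere in this file):

       def_builtin ("__builtin_altivec_vaddubm",
		    v16qi_ftype_v16qi_v16qi, ALTIVEC_BUILTIN_VADDUBM);

   The RS6000_BTC_* bits recorded for the enum value then decide whether
   the resulting decl is marked const, pure, or FP-rounding sensitive.  */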
11383 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
11385 #undef RS6000_BUILTIN_1
11386 #undef RS6000_BUILTIN_2
11387 #undef RS6000_BUILTIN_3
11388 #undef RS6000_BUILTIN_A
11389 #undef RS6000_BUILTIN_D
11390 #undef RS6000_BUILTIN_E
11391 #undef RS6000_BUILTIN_H
11392 #undef RS6000_BUILTIN_P
11393 #undef RS6000_BUILTIN_Q
11394 #undef RS6000_BUILTIN_S
11395 #undef RS6000_BUILTIN_X
11397 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11398 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11399 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
11400 { MASK, ICODE, NAME, ENUM },
11402 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11403 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11404 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11405 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11406 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11407 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11408 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11409 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11411 static const struct builtin_description bdesc_3arg[] =
11413 #include "rs6000-builtin.def"
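/* Sketch of the mechanism: rs6000-builtin.def lists every builtin once
   through these macros; a ternary entry there has roughly the shape

       RS6000_BUILTIN_3 (ALTIVEC_BUILTIN_VMADDFP,
			 "__builtin_altivec_vmaddfp",
			 MASK, ATTR, CODE_FOR_altivec_vmaddfp)

   (exact arguments assumed).  Because only RS6000_BUILTIN_3 is defined
   to emit "{ MASK, ICODE, NAME, ENUM }," while the other ten classes
   expand to nothing, including the .def file at this point yields
   exactly the three-operand builtins.  The same file is re-included
   below with a different macro enabled for each remaining table.  */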
11416 /* DST operations: void foo (void *, const int, const char). */
11418 #undef RS6000_BUILTIN_1
11419 #undef RS6000_BUILTIN_2
11420 #undef RS6000_BUILTIN_3
11421 #undef RS6000_BUILTIN_A
11422 #undef RS6000_BUILTIN_D
11423 #undef RS6000_BUILTIN_E
11424 #undef RS6000_BUILTIN_H
11425 #undef RS6000_BUILTIN_P
11426 #undef RS6000_BUILTIN_Q
11427 #undef RS6000_BUILTIN_S
11428 #undef RS6000_BUILTIN_X
11430 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11431 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11432 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11433 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11434 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
11435 { MASK, ICODE, NAME, ENUM },
11437 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11438 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11439 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11440 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11441 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11442 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11444 static const struct builtin_description bdesc_dst[] =
11446 #include "rs6000-builtin.def"
11449 /* Simple binary operations: VECc = foo (VECa, VECb). */
11451 #undef RS6000_BUILTIN_1
11452 #undef RS6000_BUILTIN_2
11453 #undef RS6000_BUILTIN_3
11454 #undef RS6000_BUILTIN_A
11455 #undef RS6000_BUILTIN_D
11456 #undef RS6000_BUILTIN_E
11457 #undef RS6000_BUILTIN_H
11458 #undef RS6000_BUILTIN_P
11459 #undef RS6000_BUILTIN_Q
11460 #undef RS6000_BUILTIN_S
11461 #undef RS6000_BUILTIN_X
11463 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11464 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
11465 { MASK, ICODE, NAME, ENUM },
11467 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11468 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11469 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11470 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11471 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11472 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11473 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11474 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11475 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11477 static const struct builtin_description bdesc_2arg[] =
11479 #include "rs6000-builtin.def"
11482 #undef RS6000_BUILTIN_1
11483 #undef RS6000_BUILTIN_2
11484 #undef RS6000_BUILTIN_3
11485 #undef RS6000_BUILTIN_A
11486 #undef RS6000_BUILTIN_D
11487 #undef RS6000_BUILTIN_E
11488 #undef RS6000_BUILTIN_H
11489 #undef RS6000_BUILTIN_P
11490 #undef RS6000_BUILTIN_Q
11491 #undef RS6000_BUILTIN_S
11492 #undef RS6000_BUILTIN_X
11494 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11495 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11496 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11497 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11498 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11499 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11500 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11501 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
11502 { MASK, ICODE, NAME, ENUM },
11504 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11505 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11506 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11508 /* AltiVec predicates. */
11510 static const struct builtin_description bdesc_altivec_preds[] =
11512 #include "rs6000-builtin.def"
11515 /* SPE predicates. */
11516 #undef RS6000_BUILTIN_1
11517 #undef RS6000_BUILTIN_2
11518 #undef RS6000_BUILTIN_3
11519 #undef RS6000_BUILTIN_A
11520 #undef RS6000_BUILTIN_D
11521 #undef RS6000_BUILTIN_E
11522 #undef RS6000_BUILTIN_H
11523 #undef RS6000_BUILTIN_P
11524 #undef RS6000_BUILTIN_Q
11525 #undef RS6000_BUILTIN_S
11526 #undef RS6000_BUILTIN_X
11528 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11529 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11530 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11531 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11532 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11533 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11534 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11535 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11536 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11537 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
11538 { MASK, ICODE, NAME, ENUM },
11540 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11542 static const struct builtin_description bdesc_spe_predicates[] =
11544 #include "rs6000-builtin.def"
11547 /* SPE evsel predicates. */
11548 #undef RS6000_BUILTIN_1
11549 #undef RS6000_BUILTIN_2
11550 #undef RS6000_BUILTIN_3
11551 #undef RS6000_BUILTIN_A
11552 #undef RS6000_BUILTIN_D
11553 #undef RS6000_BUILTIN_E
11554 #undef RS6000_BUILTIN_H
11555 #undef RS6000_BUILTIN_P
11556 #undef RS6000_BUILTIN_Q
11557 #undef RS6000_BUILTIN_S
11558 #undef RS6000_BUILTIN_X
11560 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11561 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11562 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11563 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11564 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11565 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
11566 { MASK, ICODE, NAME, ENUM },
11568 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11569 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11570 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11571 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11572 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11574 static const struct builtin_description bdesc_spe_evsel[] =
11576 #include "rs6000-builtin.def"
11579 /* PAIRED predicates. */
11580 #undef RS6000_BUILTIN_1
11581 #undef RS6000_BUILTIN_2
11582 #undef RS6000_BUILTIN_3
11583 #undef RS6000_BUILTIN_A
11584 #undef RS6000_BUILTIN_D
11585 #undef RS6000_BUILTIN_E
11586 #undef RS6000_BUILTIN_H
11587 #undef RS6000_BUILTIN_P
11588 #undef RS6000_BUILTIN_Q
11589 #undef RS6000_BUILTIN_S
11590 #undef RS6000_BUILTIN_X
11592 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11593 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11594 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11595 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11596 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11597 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11598 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11599 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11600 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
11601 { MASK, ICODE, NAME, ENUM },
11603 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11604 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11606 static const struct builtin_description bdesc_paired_preds[] =
11608 #include "rs6000-builtin.def"
11611 /* ABS* operations. */
11613 #undef RS6000_BUILTIN_1
11614 #undef RS6000_BUILTIN_2
11615 #undef RS6000_BUILTIN_3
11616 #undef RS6000_BUILTIN_A
11617 #undef RS6000_BUILTIN_D
11618 #undef RS6000_BUILTIN_E
11619 #undef RS6000_BUILTIN_H
11620 #undef RS6000_BUILTIN_P
11621 #undef RS6000_BUILTIN_Q
11622 #undef RS6000_BUILTIN_S
11623 #undef RS6000_BUILTIN_X
11625 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11626 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11627 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11628 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
11629 { MASK, ICODE, NAME, ENUM },
11631 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11632 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11633 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11634 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11635 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11636 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11637 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11639 static const struct builtin_description bdesc_abs[] =
11641 #include "rs6000-builtin.def"
11644 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
11645 foo (VECa). */
11647 #undef RS6000_BUILTIN_1
11648 #undef RS6000_BUILTIN_2
11649 #undef RS6000_BUILTIN_3
11650 #undef RS6000_BUILTIN_A
11651 #undef RS6000_BUILTIN_D
11652 #undef RS6000_BUILTIN_E
11653 #undef RS6000_BUILTIN_H
11654 #undef RS6000_BUILTIN_P
11655 #undef RS6000_BUILTIN_Q
11656 #undef RS6000_BUILTIN_S
11657 #undef RS6000_BUILTIN_X
11659 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
11660 { MASK, ICODE, NAME, ENUM },
11662 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11663 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11664 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11665 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11666 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11667 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11668 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11669 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11670 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11671 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11673 static const struct builtin_description bdesc_1arg[] =
11675 #include "rs6000-builtin.def"
11678 /* HTM builtins. */
11679 #undef RS6000_BUILTIN_1
11680 #undef RS6000_BUILTIN_2
11681 #undef RS6000_BUILTIN_3
11682 #undef RS6000_BUILTIN_A
11683 #undef RS6000_BUILTIN_D
11684 #undef RS6000_BUILTIN_E
11685 #undef RS6000_BUILTIN_H
11686 #undef RS6000_BUILTIN_P
11687 #undef RS6000_BUILTIN_Q
11688 #undef RS6000_BUILTIN_S
11689 #undef RS6000_BUILTIN_X
11691 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11692 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11693 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11694 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11695 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11696 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11697 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
11698 { MASK, ICODE, NAME, ENUM },
11700 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11701 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11702 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11703 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11705 static const struct builtin_description bdesc_htm[] =
11707 #include "rs6000-builtin.def"
11710 #undef RS6000_BUILTIN_1
11711 #undef RS6000_BUILTIN_2
11712 #undef RS6000_BUILTIN_3
11713 #undef RS6000_BUILTIN_A
11714 #undef RS6000_BUILTIN_D
11715 #undef RS6000_BUILTIN_E
11716 #undef RS6000_BUILTIN_H
11717 #undef RS6000_BUILTIN_P
11718 #undef RS6000_BUILTIN_Q
11719 #undef RS6000_BUILTIN_S
11720 #undef RS6000_BUILTIN_X
11721 /* Return true if a builtin function is overloaded. */
11722 bool
11723 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
11725 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
11728 /* Expand an expression EXP that calls a builtin without arguments. */
11729 static rtx
11730 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
11732 rtx pat;
11733 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11735 if (icode == CODE_FOR_nothing)
11736 /* Builtin not supported on this processor. */
11737 return 0;
11739 if (target == 0
11740 || GET_MODE (target) != tmode
11741 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11742 target = gen_reg_rtx (tmode);
11744 pat = GEN_FCN (icode) (target);
11745 if (! pat)
11746 return 0;
11747 emit_insn (pat);
11749 return target;
11753 static rtx
11754 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
11756 rtx pat;
11757 tree arg0 = CALL_EXPR_ARG (exp, 0);
11758 tree arg1 = CALL_EXPR_ARG (exp, 1);
11759 rtx op0 = expand_normal (arg0);
11760 rtx op1 = expand_normal (arg1);
11761 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11762 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11764 if (icode == CODE_FOR_nothing)
11765 /* Builtin not supported on this processor. */
11766 return 0;
11768 /* If we got invalid arguments bail out before generating bad rtl. */
11769 if (arg0 == error_mark_node || arg1 == error_mark_node)
11770 return const0_rtx;
11772 if (GET_CODE (op0) != CONST_INT
11773 || INTVAL (op0) > 255
11774 || INTVAL (op0) < 0)
11776 error ("argument 1 must be an 8-bit field value");
11777 return const0_rtx;
11780 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
11781 op0 = copy_to_mode_reg (mode0, op0);
11783 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11784 op1 = copy_to_mode_reg (mode1, op1);
11786 pat = GEN_FCN (icode) (op0, op1);
11787 if (! pat)
11788 return const0_rtx;
11789 emit_insn (pat);
11791 return NULL_RTX;
11795 static rtx
11796 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
11798 rtx pat;
11799 tree arg0 = CALL_EXPR_ARG (exp, 0);
11800 rtx op0 = expand_normal (arg0);
11801 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11802 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11804 if (icode == CODE_FOR_nothing)
11805 /* Builtin not supported on this processor. */
11806 return 0;
11808 /* If we got invalid arguments bail out before generating bad rtl. */
11809 if (arg0 == error_mark_node)
11810 return const0_rtx;
11812 if (icode == CODE_FOR_altivec_vspltisb
11813 || icode == CODE_FOR_altivec_vspltish
11814 || icode == CODE_FOR_altivec_vspltisw
11815 || icode == CODE_FOR_spe_evsplatfi
11816 || icode == CODE_FOR_spe_evsplati)
11818 /* Only allow 5-bit *signed* literals. */
11819 if (GET_CODE (op0) != CONST_INT
11820 || INTVAL (op0) > 15
11821 || INTVAL (op0) < -16)
11823 error ("argument 1 must be a 5-bit signed literal");
11824 return const0_rtx;
11828 if (target == 0
11829 || GET_MODE (target) != tmode
11830 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11831 target = gen_reg_rtx (tmode);
11833 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11834 op0 = copy_to_mode_reg (mode0, op0);
11836 pat = GEN_FCN (icode) (target, op0);
11837 if (! pat)
11838 return 0;
11839 emit_insn (pat);
11841 return target;
11844 static rtx
11845 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
11847 rtx pat, scratch1, scratch2;
11848 tree arg0 = CALL_EXPR_ARG (exp, 0);
11849 rtx op0 = expand_normal (arg0);
11850 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11851 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11853 /* If we have invalid arguments, bail out before generating bad rtl. */
11854 if (arg0 == error_mark_node)
11855 return const0_rtx;
11857 if (target == 0
11858 || GET_MODE (target) != tmode
11859 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11860 target = gen_reg_rtx (tmode);
11862 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11863 op0 = copy_to_mode_reg (mode0, op0);
11865 scratch1 = gen_reg_rtx (mode0);
11866 scratch2 = gen_reg_rtx (mode0);
11868 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
11869 if (! pat)
11870 return 0;
11871 emit_insn (pat);
11873 return target;
11876 static rtx
11877 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
11879 rtx pat;
11880 tree arg0 = CALL_EXPR_ARG (exp, 0);
11881 tree arg1 = CALL_EXPR_ARG (exp, 1);
11882 rtx op0 = expand_normal (arg0);
11883 rtx op1 = expand_normal (arg1);
11884 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11885 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11886 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11888 if (icode == CODE_FOR_nothing)
11889 /* Builtin not supported on this processor. */
11890 return 0;
11892 /* If we got invalid arguments bail out before generating bad rtl. */
11893 if (arg0 == error_mark_node || arg1 == error_mark_node)
11894 return const0_rtx;
11896 if (icode == CODE_FOR_altivec_vcfux
11897 || icode == CODE_FOR_altivec_vcfsx
11898 || icode == CODE_FOR_altivec_vctsxs
11899 || icode == CODE_FOR_altivec_vctuxs
11900 || icode == CODE_FOR_altivec_vspltb
11901 || icode == CODE_FOR_altivec_vsplth
11902 || icode == CODE_FOR_altivec_vspltw
11903 || icode == CODE_FOR_spe_evaddiw
11904 || icode == CODE_FOR_spe_evldd
11905 || icode == CODE_FOR_spe_evldh
11906 || icode == CODE_FOR_spe_evldw
11907 || icode == CODE_FOR_spe_evlhhesplat
11908 || icode == CODE_FOR_spe_evlhhossplat
11909 || icode == CODE_FOR_spe_evlhhousplat
11910 || icode == CODE_FOR_spe_evlwhe
11911 || icode == CODE_FOR_spe_evlwhos
11912 || icode == CODE_FOR_spe_evlwhou
11913 || icode == CODE_FOR_spe_evlwhsplat
11914 || icode == CODE_FOR_spe_evlwwsplat
11915 || icode == CODE_FOR_spe_evrlwi
11916 || icode == CODE_FOR_spe_evslwi
11917 || icode == CODE_FOR_spe_evsrwis
11918 || icode == CODE_FOR_spe_evsubifw
11919 || icode == CODE_FOR_spe_evsrwiu)
11921 /* Only allow 5-bit unsigned literals. */
11922 STRIP_NOPS (arg1);
11923 if (TREE_CODE (arg1) != INTEGER_CST
11924 || TREE_INT_CST_LOW (arg1) & ~0x1f)
11926 error ("argument 2 must be a 5-bit unsigned literal");
11927 return const0_rtx;
11931 if (target == 0
11932 || GET_MODE (target) != tmode
11933 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11934 target = gen_reg_rtx (tmode);
11936 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11937 op0 = copy_to_mode_reg (mode0, op0);
11938 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11939 op1 = copy_to_mode_reg (mode1, op1);
11941 pat = GEN_FCN (icode) (target, op0, op1);
11942 if (! pat)
11943 return 0;
11944 emit_insn (pat);
11946 return target;
11949 static rtx
11950 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
11952 rtx pat, scratch;
11953 tree cr6_form = CALL_EXPR_ARG (exp, 0);
11954 tree arg0 = CALL_EXPR_ARG (exp, 1);
11955 tree arg1 = CALL_EXPR_ARG (exp, 2);
11956 rtx op0 = expand_normal (arg0);
11957 rtx op1 = expand_normal (arg1);
11958 enum machine_mode tmode = SImode;
11959 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11960 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11961 int cr6_form_int;
11963 if (TREE_CODE (cr6_form) != INTEGER_CST)
11965 error ("argument 1 of __builtin_altivec_predicate must be a constant");
11966 return const0_rtx;
11968 else
11969 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
11971 gcc_assert (mode0 == mode1);
11973 /* If we have invalid arguments, bail out before generating bad rtl. */
11974 if (arg0 == error_mark_node || arg1 == error_mark_node)
11975 return const0_rtx;
11977 if (target == 0
11978 || GET_MODE (target) != tmode
11979 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11980 target = gen_reg_rtx (tmode);
11982 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11983 op0 = copy_to_mode_reg (mode0, op0);
11984 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
11985 op1 = copy_to_mode_reg (mode1, op1);
11987 scratch = gen_reg_rtx (mode0);
11989 pat = GEN_FCN (icode) (scratch, op0, op1);
11990 if (! pat)
11991 return 0;
11992 emit_insn (pat);
11994 /* The vec_any* and vec_all* predicates use the same opcodes for two
11995 different operations, but the bits in CR6 will be different
11996 depending on what information we want. So we have to play tricks
11997 with CR6 to get the right bits out.
11999 If you think this is disgusting, look at the specs for the
12000 AltiVec predicates. */
12002 switch (cr6_form_int)
12004 case 0:
12005 emit_insn (gen_cr6_test_for_zero (target));
12006 break;
12007 case 1:
12008 emit_insn (gen_cr6_test_for_zero_reverse (target));
12009 break;
12010 case 2:
12011 emit_insn (gen_cr6_test_for_lt (target));
12012 break;
12013 case 3:
12014 emit_insn (gen_cr6_test_for_lt_reverse (target));
12015 break;
12016 default:
12017 error ("argument 1 of __builtin_altivec_predicate is out of range");
12018 break;
12021 return target;
12024 static rtx
12025 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12027 rtx pat, addr;
12028 tree arg0 = CALL_EXPR_ARG (exp, 0);
12029 tree arg1 = CALL_EXPR_ARG (exp, 1);
12030 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12031 enum machine_mode mode0 = Pmode;
12032 enum machine_mode mode1 = Pmode;
12033 rtx op0 = expand_normal (arg0);
12034 rtx op1 = expand_normal (arg1);
12036 if (icode == CODE_FOR_nothing)
12037 /* Builtin not supported on this processor. */
12038 return 0;
12040 /* If we got invalid arguments bail out before generating bad rtl. */
12041 if (arg0 == error_mark_node || arg1 == error_mark_node)
12042 return const0_rtx;
12044 if (target == 0
12045 || GET_MODE (target) != tmode
12046 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12047 target = gen_reg_rtx (tmode);
12049 op1 = copy_to_mode_reg (mode1, op1);
12051 if (op0 == const0_rtx)
12053 addr = gen_rtx_MEM (tmode, op1);
12055 else
12057 op0 = copy_to_mode_reg (mode0, op0);
12058 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12061 pat = GEN_FCN (icode) (target, addr);
12063 if (! pat)
12064 return 0;
12065 emit_insn (pat);
12067 return target;
12070 /* Return a constant vector for use as a little-endian permute control vector
12071 to reverse the order of elements of the given vector mode. */
12072 static rtx
12073 swap_selector_for_mode (enum machine_mode mode)
12075 /* These are little endian vectors, so their elements are reversed
12076 from what you would normally expect for a permute control vector. */
12077 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12078 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12079 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12080 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12081 unsigned int *swaparray, i;
12082 rtx perm[16];
12084 switch (mode)
12086 case V2DFmode:
12087 case V2DImode:
12088 swaparray = swap2;
12089 break;
12090 case V4SFmode:
12091 case V4SImode:
12092 swaparray = swap4;
12093 break;
12094 case V8HImode:
12095 swaparray = swap8;
12096 break;
12097 case V16QImode:
12098 swaparray = swap16;
12099 break;
12100 default:
12101 gcc_unreachable ();
12104 for (i = 0; i < 16; ++i)
12105 perm[i] = GEN_INT (swaparray[i]);
12107 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
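/* Worked example (a sketch): for V4SImode the selector bytes are
   {3,2,1,0, 7,6,5,4, 11,10,9,8, 15,14,13,12}.  As noted above, the
   control vector is itself stored in little-endian element order, so
   the net effect of a vperm with this selector is to reverse the order
   of the four 4-byte elements (0 <-> 3, 1 <-> 2) while leaving the
   bytes within each element intact.  */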
12110 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12111 with -maltivec=be specified. Issue the load followed by an element-reversing
12112 permute. */
12113 void
12114 altivec_expand_lvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
12116 rtx tmp = gen_reg_rtx (mode);
12117 rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
12118 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12119 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12120 rtx sel = swap_selector_for_mode (mode);
12121 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12123 gcc_assert (REG_P (op0));
12124 emit_insn (par);
12125 emit_insn (gen_rtx_SET (VOIDmode, op0, vperm));
12128 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12129 with -maltivec=be specified. Issue the store preceded by an element-reversing
12130 permute. */
12131 void
12132 altivec_expand_stvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
12134 rtx tmp = gen_reg_rtx (mode);
12135 rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
12136 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12137 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12138 rtx sel = swap_selector_for_mode (mode);
12139 rtx vperm;
12141 gcc_assert (REG_P (op1));
12142 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12143 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12144 emit_insn (par);
12147 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12148 specified. Issue the store preceded by an element-reversing permute. */
12149 void
12150 altivec_expand_stvex_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
12152 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12153 rtx tmp = gen_reg_rtx (mode);
12154 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12155 rtx sel = swap_selector_for_mode (mode);
12156 rtx vperm;
12158 gcc_assert (REG_P (op1));
12159 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12160 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12161 emit_insn (gen_rtx_SET (VOIDmode, op0, stvx));
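/* Summary sketch of the three expanders above: with -maltivec=be on a
   little-endian target, a load is emitted as the raw instruction
   followed by an element-reversing vperm (tmp -> op0), while a store is
   the mirror image: the vperm first (op1 -> tmp), then the raw store.
   The same swap selector serves both directions, since reversing twice
   is the identity.  */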
12164 static rtx
12165 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
12167 rtx pat, addr;
12168 tree arg0 = CALL_EXPR_ARG (exp, 0);
12169 tree arg1 = CALL_EXPR_ARG (exp, 1);
12170 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12171 enum machine_mode mode0 = Pmode;
12172 enum machine_mode mode1 = Pmode;
12173 rtx op0 = expand_normal (arg0);
12174 rtx op1 = expand_normal (arg1);
12176 if (icode == CODE_FOR_nothing)
12177 /* Builtin not supported on this processor. */
12178 return 0;
12180 /* If we got invalid arguments bail out before generating bad rtl. */
12181 if (arg0 == error_mark_node || arg1 == error_mark_node)
12182 return const0_rtx;
12184 if (target == 0
12185 || GET_MODE (target) != tmode
12186 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12187 target = gen_reg_rtx (tmode);
12189 op1 = copy_to_mode_reg (mode1, op1);
12191 if (op0 == const0_rtx)
12193 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
12195 else
12197 op0 = copy_to_mode_reg (mode0, op0);
12198 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
12201 pat = GEN_FCN (icode) (target, addr);
12203 if (! pat)
12204 return 0;
12205 emit_insn (pat);
12207 return target;
12210 static rtx
12211 spe_expand_stv_builtin (enum insn_code icode, tree exp)
12213 tree arg0 = CALL_EXPR_ARG (exp, 0);
12214 tree arg1 = CALL_EXPR_ARG (exp, 1);
12215 tree arg2 = CALL_EXPR_ARG (exp, 2);
12216 rtx op0 = expand_normal (arg0);
12217 rtx op1 = expand_normal (arg1);
12218 rtx op2 = expand_normal (arg2);
12219 rtx pat;
12220 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12221 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12222 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
12224 /* Invalid arguments. Bail before doing anything stoopid! */
12225 if (arg0 == error_mark_node
12226 || arg1 == error_mark_node
12227 || arg2 == error_mark_node)
12228 return const0_rtx;
12230 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
12231 op0 = copy_to_mode_reg (mode2, op0);
12232 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
12233 op1 = copy_to_mode_reg (mode0, op1);
12234 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
12235 op2 = copy_to_mode_reg (mode1, op2);
12237 pat = GEN_FCN (icode) (op1, op2, op0);
12238 if (pat)
12239 emit_insn (pat);
12240 return NULL_RTX;
12243 static rtx
12244 paired_expand_stv_builtin (enum insn_code icode, tree exp)
12246 tree arg0 = CALL_EXPR_ARG (exp, 0);
12247 tree arg1 = CALL_EXPR_ARG (exp, 1);
12248 tree arg2 = CALL_EXPR_ARG (exp, 2);
12249 rtx op0 = expand_normal (arg0);
12250 rtx op1 = expand_normal (arg1);
12251 rtx op2 = expand_normal (arg2);
12252 rtx pat, addr;
12253 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12254 enum machine_mode mode1 = Pmode;
12255 enum machine_mode mode2 = Pmode;
12257 /* Invalid arguments. Bail before doing anything stoopid! */
12258 if (arg0 == error_mark_node
12259 || arg1 == error_mark_node
12260 || arg2 == error_mark_node)
12261 return const0_rtx;
12263 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
12264 op0 = copy_to_mode_reg (tmode, op0);
12266 op2 = copy_to_mode_reg (mode2, op2);
12268 if (op1 == const0_rtx)
12270 addr = gen_rtx_MEM (tmode, op2);
12272 else
12274 op1 = copy_to_mode_reg (mode1, op1);
12275 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12278 pat = GEN_FCN (icode) (addr, op0);
12279 if (pat)
12280 emit_insn (pat);
12281 return NULL_RTX;
12284 static rtx
12285 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
12287 tree arg0 = CALL_EXPR_ARG (exp, 0);
12288 tree arg1 = CALL_EXPR_ARG (exp, 1);
12289 tree arg2 = CALL_EXPR_ARG (exp, 2);
12290 rtx op0 = expand_normal (arg0);
12291 rtx op1 = expand_normal (arg1);
12292 rtx op2 = expand_normal (arg2);
12293 rtx pat, addr;
12294 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12295 enum machine_mode smode = insn_data[icode].operand[1].mode;
12296 enum machine_mode mode1 = Pmode;
12297 enum machine_mode mode2 = Pmode;
12299 /* Invalid arguments. Bail before doing anything stoopid! */
12300 if (arg0 == error_mark_node
12301 || arg1 == error_mark_node
12302 || arg2 == error_mark_node)
12303 return const0_rtx;
12305 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
12306 op0 = copy_to_mode_reg (smode, op0);
12308 op2 = copy_to_mode_reg (mode2, op2);
12310 if (op1 == const0_rtx)
12312 addr = gen_rtx_MEM (tmode, op2);
12314 else
12316 op1 = copy_to_mode_reg (mode1, op1);
12317 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12320 pat = GEN_FCN (icode) (addr, op0);
12321 if (pat)
12322 emit_insn (pat);
12323 return NULL_RTX;
12326 /* Return the appropriate SPR number associated with the given builtin. */
12327 static inline HOST_WIDE_INT
12328 htm_spr_num (enum rs6000_builtins code)
12330 if (code == HTM_BUILTIN_GET_TFHAR
12331 || code == HTM_BUILTIN_SET_TFHAR)
12332 return TFHAR_SPR;
12333 else if (code == HTM_BUILTIN_GET_TFIAR
12334 || code == HTM_BUILTIN_SET_TFIAR)
12335 return TFIAR_SPR;
12336 else if (code == HTM_BUILTIN_GET_TEXASR
12337 || code == HTM_BUILTIN_SET_TEXASR)
12338 return TEXASR_SPR;
12339 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
12340 || code == HTM_BUILTIN_SET_TEXASRU);
12341 return TEXASRU_SPR;
12344 /* Return the appropriate SPR regno associated with the given builtin. */
12345 static inline HOST_WIDE_INT
12346 htm_spr_regno (enum rs6000_builtins code)
12348 if (code == HTM_BUILTIN_GET_TFHAR
12349 || code == HTM_BUILTIN_SET_TFHAR)
12350 return TFHAR_REGNO;
12351 else if (code == HTM_BUILTIN_GET_TFIAR
12352 || code == HTM_BUILTIN_SET_TFIAR)
12353 return TFIAR_REGNO;
12354 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
12355 || code == HTM_BUILTIN_SET_TEXASR
12356 || code == HTM_BUILTIN_GET_TEXASRU
12357 || code == HTM_BUILTIN_SET_TEXASRU);
12358 return TEXASR_REGNO;
12361 /* Return the correct ICODE value depending on whether we are
12362 setting or reading the HTM SPRs. */
12363 static inline enum insn_code
12364 rs6000_htm_spr_icode (bool nonvoid)
12366 if (nonvoid)
12367 return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
12368 else
12369 return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
12372 /* Expand the HTM builtin in EXP and store the result in TARGET.
12373 Store true in *EXPANDEDP if we found a builtin to expand. */
12374 static rtx
12375 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
12377 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12378 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
12379 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12380 const struct builtin_description *d;
12381 size_t i;
12383 *expandedp = false;
12385 /* Expand the HTM builtins. */
12386 d = bdesc_htm;
12387 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
12388 if (d->code == fcode)
12390 rtx op[MAX_HTM_OPERANDS], pat;
12391 int nopnds = 0;
12392 tree arg;
12393 call_expr_arg_iterator iter;
12394 unsigned attr = rs6000_builtin_info[fcode].attr;
12395 enum insn_code icode = d->icode;
12397 if (attr & RS6000_BTC_SPR)
12398 icode = rs6000_htm_spr_icode (nonvoid);
12400 if (nonvoid)
12402 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12403 if (!target
12404 || GET_MODE (target) != tmode
12405 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
12406 target = gen_reg_rtx (tmode);
12407 op[nopnds++] = target;
12410 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
12412 const struct insn_operand_data *insn_op;
12414 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
12415 return NULL_RTX;
12417 insn_op = &insn_data[icode].operand[nopnds];
12419 op[nopnds] = expand_normal (arg);
12421 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
12423 if (!strcmp (insn_op->constraint, "n"))
12425 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
12426 if (!CONST_INT_P (op[nopnds]))
12427 error ("argument %d must be an unsigned literal", arg_num);
12428 else
12429 error ("argument %d is an unsigned literal that is "
12430 "out of range", arg_num);
12431 return const0_rtx;
12433 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
12436 nopnds++;
12439 /* Handle the builtins for extended mnemonics. These accept
12440 no arguments, but map to builtins that take arguments. */
12441 switch (fcode)
12443 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
12444 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
12445 op[nopnds++] = GEN_INT (1);
12446 #ifdef ENABLE_CHECKING
12447 attr |= RS6000_BTC_UNARY;
12448 #endif
12449 break;
12450 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
12451 op[nopnds++] = GEN_INT (0);
12452 #ifdef ENABLE_CHECKING
12453 attr |= RS6000_BTC_UNARY;
12454 #endif
12455 break;
12456 default:
12457 break;
12460 /* If this builtin accesses SPRs, then pass in the appropriate
12461 SPR number and SPR regno as the last two operands. */
12462 if (attr & RS6000_BTC_SPR)
12464 op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
12465 op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
12468 #ifdef ENABLE_CHECKING
12469 int expected_nopnds = 0;
12470 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
12471 expected_nopnds = 1;
12472 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
12473 expected_nopnds = 2;
12474 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
12475 expected_nopnds = 3;
12476 if (!(attr & RS6000_BTC_VOID))
12477 expected_nopnds += 1;
12478 if (attr & RS6000_BTC_SPR)
12479 expected_nopnds += 2;
12481 gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
12482 #endif
12484 switch (nopnds)
12486 case 1:
12487 pat = GEN_FCN (icode) (op[0]);
12488 break;
12489 case 2:
12490 pat = GEN_FCN (icode) (op[0], op[1]);
12491 break;
12492 case 3:
12493 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
12494 break;
12495 case 4:
12496 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
12497 break;
12498 default:
12499 gcc_unreachable ();
12501 if (!pat)
12502 return NULL_RTX;
12503 emit_insn (pat);
12505 *expandedp = true;
12506 if (nonvoid)
12507 return target;
12508 return const0_rtx;
12511 return NULL_RTX;
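/* Example (from the alias handling above): __builtin_tendall takes no
   source-level arguments, but the switch appends GEN_INT (1) so that it
   expands through the one-operand `tend.' pattern; __builtin_tsuspend
   likewise becomes `tsr. 0'.  */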
12514 static rtx
12515 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
12517 rtx pat;
12518 tree arg0 = CALL_EXPR_ARG (exp, 0);
12519 tree arg1 = CALL_EXPR_ARG (exp, 1);
12520 tree arg2 = CALL_EXPR_ARG (exp, 2);
12521 rtx op0 = expand_normal (arg0);
12522 rtx op1 = expand_normal (arg1);
12523 rtx op2 = expand_normal (arg2);
12524 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12525 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12526 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12527 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
12529 if (icode == CODE_FOR_nothing)
12530 /* Builtin not supported on this processor. */
12531 return 0;
12533 /* If we got invalid arguments bail out before generating bad rtl. */
12534 if (arg0 == error_mark_node
12535 || arg1 == error_mark_node
12536 || arg2 == error_mark_node)
12537 return const0_rtx;
12539 /* Check and prepare argument depending on the instruction code.
12541 Note that a switch statement instead of the sequence of tests
12542 would be incorrect as many of the CODE_FOR values could be
12543 CODE_FOR_nothing and that would yield multiple alternatives
12544 with identical values. We'd never reach here at runtime in
12545 this case. */
12546 if (icode == CODE_FOR_altivec_vsldoi_v4sf
12547 || icode == CODE_FOR_altivec_vsldoi_v4si
12548 || icode == CODE_FOR_altivec_vsldoi_v8hi
12549 || icode == CODE_FOR_altivec_vsldoi_v16qi)
12551 /* Only allow 4-bit unsigned literals. */
12552 STRIP_NOPS (arg2);
12553 if (TREE_CODE (arg2) != INTEGER_CST
12554 || TREE_INT_CST_LOW (arg2) & ~0xf)
12556 error ("argument 3 must be a 4-bit unsigned literal");
12557 return const0_rtx;
12560 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
12561 || icode == CODE_FOR_vsx_xxpermdi_v2di
12562 || icode == CODE_FOR_vsx_xxsldwi_v16qi
12563 || icode == CODE_FOR_vsx_xxsldwi_v8hi
12564 || icode == CODE_FOR_vsx_xxsldwi_v4si
12565 || icode == CODE_FOR_vsx_xxsldwi_v4sf
12566 || icode == CODE_FOR_vsx_xxsldwi_v2di
12567 || icode == CODE_FOR_vsx_xxsldwi_v2df)
12569 /* Only allow 2-bit unsigned literals. */
12570 STRIP_NOPS (arg2);
12571 if (TREE_CODE (arg2) != INTEGER_CST
12572 || TREE_INT_CST_LOW (arg2) & ~0x3)
12574 error ("argument 3 must be a 2-bit unsigned literal");
12575 return const0_rtx;
12578 else if (icode == CODE_FOR_vsx_set_v2df
12579 || icode == CODE_FOR_vsx_set_v2di
12580 || icode == CODE_FOR_bcdadd
12581 || icode == CODE_FOR_bcdadd_lt
12582 || icode == CODE_FOR_bcdadd_eq
12583 || icode == CODE_FOR_bcdadd_gt
12584 || icode == CODE_FOR_bcdsub
12585 || icode == CODE_FOR_bcdsub_lt
12586 || icode == CODE_FOR_bcdsub_eq
12587 || icode == CODE_FOR_bcdsub_gt)
12589 /* Only allow 1-bit unsigned literals. */
12590 STRIP_NOPS (arg2);
12591 if (TREE_CODE (arg2) != INTEGER_CST
12592 || TREE_INT_CST_LOW (arg2) & ~0x1)
12594 error ("argument 3 must be a 1-bit unsigned literal");
12595 return const0_rtx;
12598 else if (icode == CODE_FOR_dfp_ddedpd_dd
12599 || icode == CODE_FOR_dfp_ddedpd_td)
12601 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
12602 STRIP_NOPS (arg0);
12603 if (TREE_CODE (arg0) != INTEGER_CST
12604 || TREE_INT_CST_LOW (arg0) & ~0x3)
12606 error ("argument 1 must be 0 or 2");
12607 return const0_rtx;
12610 else if (icode == CODE_FOR_dfp_denbcd_dd
12611 || icode == CODE_FOR_dfp_denbcd_td)
12613 /* Only allow 1-bit unsigned literals. */
12614 STRIP_NOPS (arg0);
12615 if (TREE_CODE (arg0) != INTEGER_CST
12616 || TREE_INT_CST_LOW (arg0) & ~0x1)
12618 error ("argument 1 must be a 1-bit unsigned literal");
12619 return const0_rtx;
12622 else if (icode == CODE_FOR_dfp_dscli_dd
12623 || icode == CODE_FOR_dfp_dscli_td
12624 || icode == CODE_FOR_dfp_dscri_dd
12625 || icode == CODE_FOR_dfp_dscri_td)
12627 /* Only allow 6-bit unsigned literals. */
12628 STRIP_NOPS (arg1);
12629 if (TREE_CODE (arg1) != INTEGER_CST
12630 || TREE_INT_CST_LOW (arg1) & ~0x3f)
12632 error ("argument 2 must be a 6-bit unsigned literal");
12633 return const0_rtx;
12636 else if (icode == CODE_FOR_crypto_vshasigmaw
12637 || icode == CODE_FOR_crypto_vshasigmad)
12639 /* Check whether the 2nd and 3rd arguments are integer constants and in
12640 range and prepare arguments. */
12641 STRIP_NOPS (arg1);
12642 if (TREE_CODE (arg1) != INTEGER_CST
12643 || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
12645 error ("argument 2 must be 0 or 1");
12646 return const0_rtx;
12649 STRIP_NOPS (arg2);
12650 if (TREE_CODE (arg2) != INTEGER_CST
12651 || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 15))
12653 error ("argument 3 must be in the range 0..15");
12654 return const0_rtx;
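      /* For instance vec_sld (a, b, 3) reaches this point with ARG2 == 3
         and passes the 4-bit check above, while vec_sld (a, b, 16) is
         rejected with the corresponding error.  (vec_sld mapping onto the
         vsldoi builtins is the usual AltiVec PIM spelling, noted here
         only for illustration.)  */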
12658 if (target == 0
12659 || GET_MODE (target) != tmode
12660 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12661 target = gen_reg_rtx (tmode);
12663 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12664 op0 = copy_to_mode_reg (mode0, op0);
12665 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12666 op1 = copy_to_mode_reg (mode1, op1);
12667 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12668 op2 = copy_to_mode_reg (mode2, op2);
12670 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
12671 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
12672 else
12673 pat = GEN_FCN (icode) (target, op0, op1, op2);
12674 if (! pat)
12675 return 0;
12676 emit_insn (pat);
12678 return target;
12681 /* Expand the lvx builtins. */
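/* Illustrative shape of the expansion (a sketch, not literal RTL):
     (set (reg:V4SI target) (mem:V4SI (reg:P addr)))
   via the vector_altivec_load_<mode> expanders selected below;
   altivec_expand_st_builtin further down mirrors this with the
   operands reversed.  */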
12682 static rtx
12683 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
12685 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12686 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12687 tree arg0;
12688 enum machine_mode tmode, mode0;
12689 rtx pat, op0;
12690 enum insn_code icode;
12692 switch (fcode)
12694 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
12695 icode = CODE_FOR_vector_altivec_load_v16qi;
12696 break;
12697 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
12698 icode = CODE_FOR_vector_altivec_load_v8hi;
12699 break;
12700 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
12701 icode = CODE_FOR_vector_altivec_load_v4si;
12702 break;
12703 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
12704 icode = CODE_FOR_vector_altivec_load_v4sf;
12705 break;
12706 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
12707 icode = CODE_FOR_vector_altivec_load_v2df;
12708 break;
12709 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
12710 icode = CODE_FOR_vector_altivec_load_v2di;
      break;
12711 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
12712 icode = CODE_FOR_vector_altivec_load_v1ti;
12713 break;
12714 default:
12715 *expandedp = false;
12716 return NULL_RTX;
12719 *expandedp = true;
12721 arg0 = CALL_EXPR_ARG (exp, 0);
12722 op0 = expand_normal (arg0);
12723 tmode = insn_data[icode].operand[0].mode;
12724 mode0 = insn_data[icode].operand[1].mode;
12726 if (target == 0
12727 || GET_MODE (target) != tmode
12728 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12729 target = gen_reg_rtx (tmode);
12731 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12732 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12734 pat = GEN_FCN (icode) (target, op0);
12735 if (! pat)
12736 return 0;
12737 emit_insn (pat);
12738 return target;
12741 /* Expand the stvx builtins. */
12742 static rtx
12743 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12744 bool *expandedp)
12746 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12747 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12748 tree arg0, arg1;
12749 enum machine_mode mode0, mode1;
12750 rtx pat, op0, op1;
12751 enum insn_code icode;
12753 switch (fcode)
12755 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
12756 icode = CODE_FOR_vector_altivec_store_v16qi;
12757 break;
12758 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
12759 icode = CODE_FOR_vector_altivec_store_v8hi;
12760 break;
12761 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
12762 icode = CODE_FOR_vector_altivec_store_v4si;
12763 break;
12764 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
12765 icode = CODE_FOR_vector_altivec_store_v4sf;
12766 break;
12767 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
12768 icode = CODE_FOR_vector_altivec_store_v2df;
12769 break;
12770 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
12771 icode = CODE_FOR_vector_altivec_store_v2di;
      break;
12772 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
12773 icode = CODE_FOR_vector_altivec_store_v1ti;
12774 break;
12775 default:
12776 *expandedp = false;
12777 return NULL_RTX;
12780 arg0 = CALL_EXPR_ARG (exp, 0);
12781 arg1 = CALL_EXPR_ARG (exp, 1);
12782 op0 = expand_normal (arg0);
12783 op1 = expand_normal (arg1);
12784 mode0 = insn_data[icode].operand[0].mode;
12785 mode1 = insn_data[icode].operand[1].mode;
12787 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12788 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12789 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12790 op1 = copy_to_mode_reg (mode1, op1);
12792 pat = GEN_FCN (icode) (op0, op1);
12793 if (pat)
12794 emit_insn (pat);
12796 *expandedp = true;
12797 return NULL_RTX;
12800 /* Expand the dst builtins. */
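/* Source-level sketch (vec_dst is the AltiVec PIM spelling, assumed
   here for illustration): vec_dst (p, ctl, 0), where the final
   operand is the 2-bit stream tag validated below.  */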
12801 static rtx
12802 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12803 bool *expandedp)
12805 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12806 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12807 tree arg0, arg1, arg2;
12808 enum machine_mode mode0, mode1;
12809 rtx pat, op0, op1, op2;
12810 const struct builtin_description *d;
12811 size_t i;
12813 *expandedp = false;
12815 /* Handle DST variants. */
12816 d = bdesc_dst;
12817 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
12818 if (d->code == fcode)
12820 arg0 = CALL_EXPR_ARG (exp, 0);
12821 arg1 = CALL_EXPR_ARG (exp, 1);
12822 arg2 = CALL_EXPR_ARG (exp, 2);
12823 op0 = expand_normal (arg0);
12824 op1 = expand_normal (arg1);
12825 op2 = expand_normal (arg2);
12826 mode0 = insn_data[d->icode].operand[0].mode;
12827 mode1 = insn_data[d->icode].operand[1].mode;
12829 /* Invalid arguments, bail out before generating bad rtl. */
12830 if (arg0 == error_mark_node
12831 || arg1 == error_mark_node
12832 || arg2 == error_mark_node)
12833 return const0_rtx;
12835 *expandedp = true;
12836 STRIP_NOPS (arg2);
12837 if (TREE_CODE (arg2) != INTEGER_CST
12838 || TREE_INT_CST_LOW (arg2) & ~0x3)
12840 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
12841 return const0_rtx;
12844 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12845 op0 = copy_to_mode_reg (Pmode, op0);
12846 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12847 op1 = copy_to_mode_reg (mode1, op1);
12849 pat = GEN_FCN (d->icode) (op0, op1, op2);
12850 if (pat != 0)
12851 emit_insn (pat);
12853 return NULL_RTX;
12856 return NULL_RTX;
12859 /* Expand vec_init builtin. */
12860 static rtx
12861 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
12863 enum machine_mode tmode = TYPE_MODE (type);
12864 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
12865 int i, n_elt = GET_MODE_NUNITS (tmode);
12867 gcc_assert (VECTOR_MODE_P (tmode));
12868 gcc_assert (n_elt == call_expr_nargs (exp));
12870 if (!target || !register_operand (target, tmode))
12871 target = gen_reg_rtx (tmode);
12873 /* If we have a vector comprised of a single element, such as V1TImode, do
12874 the initialization directly. */
12875 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
12877 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
12878 emit_move_insn (target, gen_lowpart (tmode, x));
12880 else
12882 rtvec v = rtvec_alloc (n_elt);
12884 for (i = 0; i < n_elt; ++i)
12886 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
12887 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
12890 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
12893 return target;
12896 /* Return the integer constant in ARG. Constrain it to be in the range
12897 of the subparts of VEC_TYPE; issue an error if not. */
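/* E.g. for a 4-element vector TYPE_VECTOR_SUBPARTS is 4, MAX is 3,
   and a selector of 5 (or any non-constant selector) is diagnosed
   and folded to element 0.  */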
12899 static int
12900 get_element_number (tree vec_type, tree arg)
12902 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
12904 if (!tree_fits_uhwi_p (arg)
12905 || (elt = tree_to_uhwi (arg), elt > max))
12907 error ("selector must be an integer constant in the range 0..%wi", max);
12908 return 0;
12911 return elt;
12914 /* Expand vec_set builtin. */
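/* A sketch of a use: vec_insert (x, v, 2) replaces element 2 of V
   with X; the selector must be an in-range literal, enforced through
   get_element_number above.  (vec_insert resolving to the VEC_SET
   builtins handled here is assumed for illustration.)  */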
12915 static rtx
12916 altivec_expand_vec_set_builtin (tree exp)
12918 enum machine_mode tmode, mode1;
12919 tree arg0, arg1, arg2;
12920 int elt;
12921 rtx op0, op1;
12923 arg0 = CALL_EXPR_ARG (exp, 0);
12924 arg1 = CALL_EXPR_ARG (exp, 1);
12925 arg2 = CALL_EXPR_ARG (exp, 2);
12927 tmode = TYPE_MODE (TREE_TYPE (arg0));
12928 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
12929 gcc_assert (VECTOR_MODE_P (tmode));
12931 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
12932 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
12933 elt = get_element_number (TREE_TYPE (arg0), arg2);
12935 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
12936 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
12938 op0 = force_reg (tmode, op0);
12939 op1 = force_reg (mode1, op1);
12941 rs6000_expand_vector_set (op0, op1, elt);
12943 return op0;
12946 /* Expand vec_ext builtin. */
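/* Likewise, vec_extract (v, 1) yields element 1 of V.  (vec_extract
   resolving to the VEC_EXT builtins handled here is assumed for
   illustration.)  */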
12947 static rtx
12948 altivec_expand_vec_ext_builtin (tree exp, rtx target)
12950 enum machine_mode tmode, mode0;
12951 tree arg0, arg1;
12952 int elt;
12953 rtx op0;
12955 arg0 = CALL_EXPR_ARG (exp, 0);
12956 arg1 = CALL_EXPR_ARG (exp, 1);
12958 op0 = expand_normal (arg0);
12959 elt = get_element_number (TREE_TYPE (arg0), arg1);
12961 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
12962 mode0 = TYPE_MODE (TREE_TYPE (arg0));
12963 gcc_assert (VECTOR_MODE_P (mode0));
12965 op0 = force_reg (mode0, op0);
12967 if (optimize || !target || !register_operand (target, tmode))
12968 target = gen_reg_rtx (tmode);
12970 rs6000_expand_vector_extract (target, op0, elt);
12972 return target;
12975 /* Expand the builtin in EXP and store the result in TARGET. Store
12976 true in *EXPANDEDP if we found a builtin to expand. */
12977 static rtx
12978 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
12980 const struct builtin_description *d;
12981 size_t i;
12982 enum insn_code icode;
12983 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12984 tree arg0;
12985 rtx op0, pat;
12986 enum machine_mode tmode, mode0;
12987 enum rs6000_builtins fcode
12988 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12990 if (rs6000_overloaded_builtin_p (fcode))
12992 *expandedp = true;
12993 error ("unresolved overload for Altivec builtin %qF", fndecl);
12995 /* Given it is invalid, just generate a normal call. */
12996 return expand_call (exp, target, false);
12999 target = altivec_expand_ld_builtin (exp, target, expandedp);
13000 if (*expandedp)
13001 return target;
13003 target = altivec_expand_st_builtin (exp, target, expandedp);
13004 if (*expandedp)
13005 return target;
13007 target = altivec_expand_dst_builtin (exp, target, expandedp);
13008 if (*expandedp)
13009 return target;
13011 *expandedp = true;
13013 switch (fcode)
13015 case ALTIVEC_BUILTIN_STVX_V2DF:
13016 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13017 case ALTIVEC_BUILTIN_STVX_V2DI:
13018 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13019 case ALTIVEC_BUILTIN_STVX_V4SF:
13020 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13021 case ALTIVEC_BUILTIN_STVX:
13022 case ALTIVEC_BUILTIN_STVX_V4SI:
13023 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13024 case ALTIVEC_BUILTIN_STVX_V8HI:
13025 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13026 case ALTIVEC_BUILTIN_STVX_V16QI:
13027 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13028 case ALTIVEC_BUILTIN_STVEBX:
13029 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13030 case ALTIVEC_BUILTIN_STVEHX:
13031 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13032 case ALTIVEC_BUILTIN_STVEWX:
13033 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13034 case ALTIVEC_BUILTIN_STVXL_V2DF:
13035 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13036 case ALTIVEC_BUILTIN_STVXL_V2DI:
13037 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13038 case ALTIVEC_BUILTIN_STVXL_V4SF:
13039 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13040 case ALTIVEC_BUILTIN_STVXL:
13041 case ALTIVEC_BUILTIN_STVXL_V4SI:
13042 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13043 case ALTIVEC_BUILTIN_STVXL_V8HI:
13044 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13045 case ALTIVEC_BUILTIN_STVXL_V16QI:
13046 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13048 case ALTIVEC_BUILTIN_STVLX:
13049 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13050 case ALTIVEC_BUILTIN_STVLXL:
13051 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13052 case ALTIVEC_BUILTIN_STVRX:
13053 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13054 case ALTIVEC_BUILTIN_STVRXL:
13055 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13057 case VSX_BUILTIN_STXVD2X_V1TI:
13058 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13059 case VSX_BUILTIN_STXVD2X_V2DF:
13060 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13061 case VSX_BUILTIN_STXVD2X_V2DI:
13062 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13063 case VSX_BUILTIN_STXVW4X_V4SF:
13064 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13065 case VSX_BUILTIN_STXVW4X_V4SI:
13066 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13067 case VSX_BUILTIN_STXVW4X_V8HI:
13068 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13069 case VSX_BUILTIN_STXVW4X_V16QI:
13070 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13072 case ALTIVEC_BUILTIN_MFVSCR:
13073 icode = CODE_FOR_altivec_mfvscr;
13074 tmode = insn_data[icode].operand[0].mode;
13076 if (target == 0
13077 || GET_MODE (target) != tmode
13078 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13079 target = gen_reg_rtx (tmode);
13081 pat = GEN_FCN (icode) (target);
13082 if (! pat)
13083 return 0;
13084 emit_insn (pat);
13085 return target;
13087 case ALTIVEC_BUILTIN_MTVSCR:
13088 icode = CODE_FOR_altivec_mtvscr;
13089 arg0 = CALL_EXPR_ARG (exp, 0);
13090 op0 = expand_normal (arg0);
13091 mode0 = insn_data[icode].operand[0].mode;
13093 /* If we got invalid arguments bail out before generating bad rtl. */
13094 if (arg0 == error_mark_node)
13095 return const0_rtx;
13097 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13098 op0 = copy_to_mode_reg (mode0, op0);
13100 pat = GEN_FCN (icode) (op0);
13101 if (pat)
13102 emit_insn (pat);
13103 return NULL_RTX;
13105 case ALTIVEC_BUILTIN_DSSALL:
13106 emit_insn (gen_altivec_dssall ());
13107 return NULL_RTX;
13109 case ALTIVEC_BUILTIN_DSS:
13110 icode = CODE_FOR_altivec_dss;
13111 arg0 = CALL_EXPR_ARG (exp, 0);
13112 STRIP_NOPS (arg0);
13113 op0 = expand_normal (arg0);
13114 mode0 = insn_data[icode].operand[0].mode;
13116 /* If we got invalid arguments bail out before generating bad rtl. */
13117 if (arg0 == error_mark_node)
13118 return const0_rtx;
13120 if (TREE_CODE (arg0) != INTEGER_CST
13121 || TREE_INT_CST_LOW (arg0) & ~0x3)
13123 error ("argument to dss must be a 2-bit unsigned literal");
13124 return const0_rtx;
13127 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13128 op0 = copy_to_mode_reg (mode0, op0);
13130 emit_insn (gen_altivec_dss (op0));
13131 return NULL_RTX;
13133 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
13134 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
13135 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
13136 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
13137 case VSX_BUILTIN_VEC_INIT_V2DF:
13138 case VSX_BUILTIN_VEC_INIT_V2DI:
13139 case VSX_BUILTIN_VEC_INIT_V1TI:
13140 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
13142 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
13143 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
13144 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
13145 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
13146 case VSX_BUILTIN_VEC_SET_V2DF:
13147 case VSX_BUILTIN_VEC_SET_V2DI:
13148 case VSX_BUILTIN_VEC_SET_V1TI:
13149 return altivec_expand_vec_set_builtin (exp);
13151 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
13152 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
13153 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
13154 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
13155 case VSX_BUILTIN_VEC_EXT_V2DF:
13156 case VSX_BUILTIN_VEC_EXT_V2DI:
13157 case VSX_BUILTIN_VEC_EXT_V1TI:
13158 return altivec_expand_vec_ext_builtin (exp, target);
13160 default:
13161 break;
13162 /* Fall through. */
13165 /* Expand abs* operations. */
13166 d = bdesc_abs;
13167 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
13168 if (d->code == fcode)
13169 return altivec_expand_abs_builtin (d->icode, exp, target);
13171 /* Expand the AltiVec predicates. */
13172 d = bdesc_altivec_preds;
13173 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
13174 if (d->code == fcode)
13175 return altivec_expand_predicate_builtin (d->icode, exp, target);
13177 /* LV* are funky. We initialized them differently. */
13178 switch (fcode)
13180 case ALTIVEC_BUILTIN_LVSL:
13181 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
13182 exp, target, false);
13183 case ALTIVEC_BUILTIN_LVSR:
13184 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
13185 exp, target, false);
13186 case ALTIVEC_BUILTIN_LVEBX:
13187 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
13188 exp, target, false);
13189 case ALTIVEC_BUILTIN_LVEHX:
13190 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
13191 exp, target, false);
13192 case ALTIVEC_BUILTIN_LVEWX:
13193 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
13194 exp, target, false);
13195 case ALTIVEC_BUILTIN_LVXL_V2DF:
13196 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
13197 exp, target, false);
13198 case ALTIVEC_BUILTIN_LVXL_V2DI:
13199 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
13200 exp, target, false);
13201 case ALTIVEC_BUILTIN_LVXL_V4SF:
13202 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
13203 exp, target, false);
13204 case ALTIVEC_BUILTIN_LVXL:
13205 case ALTIVEC_BUILTIN_LVXL_V4SI:
13206 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
13207 exp, target, false);
13208 case ALTIVEC_BUILTIN_LVXL_V8HI:
13209 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
13210 exp, target, false);
13211 case ALTIVEC_BUILTIN_LVXL_V16QI:
13212 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
13213 exp, target, false);
13214 case ALTIVEC_BUILTIN_LVX_V2DF:
13215 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
13216 exp, target, false);
13217 case ALTIVEC_BUILTIN_LVX_V2DI:
13218 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
13219 exp, target, false);
13220 case ALTIVEC_BUILTIN_LVX_V4SF:
13221 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
13222 exp, target, false);
13223 case ALTIVEC_BUILTIN_LVX:
13224 case ALTIVEC_BUILTIN_LVX_V4SI:
13225 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
13226 exp, target, false);
13227 case ALTIVEC_BUILTIN_LVX_V8HI:
13228 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
13229 exp, target, false);
13230 case ALTIVEC_BUILTIN_LVX_V16QI:
13231 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
13232 exp, target, false);
13233 case ALTIVEC_BUILTIN_LVLX:
13234 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
13235 exp, target, true);
13236 case ALTIVEC_BUILTIN_LVLXL:
13237 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
13238 exp, target, true);
13239 case ALTIVEC_BUILTIN_LVRX:
13240 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
13241 exp, target, true);
13242 case ALTIVEC_BUILTIN_LVRXL:
13243 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
13244 exp, target, true);
13245 case VSX_BUILTIN_LXVD2X_V1TI:
13246 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
13247 exp, target, false);
13248 case VSX_BUILTIN_LXVD2X_V2DF:
13249 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
13250 exp, target, false);
13251 case VSX_BUILTIN_LXVD2X_V2DI:
13252 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
13253 exp, target, false);
13254 case VSX_BUILTIN_LXVW4X_V4SF:
13255 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
13256 exp, target, false);
13257 case VSX_BUILTIN_LXVW4X_V4SI:
13258 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
13259 exp, target, false);
13260 case VSX_BUILTIN_LXVW4X_V8HI:
13261 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
13262 exp, target, false);
13263 case VSX_BUILTIN_LXVW4X_V16QI:
13264 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
13265 exp, target, false);
13266 break;
13267 default:
13268 break;
13269 /* Fall through. */
13272 *expandedp = false;
13273 return NULL_RTX;
13276 /* Expand the builtin in EXP and store the result in TARGET. Store
13277 true in *EXPANDEDP if we found a builtin to expand. */
13278 static rtx
13279 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
13281 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13282 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13283 const struct builtin_description *d;
13284 size_t i;
13286 *expandedp = true;
13288 switch (fcode)
13290 case PAIRED_BUILTIN_STX:
13291 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
13292 case PAIRED_BUILTIN_LX:
13293 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
13294 default:
13295 break;
13296 /* Fall through. */
13299 /* Expand the paired predicates. */
13300 d = bdesc_paired_preds;
13301 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
13302 if (d->code == fcode)
13303 return paired_expand_predicate_builtin (d->icode, exp, target);
13305 *expandedp = false;
13306 return NULL_RTX;
13309 /* Binops that need to be initialized manually, but can be expanded
13310 automagically by rs6000_expand_binop_builtin. */
13311 static const struct builtin_description bdesc_2arg_spe[] =
13313 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
13314 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
13315 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
13316 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
13317 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
13318 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
13319 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
13320 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
13321 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
13322 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
13323 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
13324 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
13325 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
13326 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
13327 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
13328 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
13329 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
13330 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
13331 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
13332 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
13333 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
13334 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
13337 /* Expand the builtin in EXP and store the result in TARGET. Store
13338 true in *EXPANDEDP if we found a builtin to expand.
13340 This expands the SPE builtins that are not simple unary and binary
13341 operations. */
13342 static rtx
13343 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
13345 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13346 tree arg1, arg0;
13347 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13348 enum insn_code icode;
13349 enum machine_mode tmode, mode0;
13350 rtx pat, op0;
13351 const struct builtin_description *d;
13352 size_t i;
13354 *expandedp = true;
13356 /* Syntax check for a 5-bit unsigned immediate. */
13357 switch (fcode)
13359 case SPE_BUILTIN_EVSTDD:
13360 case SPE_BUILTIN_EVSTDH:
13361 case SPE_BUILTIN_EVSTDW:
13362 case SPE_BUILTIN_EVSTWHE:
13363 case SPE_BUILTIN_EVSTWHO:
13364 case SPE_BUILTIN_EVSTWWE:
13365 case SPE_BUILTIN_EVSTWWO:
13366 arg1 = CALL_EXPR_ARG (exp, 2);
13367 if (TREE_CODE (arg1) != INTEGER_CST
13368 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13370 error ("argument 2 must be a 5-bit unsigned literal");
13371 return const0_rtx;
13373 break;
13374 default:
13375 break;
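  /* E.g. __builtin_spe_evstdd (v, p, 31) passes the check above,
     while an offset of 32 trips the 5-bit-literal error.  */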
13378 /* The evsplat*i instructions are not quite generic. */
13379 switch (fcode)
13381 case SPE_BUILTIN_EVSPLATFI:
13382 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
13383 exp, target);
13384 case SPE_BUILTIN_EVSPLATI:
13385 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
13386 exp, target);
13387 default:
13388 break;
13391 d = bdesc_2arg_spe;
13392 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
13393 if (d->code == fcode)
13394 return rs6000_expand_binop_builtin (d->icode, exp, target);
13396 d = bdesc_spe_predicates;
13397 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
13398 if (d->code == fcode)
13399 return spe_expand_predicate_builtin (d->icode, exp, target);
13401 d = bdesc_spe_evsel;
13402 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
13403 if (d->code == fcode)
13404 return spe_expand_evsel_builtin (d->icode, exp, target);
13406 switch (fcode)
13408 case SPE_BUILTIN_EVSTDDX:
13409 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
13410 case SPE_BUILTIN_EVSTDHX:
13411 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
13412 case SPE_BUILTIN_EVSTDWX:
13413 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
13414 case SPE_BUILTIN_EVSTWHEX:
13415 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
13416 case SPE_BUILTIN_EVSTWHOX:
13417 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
13418 case SPE_BUILTIN_EVSTWWEX:
13419 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
13420 case SPE_BUILTIN_EVSTWWOX:
13421 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
13422 case SPE_BUILTIN_EVSTDD:
13423 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
13424 case SPE_BUILTIN_EVSTDH:
13425 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
13426 case SPE_BUILTIN_EVSTDW:
13427 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
13428 case SPE_BUILTIN_EVSTWHE:
13429 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
13430 case SPE_BUILTIN_EVSTWHO:
13431 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
13432 case SPE_BUILTIN_EVSTWWE:
13433 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
13434 case SPE_BUILTIN_EVSTWWO:
13435 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
13436 case SPE_BUILTIN_MFSPEFSCR:
13437 icode = CODE_FOR_spe_mfspefscr;
13438 tmode = insn_data[icode].operand[0].mode;
13440 if (target == 0
13441 || GET_MODE (target) != tmode
13442 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13443 target = gen_reg_rtx (tmode);
13445 pat = GEN_FCN (icode) (target);
13446 if (! pat)
13447 return 0;
13448 emit_insn (pat);
13449 return target;
13450 case SPE_BUILTIN_MTSPEFSCR:
13451 icode = CODE_FOR_spe_mtspefscr;
13452 arg0 = CALL_EXPR_ARG (exp, 0);
13453 op0 = expand_normal (arg0);
13454 mode0 = insn_data[icode].operand[0].mode;
13456 if (arg0 == error_mark_node)
13457 return const0_rtx;
13459 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13460 op0 = copy_to_mode_reg (mode0, op0);
13462 pat = GEN_FCN (icode) (op0);
13463 if (pat)
13464 emit_insn (pat);
13465 return NULL_RTX;
13466 default:
13467 break;
13470 *expandedp = false;
13471 return NULL_RTX;
13474 static rtx
13475 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13477 rtx pat, scratch, tmp;
13478 tree form = CALL_EXPR_ARG (exp, 0);
13479 tree arg0 = CALL_EXPR_ARG (exp, 1);
13480 tree arg1 = CALL_EXPR_ARG (exp, 2);
13481 rtx op0 = expand_normal (arg0);
13482 rtx op1 = expand_normal (arg1);
13483 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13484 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13485 int form_int;
13486 enum rtx_code code;
13488 if (TREE_CODE (form) != INTEGER_CST)
13490 error ("argument 1 of __builtin_paired_predicate must be a constant");
13491 return const0_rtx;
13493 else
13494 form_int = TREE_INT_CST_LOW (form);
13496 gcc_assert (mode0 == mode1);
13498 if (arg0 == error_mark_node || arg1 == error_mark_node)
13499 return const0_rtx;
13501 if (target == 0
13502 || GET_MODE (target) != SImode
13503 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
13504 target = gen_reg_rtx (SImode);
13505 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13506 op0 = copy_to_mode_reg (mode0, op0);
13507 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13508 op1 = copy_to_mode_reg (mode1, op1);
13510 scratch = gen_reg_rtx (CCFPmode);
13512 pat = GEN_FCN (icode) (scratch, op0, op1);
13513 if (!pat)
13514 return const0_rtx;
13516 emit_insn (pat);
13518 switch (form_int)
13520 /* LT bit. */
13521 case 0:
13522 code = LT;
13523 break;
13524 /* GT bit. */
13525 case 1:
13526 code = GT;
13527 break;
13528 /* EQ bit. */
13529 case 2:
13530 code = EQ;
13531 break;
13532 /* UN bit. */
13533 case 3:
13534 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13535 return target;
13536 default:
13537 error ("argument 1 of __builtin_paired_predicate is out of range");
13538 return const0_rtx;
13541 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13542 emit_move_insn (target, tmp);
13543 return target;
13546 static rtx
13547 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13549 rtx pat, scratch, tmp;
13550 tree form = CALL_EXPR_ARG (exp, 0);
13551 tree arg0 = CALL_EXPR_ARG (exp, 1);
13552 tree arg1 = CALL_EXPR_ARG (exp, 2);
13553 rtx op0 = expand_normal (arg0);
13554 rtx op1 = expand_normal (arg1);
13555 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13556 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13557 int form_int;
13558 enum rtx_code code;
13560 if (TREE_CODE (form) != INTEGER_CST)
13562 error ("argument 1 of __builtin_spe_predicate must be a constant");
13563 return const0_rtx;
13565 else
13566 form_int = TREE_INT_CST_LOW (form);
13568 gcc_assert (mode0 == mode1);
13570 if (arg0 == error_mark_node || arg1 == error_mark_node)
13571 return const0_rtx;
13573 if (target == 0
13574 || GET_MODE (target) != SImode
13575 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
13576 target = gen_reg_rtx (SImode);
13578 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13579 op0 = copy_to_mode_reg (mode0, op0);
13580 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13581 op1 = copy_to_mode_reg (mode1, op1);
13583 scratch = gen_reg_rtx (CCmode);
13585 pat = GEN_FCN (icode) (scratch, op0, op1);
13586 if (! pat)
13587 return const0_rtx;
13588 emit_insn (pat);
13590 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
13591 _lower_. We use one compare, but look in different bits of the
13592 CR for each variant.
13594 There are 2 elements in each SPE simd type (upper/lower). The CR
13595 bits are set as follows:
13597 BIT0 | BIT 1 | BIT 2 | BIT 3
13598 U | L | (U | L) | (U & L)
13600 So, for an "all" relationship, BIT 3 would be set.
13601 For an "any" relationship, BIT 2 would be set. Etc.
13603 Following traditional nomenclature, these bits map to:
13605 BIT0 | BIT 1 | BIT 2 | BIT 3
13606 LT | GT | EQ | OV
13608 Later, we will generate rtl to look in the LT/GT/EQ/OV bits.  */
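  /* So FORM == 0 selects "all" (OV bit), FORM == 1 "any" (EQ bit),
     FORM == 2 "upper" (LT bit) and FORM == 3 "lower" (GT bit),
     exactly the cases handled below.  */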
13611 switch (form_int)
13613 /* All variant. OV bit. */
13614 case 0:
13615 /* We need to get to the OV bit, which is the ORDERED bit. We
13616 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
13617 that's ugly and will make validate_condition_mode die.
13618 So let's just use another pattern. */
13619 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13620 return target;
13621 /* Any variant. EQ bit. */
13622 case 1:
13623 code = EQ;
13624 break;
13625 /* Upper variant. LT bit. */
13626 case 2:
13627 code = LT;
13628 break;
13629 /* Lower variant. GT bit. */
13630 case 3:
13631 code = GT;
13632 break;
13633 default:
13634 error ("argument 1 of __builtin_spe_predicate is out of range");
13635 return const0_rtx;
13638 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13639 emit_move_insn (target, tmp);
13641 return target;
13644 /* The evsel builtins look like this:
13646 e = __builtin_spe_evsel_OP (a, b, c, d);
13648 and work like this:
13650 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
13651 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];  */
13654 static rtx
13655 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
13657 rtx pat, scratch;
13658 tree arg0 = CALL_EXPR_ARG (exp, 0);
13659 tree arg1 = CALL_EXPR_ARG (exp, 1);
13660 tree arg2 = CALL_EXPR_ARG (exp, 2);
13661 tree arg3 = CALL_EXPR_ARG (exp, 3);
13662 rtx op0 = expand_normal (arg0);
13663 rtx op1 = expand_normal (arg1);
13664 rtx op2 = expand_normal (arg2);
13665 rtx op3 = expand_normal (arg3);
13666 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13667 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13669 gcc_assert (mode0 == mode1);
13671 if (arg0 == error_mark_node || arg1 == error_mark_node
13672 || arg2 == error_mark_node || arg3 == error_mark_node)
13673 return const0_rtx;
13675 if (target == 0
13676 || GET_MODE (target) != mode0
13677 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
13678 target = gen_reg_rtx (mode0);
13680 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13681 op0 = copy_to_mode_reg (mode0, op0);
13682 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13683 op1 = copy_to_mode_reg (mode0, op1);
13684 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13685 op2 = copy_to_mode_reg (mode0, op2);
13686 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
13687 op3 = copy_to_mode_reg (mode0, op3);
13689 /* Generate the compare. */
13690 scratch = gen_reg_rtx (CCmode);
13691 pat = GEN_FCN (icode) (scratch, op0, op1);
13692 if (! pat)
13693 return const0_rtx;
13694 emit_insn (pat);
13696 if (mode0 == V2SImode)
13697 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
13698 else
13699 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
13701 return target;
13704 /* Raise an error message for a builtin function that is called without the
13705 appropriate target options being set. */
13707 static void
13708 rs6000_invalid_builtin (enum rs6000_builtins fncode)
13710 size_t uns_fncode = (size_t)fncode;
13711 const char *name = rs6000_builtin_info[uns_fncode].name;
13712 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
13714 gcc_assert (name != NULL);
13715 if ((fnmask & RS6000_BTM_CELL) != 0)
13716 error ("Builtin function %s is only valid for the cell processor", name);
13717 else if ((fnmask & RS6000_BTM_VSX) != 0)
13718 error ("Builtin function %s requires the -mvsx option", name);
13719 else if ((fnmask & RS6000_BTM_HTM) != 0)
13720 error ("Builtin function %s requires the -mhtm option", name);
13721 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
13722 error ("Builtin function %s requires the -maltivec option", name);
13723 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
13724 error ("Builtin function %s requires the -mpaired option", name);
13725 else if ((fnmask & RS6000_BTM_SPE) != 0)
13726 error ("Builtin function %s requires the -mspe option", name);
13727 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13728 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13729 error ("Builtin function %s requires the -mhard-dfp and"
13730 " -mpower8-vector options", name);
13731 else if ((fnmask & RS6000_BTM_DFP) != 0)
13732 error ("Builtin function %s requires the -mhard-dfp option", name);
13733 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
13734 error ("Builtin function %s requires the -mpower8-vector option", name);
13735 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13736 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13737 error ("Builtin function %s requires the -mhard-float and"
13738 " -mlong-double-128 options", name);
13739 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
13740 error ("Builtin function %s requires the -mhard-float option", name);
13741 else
13742 error ("Builtin function %s is not supported with the current options",
13743 name);
13746 /* Expand an expression EXP that calls a built-in function,
13747 with result going to TARGET if that's convenient
13748 (and in mode MODE if that's convenient).
13749 SUBTARGET may be used as the target for computing one of EXP's operands.
13750 IGNORE is nonzero if the value is to be ignored. */
13752 static rtx
13753 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13754 enum machine_mode mode ATTRIBUTE_UNUSED,
13755 int ignore ATTRIBUTE_UNUSED)
13757 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13758 enum rs6000_builtins fcode
13759 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
13760 size_t uns_fcode = (size_t)fcode;
13761 const struct builtin_description *d;
13762 size_t i;
13763 rtx ret;
13764 bool success;
13765 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
13766 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
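  /* I.e. every target bit a builtin requires must be enabled: a mask
     of (RS6000_BTM_ALTIVEC | RS6000_BTM_VSX), say, makes the builtin
     valid only when both -maltivec and -mvsx are in effect.  */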
13768 if (TARGET_DEBUG_BUILTIN)
13770 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
13771 const char *name1 = rs6000_builtin_info[uns_fcode].name;
13772 const char *name2 = ((icode != CODE_FOR_nothing)
13773 ? get_insn_name ((int)icode)
13774 : "nothing");
13775 const char *name3;
13777 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
13779 default: name3 = "unknown"; break;
13780 case RS6000_BTC_SPECIAL: name3 = "special"; break;
13781 case RS6000_BTC_UNARY: name3 = "unary"; break;
13782 case RS6000_BTC_BINARY: name3 = "binary"; break;
13783 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
13784 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
13785 case RS6000_BTC_ABS: name3 = "abs"; break;
13786 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
13787 case RS6000_BTC_DST: name3 = "dst"; break;
13791 fprintf (stderr,
13792 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
13793 (name1) ? name1 : "---", fcode,
13794 (name2) ? name2 : "---", (int)icode,
13795 name3,
13796 func_valid_p ? "" : ", not valid");
13799 if (!func_valid_p)
13801 rs6000_invalid_builtin (fcode);
13803 /* Given it is invalid, just generate a normal call. */
13804 return expand_call (exp, target, ignore);
13807 switch (fcode)
13809 case RS6000_BUILTIN_RECIP:
13810 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
13812 case RS6000_BUILTIN_RECIPF:
13813 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
13815 case RS6000_BUILTIN_RSQRTF:
13816 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
13818 case RS6000_BUILTIN_RSQRT:
13819 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
13821 case POWER7_BUILTIN_BPERMD:
13822 return rs6000_expand_binop_builtin (((TARGET_64BIT)
13823 ? CODE_FOR_bpermd_di
13824 : CODE_FOR_bpermd_si), exp, target);
13826 case RS6000_BUILTIN_GET_TB:
13827 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
13828 target);
13830 case RS6000_BUILTIN_MFTB:
13831 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
13832 ? CODE_FOR_rs6000_mftb_di
13833 : CODE_FOR_rs6000_mftb_si),
13834 target);
13836 case RS6000_BUILTIN_MFFS:
13837 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
13839 case RS6000_BUILTIN_MTFSF:
13840 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
13842 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
13843 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
13845 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
13846 : (int) CODE_FOR_altivec_lvsl_direct);
13847 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13848 enum machine_mode mode = insn_data[icode].operand[1].mode;
13849 tree arg;
13850 rtx op, addr, pat;
13852 gcc_assert (TARGET_ALTIVEC);
13854 arg = CALL_EXPR_ARG (exp, 0);
13855 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
13856 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
13857 addr = memory_address (mode, op);
13858 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
13859 op = addr;
13860 else
13862 /* For the load case need to negate the address. */
13863 op = gen_reg_rtx (GET_MODE (addr));
13864 emit_insn (gen_rtx_SET (VOIDmode, op,
13865 gen_rtx_NEG (GET_MODE (addr), addr)));
13867 op = gen_rtx_MEM (mode, op);
13869 if (target == 0
13870 || GET_MODE (target) != tmode
13871 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13872 target = gen_reg_rtx (tmode);
13874 pat = GEN_FCN (icode) (target, op);
13875 if (!pat)
13876 return 0;
13877 emit_insn (pat);
13879 return target;
13882 case ALTIVEC_BUILTIN_VCFUX:
13883 case ALTIVEC_BUILTIN_VCFSX:
13884 case ALTIVEC_BUILTIN_VCTUXS:
13885 case ALTIVEC_BUILTIN_VCTSXS:
13886 /* FIXME: There's got to be a nicer way to handle this case than
13887 constructing a new CALL_EXPR. */
13888 if (call_expr_nargs (exp) == 1)
13890 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
13891 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
13893 break;
13895 default:
13896 break;
13899 if (TARGET_ALTIVEC)
13901 ret = altivec_expand_builtin (exp, target, &success);
13903 if (success)
13904 return ret;
13906 if (TARGET_SPE)
13908 ret = spe_expand_builtin (exp, target, &success);
13910 if (success)
13911 return ret;
13913 if (TARGET_PAIRED_FLOAT)
13915 ret = paired_expand_builtin (exp, target, &success);
13917 if (success)
13918 return ret;
13920 if (TARGET_HTM)
13922 ret = htm_expand_builtin (exp, target, &success);
13924 if (success)
13925 return ret;
13928 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
13929 gcc_assert (attr == RS6000_BTC_UNARY
13930 || attr == RS6000_BTC_BINARY
13931 || attr == RS6000_BTC_TERNARY);
13933 /* Handle simple unary operations. */
13934 d = bdesc_1arg;
13935 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
13936 if (d->code == fcode)
13937 return rs6000_expand_unop_builtin (d->icode, exp, target);
13939 /* Handle simple binary operations. */
13940 d = bdesc_2arg;
13941 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
13942 if (d->code == fcode)
13943 return rs6000_expand_binop_builtin (d->icode, exp, target);
13945 /* Handle simple ternary operations. */
13946 d = bdesc_3arg;
13947 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
13948 if (d->code == fcode)
13949 return rs6000_expand_ternop_builtin (d->icode, exp, target);
13951 gcc_unreachable ();
13954 static void
13955 rs6000_init_builtins (void)
13957 tree tdecl;
13958 tree ftype;
13959 enum machine_mode mode;
13961 if (TARGET_DEBUG_BUILTIN)
13962 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
13963 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
13964 (TARGET_SPE) ? ", spe" : "",
13965 (TARGET_ALTIVEC) ? ", altivec" : "",
13966 (TARGET_VSX) ? ", vsx" : "");
13968 V2SI_type_node = build_vector_type (intSI_type_node, 2);
13969 V2SF_type_node = build_vector_type (float_type_node, 2);
13970 V2DI_type_node = build_vector_type (intDI_type_node, 2);
13971 V2DF_type_node = build_vector_type (double_type_node, 2);
13972 V4HI_type_node = build_vector_type (intHI_type_node, 4);
13973 V4SI_type_node = build_vector_type (intSI_type_node, 4);
13974 V4SF_type_node = build_vector_type (float_type_node, 4);
13975 V8HI_type_node = build_vector_type (intHI_type_node, 8);
13976 V16QI_type_node = build_vector_type (intQI_type_node, 16);
13978 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
13979 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
13980 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
13981 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
13983 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
13984 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
13985 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
13986 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
13988 /* We use V1TI mode as a special container to hold __int128_t items that
13989 must live in VSX registers. */
13990 if (intTI_type_node)
13992 V1TI_type_node = build_vector_type (intTI_type_node, 1);
13993 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
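  /* At source level this backs declarations such as
     "vector __int128 x;" (one 128-bit element per vector), matching
     the "__vector __int128" type registered further below; the
     example is illustrative only.  */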
13996 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
13997 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
13998 'vector unsigned short'. */
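  /* E.g. in C++ the pair
       void f (vector bool int);
       void f (vector unsigned int);
     must remain distinct overloads, hence build_distinct_type_copy
     below rather than a plain typedef.  */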
14000 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14001 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14002 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14003 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14004 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14006 long_integer_type_internal_node = long_integer_type_node;
14007 long_unsigned_type_internal_node = long_unsigned_type_node;
14008 long_long_integer_type_internal_node = long_long_integer_type_node;
14009 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14010 intQI_type_internal_node = intQI_type_node;
14011 uintQI_type_internal_node = unsigned_intQI_type_node;
14012 intHI_type_internal_node = intHI_type_node;
14013 uintHI_type_internal_node = unsigned_intHI_type_node;
14014 intSI_type_internal_node = intSI_type_node;
14015 uintSI_type_internal_node = unsigned_intSI_type_node;
14016 intDI_type_internal_node = intDI_type_node;
14017 uintDI_type_internal_node = unsigned_intDI_type_node;
14018 intTI_type_internal_node = intTI_type_node;
14019 uintTI_type_internal_node = unsigned_intTI_type_node;
14020 float_type_internal_node = float_type_node;
14021 double_type_internal_node = double_type_node;
14022 long_double_type_internal_node = long_double_type_node;
14023 dfloat64_type_internal_node = dfloat64_type_node;
14024 dfloat128_type_internal_node = dfloat128_type_node;
14025 void_type_internal_node = void_type_node;
14027 /* Initialize the modes for builtin_function_type, mapping a machine mode to
14028 tree type node. */
14029 builtin_mode_to_type[QImode][0] = integer_type_node;
14030 builtin_mode_to_type[HImode][0] = integer_type_node;
14031 builtin_mode_to_type[SImode][0] = intSI_type_node;
14032 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14033 builtin_mode_to_type[DImode][0] = intDI_type_node;
14034 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14035 builtin_mode_to_type[TImode][0] = intTI_type_node;
14036 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14037 builtin_mode_to_type[SFmode][0] = float_type_node;
14038 builtin_mode_to_type[DFmode][0] = double_type_node;
14039 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14040 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14041 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14042 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14043 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14044 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14045 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14046 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14047 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14048 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14049 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14050 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14051 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14052 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14053 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14054 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14055 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14056 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
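  /* builtin_function_type consults this table as [mode][unsignedp],
     so e.g. (DImode, 1) yields unsigned_intDI_type_node when the
     builtin signatures below are constructed.  */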
14058 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
14059 TYPE_NAME (bool_char_type_node) = tdecl;
14061 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
14062 TYPE_NAME (bool_short_type_node) = tdecl;
14064 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
14065 TYPE_NAME (bool_int_type_node) = tdecl;
14067 tdecl = add_builtin_type ("__pixel", pixel_type_node);
14068 TYPE_NAME (pixel_type_node) = tdecl;
14070 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
14071 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
14072 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
14073 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
14074 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
14076 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
14077 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
14079 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
14080 TYPE_NAME (V16QI_type_node) = tdecl;
14082 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
14083 TYPE_NAME (bool_V16QI_type_node) = tdecl;
14085 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
14086 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
14088 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
14089 TYPE_NAME (V8HI_type_node) = tdecl;
14091 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
14092 TYPE_NAME (bool_V8HI_type_node) = tdecl;
14094 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
14095 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
14097 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
14098 TYPE_NAME (V4SI_type_node) = tdecl;
14100 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
14101 TYPE_NAME (bool_V4SI_type_node) = tdecl;
14103 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
14104 TYPE_NAME (V4SF_type_node) = tdecl;
14106 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
14107 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
14109 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
14110 TYPE_NAME (V2DF_type_node) = tdecl;
14112 if (TARGET_POWERPC64)
14114 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
14115 TYPE_NAME (V2DI_type_node) = tdecl;
14117 tdecl = add_builtin_type ("__vector unsigned long",
14118 unsigned_V2DI_type_node);
14119 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14121 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
14122 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14124 else
14126 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
14127 TYPE_NAME (V2DI_type_node) = tdecl;
14129 tdecl = add_builtin_type ("__vector unsigned long long",
14130 unsigned_V2DI_type_node);
14131 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14133 tdecl = add_builtin_type ("__vector __bool long long",
14134 bool_V2DI_type_node);
14135 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14138 if (V1TI_type_node)
14140 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
14141 TYPE_NAME (V1TI_type_node) = tdecl;
14143 tdecl = add_builtin_type ("__vector unsigned __int128",
14144 unsigned_V1TI_type_node);
14145 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
14148 /* Paired and SPE builtins are only available if you build a compiler with
14149 the appropriate options, so only create those builtins with the
14150 appropriate compiler option. Create Altivec and VSX builtins on machines
14151 with at least the general purpose extensions (970 and newer) to allow the
14152 use of the target attribute. */
14153 if (TARGET_PAIRED_FLOAT)
14154 paired_init_builtins ();
14155 if (TARGET_SPE)
14156 spe_init_builtins ();
14157 if (TARGET_EXTRA_BUILTINS)
14158 altivec_init_builtins ();
14159 if (TARGET_HTM)
14160 htm_init_builtins ();
14162 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
14163 rs6000_common_init_builtins ();
14165 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
14166 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
14167 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
14169 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
14170 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
14171 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
14173 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
14174 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
14175 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
14177 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
14178 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
14179 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
14181 mode = (TARGET_64BIT) ? DImode : SImode;
14182 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
14183 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
14184 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
14186 ftype = build_function_type_list (unsigned_intDI_type_node,
14187 NULL_TREE);
14188 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
14190 if (TARGET_64BIT)
14191 ftype = build_function_type_list (unsigned_intDI_type_node,
14192 NULL_TREE);
14193 else
14194 ftype = build_function_type_list (unsigned_intSI_type_node,
14195 NULL_TREE);
14196 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
14198 ftype = build_function_type_list (double_type_node, NULL_TREE);
14199 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
14201 ftype = build_function_type_list (void_type_node,
14202 intSI_type_node, double_type_node,
14203 NULL_TREE);
14204 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
14206 #if TARGET_XCOFF
14207 /* AIX libm provides clog as __clog. */
14208 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
14209 set_user_assembler_name (tdecl, "__clog");
14210 #endif
14212 #ifdef SUBTARGET_INIT_BUILTINS
14213 SUBTARGET_INIT_BUILTINS;
14214 #endif
14217 /* Returns the rs6000 builtin decl for CODE. */
14219 static tree
14220 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
14222 HOST_WIDE_INT fnmask;
14224 if (code >= RS6000_BUILTIN_COUNT)
14225 return error_mark_node;
14227 fnmask = rs6000_builtin_info[code].mask;
14228 if ((fnmask & rs6000_builtin_mask) != fnmask)
14230 rs6000_invalid_builtin ((enum rs6000_builtins)code);
14231 return error_mark_node;
14234 return rs6000_builtin_decls[code];
14237 static void
14238 spe_init_builtins (void)
14240 tree puint_type_node = build_pointer_type (unsigned_type_node);
14241 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
14242 const struct builtin_description *d;
14243 size_t i;
14245 tree v2si_ftype_4_v2si
14246 = build_function_type_list (opaque_V2SI_type_node,
14247 opaque_V2SI_type_node,
14248 opaque_V2SI_type_node,
14249 opaque_V2SI_type_node,
14250 opaque_V2SI_type_node,
14251 NULL_TREE);
14253 tree v2sf_ftype_4_v2sf
14254 = build_function_type_list (opaque_V2SF_type_node,
14255 opaque_V2SF_type_node,
14256 opaque_V2SF_type_node,
14257 opaque_V2SF_type_node,
14258 opaque_V2SF_type_node,
14259 NULL_TREE);
14261 tree int_ftype_int_v2si_v2si
14262 = build_function_type_list (integer_type_node,
14263 integer_type_node,
14264 opaque_V2SI_type_node,
14265 opaque_V2SI_type_node,
14266 NULL_TREE);
14268 tree int_ftype_int_v2sf_v2sf
14269 = build_function_type_list (integer_type_node,
14270 integer_type_node,
14271 opaque_V2SF_type_node,
14272 opaque_V2SF_type_node,
14273 NULL_TREE);
14275 tree void_ftype_v2si_puint_int
14276 = build_function_type_list (void_type_node,
14277 opaque_V2SI_type_node,
14278 puint_type_node,
14279 integer_type_node,
14280 NULL_TREE);
14282 tree void_ftype_v2si_puint_char
14283 = build_function_type_list (void_type_node,
14284 opaque_V2SI_type_node,
14285 puint_type_node,
14286 char_type_node,
14287 NULL_TREE);
14289 tree void_ftype_v2si_pv2si_int
14290 = build_function_type_list (void_type_node,
14291 opaque_V2SI_type_node,
14292 opaque_p_V2SI_type_node,
14293 integer_type_node,
14294 NULL_TREE);
14296 tree void_ftype_v2si_pv2si_char
14297 = build_function_type_list (void_type_node,
14298 opaque_V2SI_type_node,
14299 opaque_p_V2SI_type_node,
14300 char_type_node,
14301 NULL_TREE);
14303 tree void_ftype_int
14304 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14306 tree int_ftype_void
14307 = build_function_type_list (integer_type_node, NULL_TREE);
14309 tree v2si_ftype_pv2si_int
14310 = build_function_type_list (opaque_V2SI_type_node,
14311 opaque_p_V2SI_type_node,
14312 integer_type_node,
14313 NULL_TREE);
14315 tree v2si_ftype_puint_int
14316 = build_function_type_list (opaque_V2SI_type_node,
14317 puint_type_node,
14318 integer_type_node,
14319 NULL_TREE);
14321 tree v2si_ftype_pushort_int
14322 = build_function_type_list (opaque_V2SI_type_node,
14323 pushort_type_node,
14324 integer_type_node,
14325 NULL_TREE);
14327 tree v2si_ftype_signed_char
14328 = build_function_type_list (opaque_V2SI_type_node,
14329 signed_char_type_node,
14330 NULL_TREE);
14332 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
14334 /* Initialize irregular SPE builtins. */
14336 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
14337 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
14338 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
14339 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
14340 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
14341 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
14342 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
14343 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
14344 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
14345 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
14346 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
14347 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
14348 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
14349 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
14350 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
14351 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
14352 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
14353 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
14355 /* Loads. */
14356 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
14357 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
14358 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
14359 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
14360 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
14361 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
14362 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
14363 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
14364 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
14365 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
14366 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
14367 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
14368 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
14369 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
14370 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
14371 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
14372 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
14373 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
14374 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
14375 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
14376 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
14377 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
14379 /* Predicates. */
14380 d = bdesc_spe_predicates;
14381 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
14382 {
14383 tree type;
14385 switch (insn_data[d->icode].operand[1].mode)
14386 {
14387 case V2SImode:
14388 type = int_ftype_int_v2si_v2si;
14389 break;
14390 case V2SFmode:
14391 type = int_ftype_int_v2sf_v2sf;
14392 break;
14393 default:
14394 gcc_unreachable ();
14395 }
14397 def_builtin (d->name, type, d->code);
14398 }
14400 /* Evsel predicates. */
14401 d = bdesc_spe_evsel;
14402 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
14403 {
14404 tree type;
14406 switch (insn_data[d->icode].operand[1].mode)
14407 {
14408 case V2SImode:
14409 type = v2si_ftype_4_v2si;
14410 break;
14411 case V2SFmode:
14412 type = v2sf_ftype_4_v2sf;
14413 break;
14414 default:
14415 gcc_unreachable ();
14416 }
14418 def_builtin (d->name, type, d->code);
14419 }
14420 }
14422 static void
14423 paired_init_builtins (void)
14424 {
14425 const struct builtin_description *d;
14426 size_t i;
14428 tree int_ftype_int_v2sf_v2sf
14429 = build_function_type_list (integer_type_node,
14430 integer_type_node,
14431 V2SF_type_node,
14432 V2SF_type_node,
14433 NULL_TREE);
14434 tree pcfloat_type_node =
14435 build_pointer_type (build_qualified_type
14436 (float_type_node, TYPE_QUAL_CONST));
14438 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
14439 long_integer_type_node,
14440 pcfloat_type_node,
14441 NULL_TREE);
14442 tree void_ftype_v2sf_long_pcfloat =
14443 build_function_type_list (void_type_node,
14444 V2SF_type_node,
14445 long_integer_type_node,
14446 pcfloat_type_node,
14447 NULL_TREE);
14450 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
14451 PAIRED_BUILTIN_LX);
14454 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
14455 PAIRED_BUILTIN_STX);
14457 /* Predicates. */
14458 d = bdesc_paired_preds;
14459 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
14460 {
14461 tree type;
14463 if (TARGET_DEBUG_BUILTIN)
14464 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
14465 (int)i, get_insn_name (d->icode), (int)d->icode,
14466 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
14468 switch (insn_data[d->icode].operand[1].mode)
14469 {
14470 case V2SFmode:
14471 type = int_ftype_int_v2sf_v2sf;
14472 break;
14473 default:
14474 gcc_unreachable ();
14475 }
14477 def_builtin (d->name, type, d->code);
14478 }
14479 }
14481 static void
14482 altivec_init_builtins (void)
14483 {
14484 const struct builtin_description *d;
14485 size_t i;
14486 tree ftype;
14487 tree decl;
14489 tree pvoid_type_node = build_pointer_type (void_type_node);
14491 tree pcvoid_type_node
14492 = build_pointer_type (build_qualified_type (void_type_node,
14493 TYPE_QUAL_CONST));
14495 tree int_ftype_opaque
14496 = build_function_type_list (integer_type_node,
14497 opaque_V4SI_type_node, NULL_TREE);
14498 tree opaque_ftype_opaque
14499 = build_function_type_list (integer_type_node, NULL_TREE);
14500 tree opaque_ftype_opaque_int
14501 = build_function_type_list (opaque_V4SI_type_node,
14502 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
14503 tree opaque_ftype_opaque_opaque_int
14504 = build_function_type_list (opaque_V4SI_type_node,
14505 opaque_V4SI_type_node, opaque_V4SI_type_node,
14506 integer_type_node, NULL_TREE);
14507 tree int_ftype_int_opaque_opaque
14508 = build_function_type_list (integer_type_node,
14509 integer_type_node, opaque_V4SI_type_node,
14510 opaque_V4SI_type_node, NULL_TREE);
14511 tree int_ftype_int_v4si_v4si
14512 = build_function_type_list (integer_type_node,
14513 integer_type_node, V4SI_type_node,
14514 V4SI_type_node, NULL_TREE);
14515 tree int_ftype_int_v2di_v2di
14516 = build_function_type_list (integer_type_node,
14517 integer_type_node, V2DI_type_node,
14518 V2DI_type_node, NULL_TREE);
14519 tree void_ftype_v4si
14520 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
14521 tree v8hi_ftype_void
14522 = build_function_type_list (V8HI_type_node, NULL_TREE);
14523 tree void_ftype_void
14524 = build_function_type_list (void_type_node, NULL_TREE);
14525 tree void_ftype_int
14526 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14528 tree opaque_ftype_long_pcvoid
14529 = build_function_type_list (opaque_V4SI_type_node,
14530 long_integer_type_node, pcvoid_type_node,
14531 NULL_TREE);
14532 tree v16qi_ftype_long_pcvoid
14533 = build_function_type_list (V16QI_type_node,
14534 long_integer_type_node, pcvoid_type_node,
14535 NULL_TREE);
14536 tree v8hi_ftype_long_pcvoid
14537 = build_function_type_list (V8HI_type_node,
14538 long_integer_type_node, pcvoid_type_node,
14539 NULL_TREE);
14540 tree v4si_ftype_long_pcvoid
14541 = build_function_type_list (V4SI_type_node,
14542 long_integer_type_node, pcvoid_type_node,
14543 NULL_TREE);
14544 tree v4sf_ftype_long_pcvoid
14545 = build_function_type_list (V4SF_type_node,
14546 long_integer_type_node, pcvoid_type_node,
14547 NULL_TREE);
14548 tree v2df_ftype_long_pcvoid
14549 = build_function_type_list (V2DF_type_node,
14550 long_integer_type_node, pcvoid_type_node,
14551 NULL_TREE);
14552 tree v2di_ftype_long_pcvoid
14553 = build_function_type_list (V2DI_type_node,
14554 long_integer_type_node, pcvoid_type_node,
14555 NULL_TREE);
14557 tree void_ftype_opaque_long_pvoid
14558 = build_function_type_list (void_type_node,
14559 opaque_V4SI_type_node, long_integer_type_node,
14560 pvoid_type_node, NULL_TREE);
14561 tree void_ftype_v4si_long_pvoid
14562 = build_function_type_list (void_type_node,
14563 V4SI_type_node, long_integer_type_node,
14564 pvoid_type_node, NULL_TREE);
14565 tree void_ftype_v16qi_long_pvoid
14566 = build_function_type_list (void_type_node,
14567 V16QI_type_node, long_integer_type_node,
14568 pvoid_type_node, NULL_TREE);
14569 tree void_ftype_v8hi_long_pvoid
14570 = build_function_type_list (void_type_node,
14571 V8HI_type_node, long_integer_type_node,
14572 pvoid_type_node, NULL_TREE);
14573 tree void_ftype_v4sf_long_pvoid
14574 = build_function_type_list (void_type_node,
14575 V4SF_type_node, long_integer_type_node,
14576 pvoid_type_node, NULL_TREE);
14577 tree void_ftype_v2df_long_pvoid
14578 = build_function_type_list (void_type_node,
14579 V2DF_type_node, long_integer_type_node,
14580 pvoid_type_node, NULL_TREE);
14581 tree void_ftype_v2di_long_pvoid
14582 = build_function_type_list (void_type_node,
14583 V2DI_type_node, long_integer_type_node,
14584 pvoid_type_node, NULL_TREE);
14585 tree int_ftype_int_v8hi_v8hi
14586 = build_function_type_list (integer_type_node,
14587 integer_type_node, V8HI_type_node,
14588 V8HI_type_node, NULL_TREE);
14589 tree int_ftype_int_v16qi_v16qi
14590 = build_function_type_list (integer_type_node,
14591 integer_type_node, V16QI_type_node,
14592 V16QI_type_node, NULL_TREE);
14593 tree int_ftype_int_v4sf_v4sf
14594 = build_function_type_list (integer_type_node,
14595 integer_type_node, V4SF_type_node,
14596 V4SF_type_node, NULL_TREE);
14597 tree int_ftype_int_v2df_v2df
14598 = build_function_type_list (integer_type_node,
14599 integer_type_node, V2DF_type_node,
14600 V2DF_type_node, NULL_TREE);
14601 tree v2di_ftype_v2di
14602 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
14603 tree v4si_ftype_v4si
14604 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
14605 tree v8hi_ftype_v8hi
14606 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
14607 tree v16qi_ftype_v16qi
14608 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
14609 tree v4sf_ftype_v4sf
14610 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14611 tree v2df_ftype_v2df
14612 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14613 tree void_ftype_pcvoid_int_int
14614 = build_function_type_list (void_type_node,
14615 pcvoid_type_node, integer_type_node,
14616 integer_type_node, NULL_TREE);
14618 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
14619 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
14620 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
14621 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
14622 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
14623 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
14624 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
14625 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
14626 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
14627 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
14628 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
14629 ALTIVEC_BUILTIN_LVXL_V2DF);
14630 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
14631 ALTIVEC_BUILTIN_LVXL_V2DI);
14632 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
14633 ALTIVEC_BUILTIN_LVXL_V4SF);
14634 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
14635 ALTIVEC_BUILTIN_LVXL_V4SI);
14636 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
14637 ALTIVEC_BUILTIN_LVXL_V8HI);
14638 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
14639 ALTIVEC_BUILTIN_LVXL_V16QI);
14640 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
14641 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
14642 ALTIVEC_BUILTIN_LVX_V2DF);
14643 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
14644 ALTIVEC_BUILTIN_LVX_V2DI);
14645 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
14646 ALTIVEC_BUILTIN_LVX_V4SF);
14647 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
14648 ALTIVEC_BUILTIN_LVX_V4SI);
14649 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
14650 ALTIVEC_BUILTIN_LVX_V8HI);
14651 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
14652 ALTIVEC_BUILTIN_LVX_V16QI);
14653 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
14654 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
14655 ALTIVEC_BUILTIN_STVX_V2DF);
14656 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
14657 ALTIVEC_BUILTIN_STVX_V2DI);
14658 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
14659 ALTIVEC_BUILTIN_STVX_V4SF);
14660 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
14661 ALTIVEC_BUILTIN_STVX_V4SI);
14662 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
14663 ALTIVEC_BUILTIN_STVX_V8HI);
14664 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
14665 ALTIVEC_BUILTIN_STVX_V16QI);
14666 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
14667 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
14668 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
14669 ALTIVEC_BUILTIN_STVXL_V2DF);
14670 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
14671 ALTIVEC_BUILTIN_STVXL_V2DI);
14672 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
14673 ALTIVEC_BUILTIN_STVXL_V4SF);
14674 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
14675 ALTIVEC_BUILTIN_STVXL_V4SI);
14676 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
14677 ALTIVEC_BUILTIN_STVXL_V8HI);
14678 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
14679 ALTIVEC_BUILTIN_STVXL_V16QI);
14680 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
14681 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
14682 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
14683 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
14684 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
14685 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
14686 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
14687 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
14688 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
14689 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
14690 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
14691 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
14692 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
14693 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
14694 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
14695 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
14697 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
14698 VSX_BUILTIN_LXVD2X_V2DF);
14699 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
14700 VSX_BUILTIN_LXVD2X_V2DI);
14701 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
14702 VSX_BUILTIN_LXVW4X_V4SF);
14703 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
14704 VSX_BUILTIN_LXVW4X_V4SI);
14705 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
14706 VSX_BUILTIN_LXVW4X_V8HI);
14707 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
14708 VSX_BUILTIN_LXVW4X_V16QI);
14709 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
14710 VSX_BUILTIN_STXVD2X_V2DF);
14711 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
14712 VSX_BUILTIN_STXVD2X_V2DI);
14713 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
14714 VSX_BUILTIN_STXVW4X_V4SF);
14715 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
14716 VSX_BUILTIN_STXVW4X_V4SI);
14717 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
14718 VSX_BUILTIN_STXVW4X_V8HI);
14719 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
14720 VSX_BUILTIN_STXVW4X_V16QI);
14721 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
14722 VSX_BUILTIN_VEC_LD);
14723 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
14724 VSX_BUILTIN_VEC_ST);
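/* Editor's note -- illustrative sketch, not part of the original source:
   the overloaded __builtin_vec_ld/__builtin_vec_st defined above are the
   targets of <altivec.h>'s vec_ld/vec_st, e.g.

     #include <altivec.h>
     vector signed int load4 (const int *p) { return vec_ld (0, p); }

   resolves to ALTIVEC_BUILTIN_VEC_LD, while vec_vsx_ld goes through
   VSX_BUILTIN_VEC_LD defined just above.  */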
14726 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
14727 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
14728 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
14730 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
14731 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
14732 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
14733 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
14734 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
14735 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
14736 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
14737 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
14738 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
14739 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
14740 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
14741 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
14743 /* Cell builtins. */
14744 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
14745 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
14746 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
14747 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
14749 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
14750 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
14751 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
14752 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
14754 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
14755 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
14756 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
14757 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
14759 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
14760 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
14761 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
14762 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
14764 /* Add the DST variants. */
14765 d = bdesc_dst;
14766 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14767 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
14769 /* Initialize the predicates. */
14770 d = bdesc_altivec_preds;
14771 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14772 {
14773 enum machine_mode mode1;
14774 tree type;
14776 if (rs6000_overloaded_builtin_p (d->code))
14777 mode1 = VOIDmode;
14778 else
14779 mode1 = insn_data[d->icode].operand[1].mode;
14781 switch (mode1)
14782 {
14783 case VOIDmode:
14784 type = int_ftype_int_opaque_opaque;
14785 break;
14786 case V2DImode:
14787 type = int_ftype_int_v2di_v2di;
14788 break;
14789 case V4SImode:
14790 type = int_ftype_int_v4si_v4si;
14791 break;
14792 case V8HImode:
14793 type = int_ftype_int_v8hi_v8hi;
14794 break;
14795 case V16QImode:
14796 type = int_ftype_int_v16qi_v16qi;
14797 break;
14798 case V4SFmode:
14799 type = int_ftype_int_v4sf_v4sf;
14800 break;
14801 case V2DFmode:
14802 type = int_ftype_int_v2df_v2df;
14803 break;
14804 default:
14805 gcc_unreachable ();
14806 }
14808 def_builtin (d->name, type, d->code);
14809 }
14811 /* Initialize the abs* operators. */
14812 d = bdesc_abs;
14813 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14814 {
14815 enum machine_mode mode0;
14816 tree type;
14818 mode0 = insn_data[d->icode].operand[0].mode;
14820 switch (mode0)
14821 {
14822 case V2DImode:
14823 type = v2di_ftype_v2di;
14824 break;
14825 case V4SImode:
14826 type = v4si_ftype_v4si;
14827 break;
14828 case V8HImode:
14829 type = v8hi_ftype_v8hi;
14830 break;
14831 case V16QImode:
14832 type = v16qi_ftype_v16qi;
14833 break;
14834 case V4SFmode:
14835 type = v4sf_ftype_v4sf;
14836 break;
14837 case V2DFmode:
14838 type = v2df_ftype_v2df;
14839 break;
14840 default:
14841 gcc_unreachable ();
14842 }
14844 def_builtin (d->name, type, d->code);
14845 }
14847 /* Initialize target builtin that implements
14848 targetm.vectorize.builtin_mask_for_load. */
14850 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
14851 v16qi_ftype_long_pcvoid,
14852 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
14853 BUILT_IN_MD, NULL, NULL_TREE);
14854 TREE_READONLY (decl) = 1;
14855 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
14856 altivec_builtin_mask_for_load = decl;
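/* Editor's note (added): the decl recorded here is handed back through
   the targetm.vectorize.builtin_mask_for_load hook named in the comment
   above, letting the vectorizer build the lvsl-style permute mask it
   needs to realign misaligned vector loads.  */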
14858 /* Access to the vec_init patterns. */
14859 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
14860 integer_type_node, integer_type_node,
14861 integer_type_node, NULL_TREE);
14862 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
14864 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
14865 short_integer_type_node,
14866 short_integer_type_node,
14867 short_integer_type_node,
14868 short_integer_type_node,
14869 short_integer_type_node,
14870 short_integer_type_node,
14871 short_integer_type_node, NULL_TREE);
14872 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
14874 ftype = build_function_type_list (V16QI_type_node, char_type_node,
14875 char_type_node, char_type_node,
14876 char_type_node, char_type_node,
14877 char_type_node, char_type_node,
14878 char_type_node, char_type_node,
14879 char_type_node, char_type_node,
14880 char_type_node, char_type_node,
14881 char_type_node, char_type_node,
14882 char_type_node, NULL_TREE);
14883 def_builtin ("__builtin_vec_init_v16qi", ftype,
14884 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
14886 ftype = build_function_type_list (V4SF_type_node, float_type_node,
14887 float_type_node, float_type_node,
14888 float_type_node, NULL_TREE);
14889 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
14891 /* VSX builtins. */
14892 ftype = build_function_type_list (V2DF_type_node, double_type_node,
14893 double_type_node, NULL_TREE);
14894 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
14896 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
14897 intDI_type_node, NULL_TREE);
14898 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
14900 /* Access to the vec_set patterns. */
14901 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
14902 intSI_type_node,
14903 integer_type_node, NULL_TREE);
14904 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
14906 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
14907 intHI_type_node,
14908 integer_type_node, NULL_TREE);
14909 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
14911 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
14912 intQI_type_node,
14913 integer_type_node, NULL_TREE);
14914 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
14916 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
14917 float_type_node,
14918 integer_type_node, NULL_TREE);
14919 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
14921 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
14922 double_type_node,
14923 integer_type_node, NULL_TREE);
14924 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
14926 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
14927 intDI_type_node,
14928 integer_type_node, NULL_TREE);
14929 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
14931 /* Access to the vec_extract patterns. */
14932 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
14933 integer_type_node, NULL_TREE);
14934 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
14936 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
14937 integer_type_node, NULL_TREE);
14938 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
14940 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
14941 integer_type_node, NULL_TREE);
14942 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
14944 ftype = build_function_type_list (float_type_node, V4SF_type_node,
14945 integer_type_node, NULL_TREE);
14946 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
14948 ftype = build_function_type_list (double_type_node, V2DF_type_node,
14949 integer_type_node, NULL_TREE);
14950 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
14952 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
14953 integer_type_node, NULL_TREE);
14954 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
14957 if (V1TI_type_node)
14958 {
14959 tree v1ti_ftype_long_pcvoid
14960 = build_function_type_list (V1TI_type_node,
14961 long_integer_type_node, pcvoid_type_node,
14962 NULL_TREE);
14963 tree void_ftype_v1ti_long_pvoid
14964 = build_function_type_list (void_type_node,
14965 V1TI_type_node, long_integer_type_node,
14966 pvoid_type_node, NULL_TREE);
14967 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
14968 VSX_BUILTIN_LXVD2X_V1TI);
14969 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
14970 VSX_BUILTIN_STXVD2X_V1TI);
14971 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
14972 NULL_TREE, NULL_TREE);
14973 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
14974 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
14975 intTI_type_node,
14976 integer_type_node, NULL_TREE);
14977 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
14978 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
14979 integer_type_node, NULL_TREE);
14980 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
14981 }
14982 }
14985 static void
14986 htm_init_builtins (void)
14987 {
14988 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
14989 const struct builtin_description *d;
14990 size_t i;
14992 d = bdesc_htm;
14993 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
14994 {
14995 tree op[MAX_HTM_OPERANDS], type;
14996 HOST_WIDE_INT mask = d->mask;
14997 unsigned attr = rs6000_builtin_info[d->code].attr;
14998 bool void_func = (attr & RS6000_BTC_VOID);
14999 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
15000 int nopnds = 0;
15001 tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
15002 : unsigned_type_node;
15004 if ((mask & builtin_mask) != mask)
15005 {
15006 if (TARGET_DEBUG_BUILTIN)
15007 fprintf (stderr, "htm_builtin, skip htm %s\n", d->name);
15008 continue;
15009 }
15011 if (d->name == 0)
15012 {
15013 if (TARGET_DEBUG_BUILTIN)
15014 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
15015 (long unsigned) i);
15016 continue;
15017 }
15019 op[nopnds++] = (void_func) ? void_type_node : argtype;
15021 if (attr_args == RS6000_BTC_UNARY)
15022 op[nopnds++] = argtype;
15023 else if (attr_args == RS6000_BTC_BINARY)
15024 {
15025 op[nopnds++] = argtype;
15026 op[nopnds++] = argtype;
15027 }
15028 else if (attr_args == RS6000_BTC_TERNARY)
15029 {
15030 op[nopnds++] = argtype;
15031 op[nopnds++] = argtype;
15032 op[nopnds++] = argtype;
15033 }
15035 switch (nopnds)
15036 {
15037 case 1:
15038 type = build_function_type_list (op[0], NULL_TREE);
15039 break;
15040 case 2:
15041 type = build_function_type_list (op[0], op[1], NULL_TREE);
15042 break;
15043 case 3:
15044 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
15045 break;
15046 case 4:
15047 type = build_function_type_list (op[0], op[1], op[2], op[3],
15048 NULL_TREE);
15049 break;
15050 default:
15051 gcc_unreachable ();
15052 }
15054 def_builtin (d->name, type, d->code);
15055 }
15056 }
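/* Editor's note -- illustrative sketch, not part of the original source:
   with -mhtm the bdesc_htm table walked above yields builtins such as
   __builtin_tbegin and __builtin_tend, used along the lines of

     if (__builtin_tbegin (0))
       {
         ... transactional code ...
         __builtin_tend (0);
       }

   (the exact set of names comes from bdesc_htm, which is generated from
   the builtin description macros elsewhere in this port).  */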
15058 /* Hash function for builtin functions with up to 3 arguments and a return
15059 type. */
15060 static unsigned
15061 builtin_hash_function (const void *hash_entry)
15062 {
15063 unsigned ret = 0;
15064 int i;
15065 const struct builtin_hash_struct *bh =
15066 (const struct builtin_hash_struct *) hash_entry;
15068 for (i = 0; i < 4; i++)
15069 {
15070 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
15071 ret = (ret * 2) + bh->uns_p[i];
15072 }
15074 return ret;
15075 }
15077 /* Compare builtin hash entries H1 and H2 for equivalence. */
15078 static int
15079 builtin_hash_eq (const void *h1, const void *h2)
15080 {
15081 const struct builtin_hash_struct *p1 = (const struct builtin_hash_struct *) h1;
15082 const struct builtin_hash_struct *p2 = (const struct builtin_hash_struct *) h2;
15084 return ((p1->mode[0] == p2->mode[0])
15085 && (p1->mode[1] == p2->mode[1])
15086 && (p1->mode[2] == p2->mode[2])
15087 && (p1->mode[3] == p2->mode[3])
15088 && (p1->uns_p[0] == p2->uns_p[0])
15089 && (p1->uns_p[1] == p2->uns_p[1])
15090 && (p1->uns_p[2] == p2->uns_p[2])
15091 && (p1->uns_p[3] == p2->uns_p[3]));
15092 }
15094 /* Map types for builtin functions with an explicit return type and up to 3
15095 arguments. Functions with fewer than 3 arguments use VOIDmode as the mode
15096 of the unused argument slots. */
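/* Editor's worked example, not in the original source: a call such as

     builtin_function_type (V4SImode, V4SImode, V4SImode, VOIDmode,
                            code, name)

   yields (and caches) the type "V4SI f (V4SI, V4SI)"; passing VOIDmode
   for mode_arg2 is how a two-argument builtin is requested.  */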
15097 static tree
15098 builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
15099 enum machine_mode mode_arg1, enum machine_mode mode_arg2,
15100 enum rs6000_builtins builtin, const char *name)
15101 {
15102 struct builtin_hash_struct h;
15103 struct builtin_hash_struct *h2;
15104 void **found;
15105 int num_args = 3;
15106 int i;
15107 tree ret_type = NULL_TREE;
15108 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
15110 /* Create builtin_hash_table. */
15111 if (builtin_hash_table == NULL)
15112 builtin_hash_table = htab_create_ggc (1500, builtin_hash_function,
15113 builtin_hash_eq, NULL);
15115 h.type = NULL_TREE;
15116 h.mode[0] = mode_ret;
15117 h.mode[1] = mode_arg0;
15118 h.mode[2] = mode_arg1;
15119 h.mode[3] = mode_arg2;
15120 h.uns_p[0] = 0;
15121 h.uns_p[1] = 0;
15122 h.uns_p[2] = 0;
15123 h.uns_p[3] = 0;
15125 /* If the builtin is a type that produces unsigned results or takes unsigned
15126 arguments, and it is returned as a decl for the vectorizer (such as
15127 widening multiplies, permute), make sure the arguments and return value
15128 are type correct. */
15129 switch (builtin)
15130 {
15131 /* unsigned 1 argument functions. */
15132 case CRYPTO_BUILTIN_VSBOX:
15133 case P8V_BUILTIN_VGBBD:
15134 case MISC_BUILTIN_CDTBCD:
15135 case MISC_BUILTIN_CBCDTD:
15136 h.uns_p[0] = 1;
15137 h.uns_p[1] = 1;
15138 break;
15140 /* unsigned 2 argument functions. */
15141 case ALTIVEC_BUILTIN_VMULEUB_UNS:
15142 case ALTIVEC_BUILTIN_VMULEUH_UNS:
15143 case ALTIVEC_BUILTIN_VMULOUB_UNS:
15144 case ALTIVEC_BUILTIN_VMULOUH_UNS:
15145 case CRYPTO_BUILTIN_VCIPHER:
15146 case CRYPTO_BUILTIN_VCIPHERLAST:
15147 case CRYPTO_BUILTIN_VNCIPHER:
15148 case CRYPTO_BUILTIN_VNCIPHERLAST:
15149 case CRYPTO_BUILTIN_VPMSUMB:
15150 case CRYPTO_BUILTIN_VPMSUMH:
15151 case CRYPTO_BUILTIN_VPMSUMW:
15152 case CRYPTO_BUILTIN_VPMSUMD:
15153 case CRYPTO_BUILTIN_VPMSUM:
15154 case MISC_BUILTIN_ADDG6S:
15155 case MISC_BUILTIN_DIVWEU:
15156 case MISC_BUILTIN_DIVWEUO:
15157 case MISC_BUILTIN_DIVDEU:
15158 case MISC_BUILTIN_DIVDEUO:
15159 h.uns_p[0] = 1;
15160 h.uns_p[1] = 1;
15161 h.uns_p[2] = 1;
15162 break;
15164 /* unsigned 3 argument functions. */
15165 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
15166 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
15167 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
15168 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
15169 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
15170 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
15171 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
15172 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
15173 case VSX_BUILTIN_VPERM_16QI_UNS:
15174 case VSX_BUILTIN_VPERM_8HI_UNS:
15175 case VSX_BUILTIN_VPERM_4SI_UNS:
15176 case VSX_BUILTIN_VPERM_2DI_UNS:
15177 case VSX_BUILTIN_XXSEL_16QI_UNS:
15178 case VSX_BUILTIN_XXSEL_8HI_UNS:
15179 case VSX_BUILTIN_XXSEL_4SI_UNS:
15180 case VSX_BUILTIN_XXSEL_2DI_UNS:
15181 case CRYPTO_BUILTIN_VPERMXOR:
15182 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
15183 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
15184 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
15185 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
15186 case CRYPTO_BUILTIN_VSHASIGMAW:
15187 case CRYPTO_BUILTIN_VSHASIGMAD:
15188 case CRYPTO_BUILTIN_VSHASIGMA:
15189 h.uns_p[0] = 1;
15190 h.uns_p[1] = 1;
15191 h.uns_p[2] = 1;
15192 h.uns_p[3] = 1;
15193 break;
15195 /* signed permute functions with unsigned char mask. */
15196 case ALTIVEC_BUILTIN_VPERM_16QI:
15197 case ALTIVEC_BUILTIN_VPERM_8HI:
15198 case ALTIVEC_BUILTIN_VPERM_4SI:
15199 case ALTIVEC_BUILTIN_VPERM_4SF:
15200 case ALTIVEC_BUILTIN_VPERM_2DI:
15201 case ALTIVEC_BUILTIN_VPERM_2DF:
15202 case VSX_BUILTIN_VPERM_16QI:
15203 case VSX_BUILTIN_VPERM_8HI:
15204 case VSX_BUILTIN_VPERM_4SI:
15205 case VSX_BUILTIN_VPERM_4SF:
15206 case VSX_BUILTIN_VPERM_2DI:
15207 case VSX_BUILTIN_VPERM_2DF:
15208 h.uns_p[3] = 1;
15209 break;
15211 /* unsigned args, signed return. */
15212 case VSX_BUILTIN_XVCVUXDDP_UNS:
15213 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
15214 h.uns_p[1] = 1;
15215 break;
15217 /* signed args, unsigned return. */
15218 case VSX_BUILTIN_XVCVDPUXDS_UNS:
15219 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
15220 case MISC_BUILTIN_UNPACK_TD:
15221 case MISC_BUILTIN_UNPACK_V1TI:
15222 h.uns_p[0] = 1;
15223 break;
15225 /* unsigned arguments for 128-bit pack instructions. */
15226 case MISC_BUILTIN_PACK_TD:
15227 case MISC_BUILTIN_PACK_V1TI:
15228 h.uns_p[1] = 1;
15229 h.uns_p[2] = 1;
15230 break;
15232 default:
15233 break;
15234 }
15236 /* Figure out how many args are present. */
15237 while (num_args > 0 && h.mode[num_args] == VOIDmode)
15238 num_args--;
15240 if (num_args == 0)
15241 fatal_error ("internal error: builtin function %s had no type", name);
15243 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
15244 if (!ret_type && h.uns_p[0])
15245 ret_type = builtin_mode_to_type[h.mode[0]][0];
15247 if (!ret_type)
15248 fatal_error ("internal error: builtin function %s had an unexpected "
15249 "return type %s", name, GET_MODE_NAME (h.mode[0]));
15251 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
15252 arg_type[i] = NULL_TREE;
15254 for (i = 0; i < num_args; i++)
15255 {
15256 int m = (int) h.mode[i+1];
15257 int uns_p = h.uns_p[i+1];
15259 arg_type[i] = builtin_mode_to_type[m][uns_p];
15260 if (!arg_type[i] && uns_p)
15261 arg_type[i] = builtin_mode_to_type[m][0];
15263 if (!arg_type[i])
15264 fatal_error ("internal error: builtin function %s, argument %d "
15265 "had unexpected argument type %s", name, i,
15266 GET_MODE_NAME (m));
15267 }
15269 found = htab_find_slot (builtin_hash_table, &h, INSERT);
15270 if (*found == NULL)
15271 {
15272 h2 = ggc_alloc_builtin_hash_struct ();
15273 *h2 = h;
15274 *found = (void *)h2;
15276 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
15277 arg_type[2], NULL_TREE);
15278 }
15280 return ((struct builtin_hash_struct *)(*found))->type;
15281 }
15283 static void
15284 rs6000_common_init_builtins (void)
15285 {
15286 const struct builtin_description *d;
15287 size_t i;
15289 tree opaque_ftype_opaque = NULL_TREE;
15290 tree opaque_ftype_opaque_opaque = NULL_TREE;
15291 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
15292 tree v2si_ftype_qi = NULL_TREE;
15293 tree v2si_ftype_v2si_qi = NULL_TREE;
15294 tree v2si_ftype_int_qi = NULL_TREE;
15295 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15297 if (!TARGET_PAIRED_FLOAT)
15298 {
15299 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
15300 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
15301 }
15303 /* Paired and SPE builtins are only available if you build a compiler with
15304 the appropriate options, so only create those builtins with the
15305 appropriate compiler option. Create Altivec and VSX builtins on machines
15306 with at least the general purpose extensions (970 and newer) to allow the
15307 use of the target attribute. */
15309 if (TARGET_EXTRA_BUILTINS)
15310 builtin_mask |= RS6000_BTM_COMMON;
15312 /* Add the ternary operators. */
15313 d = bdesc_3arg;
15314 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15315 {
15316 tree type;
15317 HOST_WIDE_INT mask = d->mask;
15319 if ((mask & builtin_mask) != mask)
15320 {
15321 if (TARGET_DEBUG_BUILTIN)
15322 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
15323 continue;
15324 }
15326 if (rs6000_overloaded_builtin_p (d->code))
15327 {
15328 if (! (type = opaque_ftype_opaque_opaque_opaque))
15329 type = opaque_ftype_opaque_opaque_opaque
15330 = build_function_type_list (opaque_V4SI_type_node,
15331 opaque_V4SI_type_node,
15332 opaque_V4SI_type_node,
15333 opaque_V4SI_type_node,
15334 NULL_TREE);
15335 }
15336 else
15337 {
15338 enum insn_code icode = d->icode;
15339 if (d->name == 0)
15340 {
15341 if (TARGET_DEBUG_BUILTIN)
15342 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
15343 (long unsigned)i);
15345 continue;
15346 }
15348 if (icode == CODE_FOR_nothing)
15349 {
15350 if (TARGET_DEBUG_BUILTIN)
15351 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
15352 d->name);
15354 continue;
15355 }
15357 type = builtin_function_type (insn_data[icode].operand[0].mode,
15358 insn_data[icode].operand[1].mode,
15359 insn_data[icode].operand[2].mode,
15360 insn_data[icode].operand[3].mode,
15361 d->code, d->name);
15362 }
15364 def_builtin (d->name, type, d->code);
15365 }
15367 /* Add the binary operators. */
15368 d = bdesc_2arg;
15369 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15370 {
15371 enum machine_mode mode0, mode1, mode2;
15372 tree type;
15373 HOST_WIDE_INT mask = d->mask;
15375 if ((mask & builtin_mask) != mask)
15376 {
15377 if (TARGET_DEBUG_BUILTIN)
15378 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
15379 continue;
15380 }
15382 if (rs6000_overloaded_builtin_p (d->code))
15383 {
15384 if (! (type = opaque_ftype_opaque_opaque))
15385 type = opaque_ftype_opaque_opaque
15386 = build_function_type_list (opaque_V4SI_type_node,
15387 opaque_V4SI_type_node,
15388 opaque_V4SI_type_node,
15389 NULL_TREE);
15390 }
15391 else
15392 {
15393 enum insn_code icode = d->icode;
15394 if (d->name == 0)
15395 {
15396 if (TARGET_DEBUG_BUILTIN)
15397 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
15398 (long unsigned)i);
15400 continue;
15401 }
15403 if (icode == CODE_FOR_nothing)
15404 {
15405 if (TARGET_DEBUG_BUILTIN)
15406 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
15407 d->name);
15409 continue;
15410 }
15412 mode0 = insn_data[icode].operand[0].mode;
15413 mode1 = insn_data[icode].operand[1].mode;
15414 mode2 = insn_data[icode].operand[2].mode;
15416 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
15417 {
15418 if (! (type = v2si_ftype_v2si_qi))
15419 type = v2si_ftype_v2si_qi
15420 = build_function_type_list (opaque_V2SI_type_node,
15421 opaque_V2SI_type_node,
15422 char_type_node,
15423 NULL_TREE);
15424 }
15426 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
15427 && mode2 == QImode)
15428 {
15429 if (! (type = v2si_ftype_int_qi))
15430 type = v2si_ftype_int_qi
15431 = build_function_type_list (opaque_V2SI_type_node,
15432 integer_type_node,
15433 char_type_node,
15434 NULL_TREE);
15435 }
15437 else
15438 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
15439 d->code, d->name);
15440 }
15442 def_builtin (d->name, type, d->code);
15443 }
15445 /* Add the simple unary operators. */
15446 d = bdesc_1arg;
15447 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15448 {
15449 enum machine_mode mode0, mode1;
15450 tree type;
15451 HOST_WIDE_INT mask = d->mask;
15453 if ((mask & builtin_mask) != mask)
15454 {
15455 if (TARGET_DEBUG_BUILTIN)
15456 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
15457 continue;
15458 }
15460 if (rs6000_overloaded_builtin_p (d->code))
15461 {
15462 if (! (type = opaque_ftype_opaque))
15463 type = opaque_ftype_opaque
15464 = build_function_type_list (opaque_V4SI_type_node,
15465 opaque_V4SI_type_node,
15466 NULL_TREE);
15467 }
15468 else
15469 {
15470 enum insn_code icode = d->icode;
15471 if (d->name == 0)
15472 {
15473 if (TARGET_DEBUG_BUILTIN)
15474 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
15475 (long unsigned)i);
15477 continue;
15478 }
15480 if (icode == CODE_FOR_nothing)
15481 {
15482 if (TARGET_DEBUG_BUILTIN)
15483 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
15484 d->name);
15486 continue;
15487 }
15489 mode0 = insn_data[icode].operand[0].mode;
15490 mode1 = insn_data[icode].operand[1].mode;
15492 if (mode0 == V2SImode && mode1 == QImode)
15493 {
15494 if (! (type = v2si_ftype_qi))
15495 type = v2si_ftype_qi
15496 = build_function_type_list (opaque_V2SI_type_node,
15497 char_type_node,
15498 NULL_TREE);
15499 }
15501 else
15502 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
15503 d->code, d->name);
15504 }
15506 def_builtin (d->name, type, d->code);
15507 }
15508 }
15510 static void
15511 rs6000_init_libfuncs (void)
15512 {
15513 if (!TARGET_IEEEQUAD)
15514 /* AIX/Darwin/64-bit Linux quad floating point routines. */
15515 if (!TARGET_XL_COMPAT)
15516 {
15517 set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
15518 set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
15519 set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
15520 set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
15522 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
15523 {
15524 set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
15525 set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
15526 set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
15527 set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
15528 set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
15529 set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
15530 set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
15532 set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
15533 set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
15534 set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
15535 set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
15536 set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
15537 set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
15538 set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
15539 set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
15540 }
15542 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
15543 set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
15544 }
15545 else
15546 {
15547 set_optab_libfunc (add_optab, TFmode, "_xlqadd");
15548 set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
15549 set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
15550 set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
15551 }
15552 else
15553 {
15554 /* 32-bit SVR4 quad floating point routines. */
15556 set_optab_libfunc (add_optab, TFmode, "_q_add");
15557 set_optab_libfunc (sub_optab, TFmode, "_q_sub");
15558 set_optab_libfunc (neg_optab, TFmode, "_q_neg");
15559 set_optab_libfunc (smul_optab, TFmode, "_q_mul");
15560 set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
15561 if (TARGET_PPC_GPOPT)
15562 set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");
15564 set_optab_libfunc (eq_optab, TFmode, "_q_feq");
15565 set_optab_libfunc (ne_optab, TFmode, "_q_fne");
15566 set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
15567 set_optab_libfunc (ge_optab, TFmode, "_q_fge");
15568 set_optab_libfunc (lt_optab, TFmode, "_q_flt");
15569 set_optab_libfunc (le_optab, TFmode, "_q_fle");
15571 set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
15572 set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
15573 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
15574 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
15575 set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
15576 set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
15577 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
15578 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
15579 }
15580 }
15583 /* Expand a block clear operation, and return 1 if successful. Return 0
15584 if we should let the compiler generate normal code.
15586 operands[0] is the destination
15587 operands[1] is the length
15588 operands[3] is the alignment */
15590 int
15591 expand_block_clear (rtx operands[])
15592 {
15593 rtx orig_dest = operands[0];
15594 rtx bytes_rtx = operands[1];
15595 rtx align_rtx = operands[3];
15596 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
15597 HOST_WIDE_INT align;
15598 HOST_WIDE_INT bytes;
15599 int offset;
15600 int clear_bytes;
15601 int clear_step;
15603 /* If this is not a fixed size clear, just call memset. */
15604 if (! constp)
15605 return 0;
15607 /* This must be a fixed size alignment. */
15608 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15609 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15611 /* Anything to clear? */
15612 bytes = INTVAL (bytes_rtx);
15613 if (bytes <= 0)
15614 return 1;
15616 /* Use the builtin memset after a point, to avoid huge code bloat.
15617 When optimize_size, avoid any significant code bloat; calling
15618 memset is about 4 instructions, so allow for one instruction to
15619 load zero and three to do clearing. */
15620 if (TARGET_ALTIVEC && align >= 128)
15621 clear_step = 16;
15622 else if (TARGET_POWERPC64 && align >= 32)
15623 clear_step = 8;
15624 else if (TARGET_SPE && align >= 64)
15625 clear_step = 8;
15626 else
15627 clear_step = 4;
15629 if (optimize_size && bytes > 3 * clear_step)
15630 return 0;
15631 if (! optimize_size && bytes > 8 * clear_step)
15632 return 0;
15634 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
15635 {
15636 enum machine_mode mode = BLKmode;
15637 rtx dest;
15639 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
15640 {
15641 clear_bytes = 16;
15642 mode = V4SImode;
15643 }
15644 else if (bytes >= 8 && TARGET_SPE && align >= 64)
15645 {
15646 clear_bytes = 8;
15647 mode = V2SImode;
15648 }
15649 else if (bytes >= 8 && TARGET_POWERPC64
15650 /* 64-bit loads and stores require word-aligned
15651 displacements. */
15652 && (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
15653 {
15654 clear_bytes = 8;
15655 mode = DImode;
15656 }
15657 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15658 { /* clear 4 bytes */
15659 clear_bytes = 4;
15660 mode = SImode;
15661 }
15662 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
15663 { /* clear 2 bytes */
15664 clear_bytes = 2;
15665 mode = HImode;
15666 }
15667 else /* clear 1 byte at a time */
15668 {
15669 clear_bytes = 1;
15670 mode = QImode;
15671 }
15673 dest = adjust_address (orig_dest, mode, offset);
15675 emit_move_insn (dest, CONST0_RTX (mode));
15676 }
15678 return 1;
15679 }
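/* Editor's note on the function above: with clear_step == 16 (AltiVec),
   the expander gives up above 48 bytes at -Os and above 128 bytes
   otherwise; a 35-byte AltiVec-aligned clear then expands to stores of
   16 + 16 + 2 + 1 bytes.  */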
15682 /* Expand a block move operation, and return 1 if successful. Return 0
15683 if we should let the compiler generate normal code.
15685 operands[0] is the destination
15686 operands[1] is the source
15687 operands[2] is the length
15688 operands[3] is the alignment */
15690 #define MAX_MOVE_REG 4
15692 int
15693 expand_block_move (rtx operands[])
15694 {
15695 rtx orig_dest = operands[0];
15696 rtx orig_src = operands[1];
15697 rtx bytes_rtx = operands[2];
15698 rtx align_rtx = operands[3];
15699 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
15700 int align;
15701 int bytes;
15702 int offset;
15703 int move_bytes;
15704 rtx stores[MAX_MOVE_REG];
15705 int num_reg = 0;
15707 /* If this is not a fixed size move, just call memcpy */
15708 if (! constp)
15709 return 0;
15711 /* This must be a fixed size alignment */
15712 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15713 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15715 /* Anything to move? */
15716 bytes = INTVAL (bytes_rtx);
15717 if (bytes <= 0)
15718 return 1;
15720 if (bytes > rs6000_block_move_inline_limit)
15721 return 0;
15723 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
15724 {
15725 union {
15726 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
15727 rtx (*mov) (rtx, rtx);
15728 } gen_func;
15729 enum machine_mode mode = BLKmode;
15730 rtx src, dest;
15732 /* Altivec first, since it will be faster than a string move
15733 when it applies, and usually not significantly larger. */
15734 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
15735 {
15736 move_bytes = 16;
15737 mode = V4SImode;
15738 gen_func.mov = gen_movv4si;
15739 }
15740 else if (TARGET_SPE && bytes >= 8 && align >= 64)
15741 {
15742 move_bytes = 8;
15743 mode = V2SImode;
15744 gen_func.mov = gen_movv2si;
15745 }
15746 else if (TARGET_STRING
15747 && bytes > 24 /* move up to 32 bytes at a time */
15748 && ! fixed_regs[5]
15749 && ! fixed_regs[6]
15750 && ! fixed_regs[7]
15751 && ! fixed_regs[8]
15752 && ! fixed_regs[9]
15753 && ! fixed_regs[10]
15754 && ! fixed_regs[11]
15755 && ! fixed_regs[12])
15756 {
15757 move_bytes = (bytes > 32) ? 32 : bytes;
15758 gen_func.movmemsi = gen_movmemsi_8reg;
15759 }
15760 else if (TARGET_STRING
15761 && bytes > 16 /* move up to 24 bytes at a time */
15762 && ! fixed_regs[5]
15763 && ! fixed_regs[6]
15764 && ! fixed_regs[7]
15765 && ! fixed_regs[8]
15766 && ! fixed_regs[9]
15767 && ! fixed_regs[10])
15768 {
15769 move_bytes = (bytes > 24) ? 24 : bytes;
15770 gen_func.movmemsi = gen_movmemsi_6reg;
15771 }
15772 else if (TARGET_STRING
15773 && bytes > 8 /* move up to 16 bytes at a time */
15774 && ! fixed_regs[5]
15775 && ! fixed_regs[6]
15776 && ! fixed_regs[7]
15777 && ! fixed_regs[8])
15778 {
15779 move_bytes = (bytes > 16) ? 16 : bytes;
15780 gen_func.movmemsi = gen_movmemsi_4reg;
15781 }
15782 else if (bytes >= 8 && TARGET_POWERPC64
15783 /* 64-bit loads and stores require word-aligned
15784 displacements. */
15785 && (align >= 64 || (!STRICT_ALIGNMENT && align >= 32)))
15786 {
15787 move_bytes = 8;
15788 mode = DImode;
15789 gen_func.mov = gen_movdi;
15790 }
15791 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
15792 { /* move up to 8 bytes at a time */
15793 move_bytes = (bytes > 8) ? 8 : bytes;
15794 gen_func.movmemsi = gen_movmemsi_2reg;
15795 }
15796 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15797 { /* move 4 bytes */
15798 move_bytes = 4;
15799 mode = SImode;
15800 gen_func.mov = gen_movsi;
15801 }
15802 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
15803 { /* move 2 bytes */
15804 move_bytes = 2;
15805 mode = HImode;
15806 gen_func.mov = gen_movhi;
15807 }
15808 else if (TARGET_STRING && bytes > 1)
15809 { /* move up to 4 bytes at a time */
15810 move_bytes = (bytes > 4) ? 4 : bytes;
15811 gen_func.movmemsi = gen_movmemsi_1reg;
15812 }
15813 else /* move 1 byte at a time */
15814 {
15815 move_bytes = 1;
15816 mode = QImode;
15817 gen_func.mov = gen_movqi;
15818 }
15820 src = adjust_address (orig_src, mode, offset);
15821 dest = adjust_address (orig_dest, mode, offset);
15823 if (mode != BLKmode)
15824 {
15825 rtx tmp_reg = gen_reg_rtx (mode);
15827 emit_insn ((*gen_func.mov) (tmp_reg, src));
15828 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
15829 }
15831 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
15832 {
15833 int i;
15834 for (i = 0; i < num_reg; i++)
15835 emit_insn (stores[i]);
15836 num_reg = 0;
15837 }
15839 if (mode == BLKmode)
15840 {
15841 /* Move the address into scratch registers. The movmemsi
15842 patterns require zero offset. */
15843 if (!REG_P (XEXP (src, 0)))
15844 {
15845 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
15846 src = replace_equiv_address (src, src_reg);
15847 }
15848 set_mem_size (src, move_bytes);
15850 if (!REG_P (XEXP (dest, 0)))
15851 {
15852 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
15853 dest = replace_equiv_address (dest, dest_reg);
15854 }
15855 set_mem_size (dest, move_bytes);
15857 emit_insn ((*gen_func.movmemsi) (dest, src,
15858 GEN_INT (move_bytes & 31),
15859 align_rtx));
15860 }
15861 }
15863 return 1;
15864 }
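/* Editor's note on the function above: the stores[] buffer defers at
   most MAX_MOVE_REG stores so each batch of loads issues before its
   stores; e.g. a 22-byte copy on 64-bit with no string insns expands
   as 8 + 8 + 4 + 2 bytes.  */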
15867 /* Return a string to perform a load_multiple operation.
15868 operands[0] is the vector.
15869 operands[1] is the source address.
15870 operands[2] is the first destination register. */
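/* Editor's illustration, not part of the original source: the hazard
   handled below is the address register doubling as a destination.
   For a 4-word load where operands[1] is also the last destination,
   the code emits

     lswi %2,%1,12
     lwz  %1,12(%1)

   so the address register is overwritten only by the final word.  */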
15872 const char *
15873 rs6000_output_load_multiple (rtx operands[3])
15875 /* We have to handle the case where the pseudo used to contain the address
15876 is assigned to one of the output registers. */
15877 int i, j;
15878 int words = XVECLEN (operands[0], 0);
15879 rtx xop[10];
15881 if (XVECLEN (operands[0], 0) == 1)
15882 return "lwz %2,0(%1)";
15884 for (i = 0; i < words; i++)
15885 if (refers_to_regno_p (REGNO (operands[2]) + i,
15886 REGNO (operands[2]) + i + 1, operands[1], 0))
15888 if (i == words-1)
15890 xop[0] = GEN_INT (4 * (words-1));
15891 xop[1] = operands[1];
15892 xop[2] = operands[2];
15893 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
15894 return "";
15896 else if (i == 0)
15898 xop[0] = GEN_INT (4 * (words-1));
15899 xop[1] = operands[1];
15900 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15901 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
15902 return "";
15904 else
15906 for (j = 0; j < words; j++)
15907 if (j != i)
15909 xop[0] = GEN_INT (j * 4);
15910 xop[1] = operands[1];
15911 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
15912 output_asm_insn ("lwz %2,%0(%1)", xop);
15914 xop[0] = GEN_INT (i * 4);
15915 xop[1] = operands[1];
15916 output_asm_insn ("lwz %1,%0(%1)", xop);
15917 return "";
15921 return "lswi %2,%1,%N0";
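/* Illustrative example (register numbers are hypothetical, not from
   this file): for a 3-word load_multiple into r5..r7, with the address
   in r4 and r4 not among the destinations, the common exit above emits

       lswi 5,4,12         # %N0 prints 4 * the vector length, here 12

   whereas if the address register is also the last destination (say
   the address lives in r7, the i == words-1 case), the output is

       lswi 5,7,8          # load the first words-1 registers
       lwz  7,8(7)         # load the last word, clobbering r7 last

   so the address register is never overwritten before its final use.  */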
15925 /* A validation routine: say whether CODE, a condition code, and MODE
15926 match. The other alternatives either don't make sense or should
15927 never be generated. */
15929 void
15930 validate_condition_mode (enum rtx_code code, enum machine_mode mode)
15932 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
15933 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
15934 && GET_MODE_CLASS (mode) == MODE_CC);
15936 /* These don't make sense. */
15937 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
15938 || mode != CCUNSmode);
15940 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
15941 || mode == CCUNSmode);
15943 gcc_assert (mode == CCFPmode
15944 || (code != ORDERED && code != UNORDERED
15945 && code != UNEQ && code != LTGT
15946 && code != UNGT && code != UNLT
15947 && code != UNGE && code != UNLE));
15949 /* These should never be generated except for
15950 flag_finite_math_only. */
15951 gcc_assert (mode != CCFPmode
15952 || flag_finite_math_only
15953 || (code != LE && code != GE
15954 && code != UNEQ && code != LTGT
15955 && code != UNGT && code != UNLT));
15957 /* These are invalid; the information is not there. */
15958 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
15962 /* Return 1 if ANDOP is a mask that has no bits on that are not in the
15963 mask required to convert the result of a rotate insn into a shift
15964 left insn of SHIFTOP bits. Both are known to be SImode CONST_INT. */
15967 includes_lshift_p (rtx shiftop, rtx andop)
15969 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
15971 shift_mask <<= INTVAL (shiftop);
15973 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
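/* Worked example (values are illustrative): with SHIFTOP == 4,
   shift_mask == ~0 << 4, so ANDOP == 0xfffffff0 is accepted -- it has
   no bits set outside the mask a left shift by 4 would produce --
   while ANDOP == 0xfffffff8 is rejected, because bit 3 would survive
   the AND of a rotate result but be discarded by a true shift left.  */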
15976 /* Similar, but for right shift. */
15979 includes_rshift_p (rtx shiftop, rtx andop)
15981 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
15983 shift_mask >>= INTVAL (shiftop);
15985 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
15988 /* Return 1 if ANDOP is a mask suitable for use with an rldic insn
15989 to perform a left shift. It must have exactly SHIFTOP least
15990 significant 0's, then one or more 1's, then zero or more 0's. */
15993 includes_rldic_lshift_p (rtx shiftop, rtx andop)
15995 if (GET_CODE (andop) == CONST_INT)
15997 HOST_WIDE_INT c, lsb, shift_mask;
15999 c = INTVAL (andop);
16000 if (c == 0 || c == ~0)
16001 return 0;
16003 shift_mask = ~0;
16004 shift_mask <<= INTVAL (shiftop);
16006 /* Find the least significant one bit. */
16007 lsb = c & -c;
16009 /* It must coincide with the LSB of the shift mask. */
16010 if (-lsb != shift_mask)
16011 return 0;
16013 /* Invert to look for the next transition (if any). */
16014 c = ~c;
16016 /* Remove the low group of ones (originally low group of zeros). */
16017 c &= -lsb;
16019 /* Again find the lsb, and check we have all 1's above. */
16020 lsb = c & -c;
16021 return c == -lsb;
16023 else
16024 return 0;
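/* Worked example (illustrative): with SHIFTOP == 8, shift_mask ==
   ~0 << 8.  ANDOP == 0x0000ff00 passes: its least significant 1 bit
   (0x100) coincides with the lsb of the shift mask, and after
   inverting and stripping the low group the remaining bits again form
   a single run of 1's up to the top, so the function returns 1.
   ANDOP == 0x0000ff0f fails the first test, since its lsb (bit 0)
   does not coincide with the lsb of the shift mask.  */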
16027 /* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
16028 to perform a left shift. It must have SHIFTOP or more least
16029 significant 0's, with the remainder of the word 1's. */
16032 includes_rldicr_lshift_p (rtx shiftop, rtx andop)
16034 if (GET_CODE (andop) == CONST_INT)
16036 HOST_WIDE_INT c, lsb, shift_mask;
16038 shift_mask = ~0;
16039 shift_mask <<= INTVAL (shiftop);
16040 c = INTVAL (andop);
16042 /* Find the least significant one bit. */
16043 lsb = c & -c;
16045 /* It must be covered by the shift mask.
16046 This test also rejects c == 0. */
16047 if ((lsb & shift_mask) == 0)
16048 return 0;
16050 /* Check we have all 1's above the transition, and reject all 1's. */
16051 return c == -lsb && lsb != 1;
16053 else
16054 return 0;
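/* Worked example (illustrative): with SHIFTOP == 8, ANDOP ==
   0xffffffffffffff00 passes -- its lsb (0x100) lies inside the shift
   mask and every bit above it is 1 -- so an rldicr clearing the low 8
   bits implements the shift-and-mask.  ANDOP == 0xff00 fails because
   the bits above its run of 1's are 0.  */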
16057 /* Return 1 if the operands will generate valid arguments to the rlwimi
16058 instruction for an insert with right shift in 64-bit mode. The mask may
16059 not start on the first bit or stop on the last bit because the wrap-around
16060 effects of the instruction do not correspond to the semantics of the RTL insn. */
16063 insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
16065 if (INTVAL (startop) > 32
16066 && INTVAL (startop) < 64
16067 && INTVAL (sizeop) > 1
16068 && INTVAL (sizeop) + INTVAL (startop) < 64
16069 && INTVAL (shiftop) > 0
16070 && INTVAL (sizeop) + INTVAL (shiftop) < 32
16071 && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
16072 return 1;
16074 return 0;
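/* Example of operands that satisfy the checks above (illustrative
   values): SIZEOP == 8, STARTOP == 40, SHIFTOP == 10.  The 8-bit field
   starts at bit 40, 8 + 40 < 64, and 8 + 10 < 32, so the insert with
   right shift can be done with a single rlwimi.  */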
16077 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
16078 for lfq and stfq insns iff the registers are hard registers. */
16081 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
16083 /* We might have been passed a SUBREG. */
16084 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
16085 return 0;
16087 /* We might have been passed non-floating-point registers. */
16088 if (!FP_REGNO_P (REGNO (reg1))
16089 || !FP_REGNO_P (REGNO (reg2)))
16090 return 0;
16092 return (REGNO (reg1) == REGNO (reg2) - 1);
16095 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
16096 addr1 and addr2 must be in consecutive memory locations
16097 (addr2 == addr1 + 8). */
16100 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
16102 rtx addr1, addr2;
16103 unsigned int reg1, reg2;
16104 int offset1, offset2;
16106 /* The mems cannot be volatile. */
16107 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
16108 return 0;
16110 addr1 = XEXP (mem1, 0);
16111 addr2 = XEXP (mem2, 0);
16113 /* Extract an offset (if used) from the first addr. */
16114 if (GET_CODE (addr1) == PLUS)
16116 /* If not a REG, return zero. */
16117 if (GET_CODE (XEXP (addr1, 0)) != REG)
16118 return 0;
16119 else
16121 reg1 = REGNO (XEXP (addr1, 0));
16122 /* The offset must be constant! */
16123 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
16124 return 0;
16125 offset1 = INTVAL (XEXP (addr1, 1));
16128 else if (GET_CODE (addr1) != REG)
16129 return 0;
16130 else
16132 reg1 = REGNO (addr1);
16133 /* This was a simple (mem (reg)) expression. Offset is 0. */
16134 offset1 = 0;
16137 /* And now for the second addr. */
16138 if (GET_CODE (addr2) == PLUS)
16140 /* If not a REG, return zero. */
16141 if (GET_CODE (XEXP (addr2, 0)) != REG)
16142 return 0;
16143 else
16145 reg2 = REGNO (XEXP (addr2, 0));
16146 /* The offset must be constant. */
16147 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
16148 return 0;
16149 offset2 = INTVAL (XEXP (addr2, 1));
16152 else if (GET_CODE (addr2) != REG)
16153 return 0;
16154 else
16156 reg2 = REGNO (addr2);
16157 /* This was a simple (mem (reg)) expression. Offset is 0. */
16158 offset2 = 0;
16161 /* Both of these must have the same base register. */
16162 if (reg1 != reg2)
16163 return 0;
16165 /* The offset for the second addr must be 8 more than the first addr. */
16166 if (offset2 != offset1 + 8)
16167 return 0;
16169 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
16170 instructions. */
16171 return 1;
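/* Illustrative pair of addresses that qualifies (register and offsets
   are made up): (mem:DF (plus (reg 9) (const_int 16))) followed by
   (mem:DF (plus (reg 9) (const_int 24))) -- same base register and
   offset2 == offset1 + 8 -- so the two accesses can be fused into a
   single lfq/stfq.  */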
16176 rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
16178 static bool eliminated = false;
16179 rtx ret;
16181 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
16182 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16183 else
16185 rtx mem = cfun->machine->sdmode_stack_slot;
16186 gcc_assert (mem != NULL_RTX);
16188 if (!eliminated)
16190 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
16191 cfun->machine->sdmode_stack_slot = mem;
16192 eliminated = true;
16194 ret = mem;
16197 if (TARGET_DEBUG_ADDR)
16199 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
16200 GET_MODE_NAME (mode));
16201 if (!ret)
16202 fprintf (stderr, "\tNULL_RTX\n");
16203 else
16204 debug_rtx (ret);
16207 return ret;
16210 /* Return the mode to be used for memory when a secondary memory
16211 location is needed. For SDmode values we need to use DDmode, in
16212 all other cases we can use the same mode. */
16213 enum machine_mode
16214 rs6000_secondary_memory_needed_mode (enum machine_mode mode)
16216 if (lra_in_progress && mode == SDmode)
16217 return DDmode;
16218 return mode;
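/* For instance, an SDmode value being spilled during LRA gets a DDmode
   (8-byte) secondary memory location from the hook above, since SDmode
   values are kept in the floating point registers in the wider format;
   every other mode is spilled in its own mode.  */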
16221 static tree
16222 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
16224 /* Don't walk into types. */
16225 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
16227 *walk_subtrees = 0;
16228 return NULL_TREE;
16231 switch (TREE_CODE (*tp))
16233 case VAR_DECL:
16234 case PARM_DECL:
16235 case FIELD_DECL:
16236 case RESULT_DECL:
16237 case SSA_NAME:
16238 case REAL_CST:
16239 case MEM_REF:
16240 case VIEW_CONVERT_EXPR:
16241 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
16242 return *tp;
16243 break;
16244 default:
16245 break;
16248 return NULL_TREE;
16251 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
16252 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
16253 only work on the traditional altivec registers, note if an altivec register
16254 was chosen. */
16256 static enum rs6000_reg_type
16257 register_to_reg_type (rtx reg, bool *is_altivec)
16259 HOST_WIDE_INT regno;
16260 enum reg_class rclass;
16262 if (GET_CODE (reg) == SUBREG)
16263 reg = SUBREG_REG (reg);
16265 if (!REG_P (reg))
16266 return NO_REG_TYPE;
16268 regno = REGNO (reg);
16269 if (regno >= FIRST_PSEUDO_REGISTER)
16271 if (!lra_in_progress && !reload_in_progress && !reload_completed)
16272 return PSEUDO_REG_TYPE;
16274 regno = true_regnum (reg);
16275 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16276 return PSEUDO_REG_TYPE;
16279 gcc_assert (regno >= 0);
16281 if (is_altivec && ALTIVEC_REGNO_P (regno))
16282 *is_altivec = true;
16284 rclass = rs6000_regno_regclass[regno];
16285 return reg_class_to_reg_type[(int)rclass];
16288 /* Helper function for rs6000_secondary_reload to return true if a move to a
16289 different register class is really a simple move. */
16291 static bool
16292 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
16293 enum rs6000_reg_type from_type,
16294 enum machine_mode mode)
16296 int size;
16298 /* Add support for various direct moves available. In this function, we only
16299 look at cases where we don't need any extra registers, and one or more
16300 simple move insns are issued. At present, 32-bit integers are not allowed
16301 in FPR/VSX registers. A single precision binary floating point value is not
16302 a simple move because we need to convert to the single precision memory layout.
16303 The 4-byte SDmode can be moved. */
16304 size = GET_MODE_SIZE (mode);
16305 if (TARGET_DIRECT_MOVE
16306 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
16307 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16308 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
16309 return true;
16311 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
16312 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
16313 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16314 return true;
16316 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
16317 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
16318 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16319 return true;
16321 return false;
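/* Examples of what the tests above accept (assuming a power8-class
   target with direct moves enabled, in 64-bit mode): a DImode or an
   SDmode move between a GPR and a VSX register is "simple" (a single
   mtvsrd/mfvsrd-style instruction), whereas an SFmode move is not,
   because the value must be converted to/from the single precision
   memory layout and therefore needs the multi-insn helpers below.  */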
16324 /* Power8 helper function for rs6000_secondary_reload; handle all of the
16325 special direct moves that involve allocating an extra register. Return
16326 true if such a move was handled, recording the helper's insn code and
16327 cost in SRI, or false if not. */
16329 static bool
16330 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
16331 enum rs6000_reg_type from_type,
16332 enum machine_mode mode,
16333 secondary_reload_info *sri,
16334 bool altivec_p)
16336 bool ret = false;
16337 enum insn_code icode = CODE_FOR_nothing;
16338 int cost = 0;
16339 int size = GET_MODE_SIZE (mode);
16341 if (TARGET_POWERPC64)
16343 if (size == 16)
16345 /* Handle moving 128-bit values from GPRs to VSX registers on
16346 power8 when running in 64-bit mode using XXPERMDI to glue the two
16347 64-bit values back together. */
16348 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16350 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16351 icode = reg_addr[mode].reload_vsx_gpr;
16354 /* Handle moving 128-bit values from VSX registers to GPRs on
16355 power8 when running in 64-bit mode using XXPERMDI to get access to the
16356 bottom 64-bit value. */
16357 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16359 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16360 icode = reg_addr[mode].reload_gpr_vsx;
16364 else if (mode == SFmode)
16366 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16368 cost = 3; /* xscvdpspn, mfvsrd, and. */
16369 icode = reg_addr[mode].reload_gpr_vsx;
16372 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16374 cost = 2; /* mtvsrz, xscvspdpn. */
16375 icode = reg_addr[mode].reload_vsx_gpr;
16380 if (TARGET_POWERPC64 && size == 16)
16382 /* Handle moving 128-bit values from GPRs to VSX registers on
16383 power8 when running in 64-bit mode using XXPERMDI to glue the two
16384 64-bit values back together. */
16385 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16387 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16388 icode = reg_addr[mode].reload_vsx_gpr;
16391 /* Handle moving 128-bit values from VSX registers to GPRs on
16392 power8 when running in 64-bit mode using XXPERMDI to get access to the
16393 bottom 64-bit value. */
16394 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16396 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16397 icode = reg_addr[mode].reload_gpr_vsx;
16401 else if (!TARGET_POWERPC64 && size == 8)
16403 /* Handle moving 64-bit values from GPRs to floating point registers on
16404 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
16405 values back together. Altivec register classes must be handled
16406 specially since a different instruction is used, and the secondary
16407 reload support requires a single instruction class in the scratch
16408 register constraint. However, right now TFmode is not allowed in
16409 Altivec registers, so the pattern will never match. */
16410 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
16412 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
16413 icode = reg_addr[mode].reload_fpr_gpr;
16417 if (icode != CODE_FOR_nothing)
16419 ret = true;
16420 if (sri)
16422 sri->icode = icode;
16423 sri->extra_cost = cost;
16427 return ret;
16430 /* Return whether a move between two register classes can be done either
16431 directly (simple move) or via a pattern that uses a single extra temporary
16432 (using power8's direct move in this case). */
16434 static bool
16435 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
16436 enum rs6000_reg_type from_type,
16437 enum machine_mode mode,
16438 secondary_reload_info *sri,
16439 bool altivec_p)
16441 /* Fall back to load/store reloads if either type is not a register. */
16442 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
16443 return false;
16445 /* If we haven't allocated registers yet, assume the move can be done for the
16446 standard register types. */
16447 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
16448 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
16449 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
16450 return true;
16452 /* A move within the same set of registers is a simple move for
16453 non-specialized registers. */
16454 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
16455 return true;
16457 /* Check whether a simple move can be done directly. */
16458 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
16460 if (sri)
16462 sri->icode = CODE_FOR_nothing;
16463 sri->extra_cost = 0;
16465 return true;
16468 /* Now check if we can do it in a few steps. */
16469 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
16470 altivec_p);
16473 /* Inform reload about cases where moving X with a mode MODE to a register in
16474 RCLASS requires an extra scratch or immediate register. Return the class
16475 needed for the immediate register.
16477 For VSX and Altivec, we may need a register to convert sp+offset into
16478 reg+sp.
16480 For misaligned 64-bit gpr loads and stores we need a register to
16481 convert an offset address to indirect. */
16483 static reg_class_t
16484 rs6000_secondary_reload (bool in_p,
16485 rtx x,
16486 reg_class_t rclass_i,
16487 enum machine_mode mode,
16488 secondary_reload_info *sri)
16490 enum reg_class rclass = (enum reg_class) rclass_i;
16491 reg_class_t ret = ALL_REGS;
16492 enum insn_code icode;
16493 bool default_p = false;
16495 sri->icode = CODE_FOR_nothing;
16496 icode = ((in_p)
16497 ? reg_addr[mode].reload_load
16498 : reg_addr[mode].reload_store);
16500 if (REG_P (x) || register_operand (x, mode))
16502 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
16503 bool altivec_p = (rclass == ALTIVEC_REGS);
16504 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
16506 if (!in_p)
16508 enum rs6000_reg_type exchange = to_type;
16509 to_type = from_type;
16510 from_type = exchange;
16513 /* Can we do a direct move of some sort? */
16514 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
16515 altivec_p))
16517 icode = (enum insn_code)sri->icode;
16518 default_p = false;
16519 ret = NO_REGS;
16523 /* Handle vector moves with reload helper functions. */
16524 if (ret == ALL_REGS && icode != CODE_FOR_nothing)
16526 ret = NO_REGS;
16527 sri->icode = CODE_FOR_nothing;
16528 sri->extra_cost = 0;
16530 if (GET_CODE (x) == MEM)
16532 rtx addr = XEXP (x, 0);
16534 /* Loads to and stores from gprs can do reg+offset, and wouldn't need
16535 an extra register in that case, but it would need an extra
16536 register if the addressing is reg+reg or (reg+reg)&(-16). Special
16537 case load/store quad. */
16538 if (rclass == GENERAL_REGS || rclass == BASE_REGS)
16540 if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY
16541 && GET_MODE_SIZE (mode) == 16
16542 && quad_memory_operand (x, mode))
16544 sri->icode = icode;
16545 sri->extra_cost = 2;
16548 else if (!legitimate_indirect_address_p (addr, false)
16549 && !rs6000_legitimate_offset_address_p (PTImode, addr,
16550 false, true))
16552 sri->icode = icode;
16553 /* account for splitting the loads, and converting the
16554 address from reg+reg to reg. */
16555 sri->extra_cost = (((TARGET_64BIT) ? 3 : 5)
16556 + ((GET_CODE (addr) == AND) ? 1 : 0));
16559 /* Allow scalar loads to/from the traditional floating point
16560 registers, even if VSX memory is set. */
16561 else if ((rclass == FLOAT_REGS || rclass == NO_REGS)
16562 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
16563 && (legitimate_indirect_address_p (addr, false)
16564 || legitimate_indexed_address_p (addr, false)
16565 || rs6000_legitimate_offset_address_p (mode, addr,
16566 false, true)))
16569 /* Loads to and stores from vector registers can only do reg+reg
16570 addressing. Altivec registers can also do (reg+reg)&(-16). Allow
16571 scalar modes loading up the traditional floating point registers
16572 to use offset addresses. */
16573 else if (rclass == VSX_REGS || rclass == ALTIVEC_REGS
16574 || rclass == FLOAT_REGS || rclass == NO_REGS)
16576 if (!VECTOR_MEM_ALTIVEC_P (mode)
16577 && GET_CODE (addr) == AND
16578 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16579 && INTVAL (XEXP (addr, 1)) == -16
16580 && (legitimate_indirect_address_p (XEXP (addr, 0), false)
16581 || legitimate_indexed_address_p (XEXP (addr, 0), false)))
16583 sri->icode = icode;
16584 sri->extra_cost = ((GET_CODE (XEXP (addr, 0)) == PLUS)
16585 ? 2 : 1);
16587 else if (!legitimate_indirect_address_p (addr, false)
16588 && (rclass == NO_REGS
16589 || !legitimate_indexed_address_p (addr, false)))
16591 sri->icode = icode;
16592 sri->extra_cost = 1;
16594 else
16595 icode = CODE_FOR_nothing;
16597 /* Any other loads, including to pseudo registers which haven't been
16598 assigned to a register yet, default to require a scratch
16599 register. */
16600 else
16602 sri->icode = icode;
16603 sri->extra_cost = 2;
16606 else if (REG_P (x))
16608 int regno = true_regnum (x);
16610 icode = CODE_FOR_nothing;
16611 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16612 default_p = true;
16613 else
16615 enum reg_class xclass = REGNO_REG_CLASS (regno);
16616 enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass];
16617 enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass];
16619 /* If memory is needed, use default_secondary_reload to create the
16620 stack slot. */
16621 if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1))
16622 default_p = true;
16623 else
16624 ret = NO_REGS;
16627 else
16628 default_p = true;
16630 else if (TARGET_POWERPC64
16631 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
16632 && MEM_P (x)
16633 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
16635 rtx addr = XEXP (x, 0);
16636 rtx off = address_offset (addr);
16638 if (off != NULL_RTX)
16640 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
16641 unsigned HOST_WIDE_INT offset = INTVAL (off);
16643 /* We need a secondary reload when our legitimate_address_p
16644 says the address is good (as otherwise the entire address
16645 will be reloaded), and the offset is not a multiple of
16646 four or we have an address wrap. Address wrap will only
16647 occur for LO_SUMs since legitimate_offset_address_p
16648 rejects addresses for 16-byte mems that will wrap. */
16649 if (GET_CODE (addr) == LO_SUM
16650 ? (1 /* legitimate_address_p allows any offset for lo_sum */
16651 && ((offset & 3) != 0
16652 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
16653 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
16654 && (offset & 3) != 0))
16656 if (in_p)
16657 sri->icode = CODE_FOR_reload_di_load;
16658 else
16659 sri->icode = CODE_FOR_reload_di_store;
16660 sri->extra_cost = 2;
16661 ret = NO_REGS;
16663 else
16664 default_p = true;
16666 else
16667 default_p = true;
16669 else if (!TARGET_POWERPC64
16670 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
16671 && MEM_P (x)
16672 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
16674 rtx addr = XEXP (x, 0);
16675 rtx off = address_offset (addr);
16677 if (off != NULL_RTX)
16679 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
16680 unsigned HOST_WIDE_INT offset = INTVAL (off);
16682 /* We need a secondary reload when our legitimate_address_p
16683 says the address is good (as otherwise the entire address
16684 will be reloaded), and we have a wrap.
16686 legitimate_lo_sum_address_p allows LO_SUM addresses to
16687 have any offset so test for wrap in the low 16 bits.
16689 legitimate_offset_address_p checks for the range
16690 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
16691 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
16692 [0x7ff4,0x7fff] respectively, so test for the
16693 intersection of these ranges, [0x7ffc,0x7fff] and
16694 [0x7ff4,0x7ff7] respectively.
16696 Note that the address we see here may have been
16697 manipulated by legitimize_reload_address. */
16698 if (GET_CODE (addr) == LO_SUM
16699 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
16700 : offset - (0x8000 - extra) < UNITS_PER_WORD)
16702 if (in_p)
16703 sri->icode = CODE_FOR_reload_si_load;
16704 else
16705 sri->icode = CODE_FOR_reload_si_store;
16706 sri->extra_cost = 2;
16707 ret = NO_REGS;
16709 else
16710 default_p = true;
16712 else
16713 default_p = true;
16715 else
16716 default_p = true;
16718 if (default_p)
16719 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
16721 gcc_assert (ret != ALL_REGS);
16723 if (TARGET_DEBUG_ADDR)
16725 fprintf (stderr,
16726 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
16727 "mode = %s",
16728 reg_class_names[ret],
16729 in_p ? "true" : "false",
16730 reg_class_names[rclass],
16731 GET_MODE_NAME (mode));
16733 if (default_p)
16734 fprintf (stderr, ", default secondary reload");
16736 if (sri->icode != CODE_FOR_nothing)
16737 fprintf (stderr, ", reload func = %s, extra cost = %d\n",
16738 insn_data[sri->icode].name, sri->extra_cost);
16739 else
16740 fprintf (stderr, "\n");
16742 debug_rtx (x);
16745 return ret;
16748 /* Better tracing for rs6000_secondary_reload_inner. */
16750 static void
16751 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
16752 bool store_p)
16754 rtx set, clobber;
16756 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
16758 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
16759 store_p ? "store" : "load");
16761 if (store_p)
16762 set = gen_rtx_SET (VOIDmode, mem, reg);
16763 else
16764 set = gen_rtx_SET (VOIDmode, reg, mem);
16766 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16767 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
16770 static void
16771 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
16772 bool store_p)
16774 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
16775 gcc_unreachable ();
16778 /* Fixup reload addresses for Altivec or VSX loads/stores to change SP+offset
16779 to SP+reg addressing. */
16781 void
16782 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
16784 int regno = true_regnum (reg);
16785 enum machine_mode mode = GET_MODE (reg);
16786 enum reg_class rclass;
16787 rtx addr;
16788 rtx and_op2 = NULL_RTX;
16789 rtx addr_op1;
16790 rtx addr_op2;
16791 rtx scratch_or_premodify = scratch;
16792 rtx and_rtx;
16793 rtx cc_clobber;
16795 if (TARGET_DEBUG_ADDR)
16796 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
16798 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16799 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16801 if (GET_CODE (mem) != MEM)
16802 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16804 rclass = REGNO_REG_CLASS (regno);
16805 addr = find_replacement (&XEXP (mem, 0));
16807 switch (rclass)
16809 /* GPRs can handle reg + small constant, all other addresses need to use
16810 the scratch register. */
16811 case GENERAL_REGS:
16812 case BASE_REGS:
16813 if (GET_CODE (addr) == AND)
16815 and_op2 = XEXP (addr, 1);
16816 addr = find_replacement (&XEXP (addr, 0));
16819 if (GET_CODE (addr) == PRE_MODIFY)
16821 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
16822 if (!REG_P (scratch_or_premodify))
16823 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16825 addr = find_replacement (&XEXP (addr, 1));
16826 if (GET_CODE (addr) != PLUS)
16827 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16830 if (GET_CODE (addr) == PLUS
16831 && (and_op2 != NULL_RTX
16832 || !rs6000_legitimate_offset_address_p (PTImode, addr,
16833 false, true)))
16835 /* find_replacement already recurses into both operands of
16836 PLUS so we don't need to call it here. */
16837 addr_op1 = XEXP (addr, 0);
16838 addr_op2 = XEXP (addr, 1);
16839 if (!legitimate_indirect_address_p (addr_op1, false))
16840 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16842 if (!REG_P (addr_op2)
16843 && (GET_CODE (addr_op2) != CONST_INT
16844 || !satisfies_constraint_I (addr_op2)))
16846 if (TARGET_DEBUG_ADDR)
16848 fprintf (stderr,
16849 "\nMove plus addr to register %s, mode = %s: ",
16850 rs6000_reg_names[REGNO (scratch)],
16851 GET_MODE_NAME (mode));
16852 debug_rtx (addr_op2);
16854 rs6000_emit_move (scratch, addr_op2, Pmode);
16855 addr_op2 = scratch;
16858 emit_insn (gen_rtx_SET (VOIDmode,
16859 scratch_or_premodify,
16860 gen_rtx_PLUS (Pmode,
16861 addr_op1,
16862 addr_op2)));
16864 addr = scratch_or_premodify;
16865 scratch_or_premodify = scratch;
16867 else if (!legitimate_indirect_address_p (addr, false)
16868 && !rs6000_legitimate_offset_address_p (PTImode, addr,
16869 false, true))
16871 if (TARGET_DEBUG_ADDR)
16873 fprintf (stderr, "\nMove addr to register %s, mode = %s: ",
16874 rs6000_reg_names[REGNO (scratch_or_premodify)],
16875 GET_MODE_NAME (mode));
16876 debug_rtx (addr);
16878 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
16879 addr = scratch_or_premodify;
16880 scratch_or_premodify = scratch;
16882 break;
16884 /* Float registers can do offset+reg addressing for scalar types. */
16885 case FLOAT_REGS:
16886 if (legitimate_indirect_address_p (addr, false) /* reg */
16887 || legitimate_indexed_address_p (addr, false) /* reg+reg */
16888 || ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
16889 && and_op2 == NULL_RTX
16890 && scratch_or_premodify == scratch
16891 && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
16892 break;
16894 /* If this isn't a legacy floating point load/store, fall through to the
16895 VSX defaults. */
16897 /* VSX/Altivec registers can only handle reg+reg addressing. Move other
16898 addresses into a scratch register. */
16899 case VSX_REGS:
16900 case ALTIVEC_REGS:
16902 /* With float regs, we need to handle the AND ourselves, since we can't
16903 use the Altivec instruction with an implicit AND -16. Allow scalar
16904 loads to float registers to use reg+offset even if VSX. */
16905 if (GET_CODE (addr) == AND
16906 && (rclass != ALTIVEC_REGS || GET_MODE_SIZE (mode) != 16
16907 || GET_CODE (XEXP (addr, 1)) != CONST_INT
16908 || INTVAL (XEXP (addr, 1)) != -16
16909 || !VECTOR_MEM_ALTIVEC_P (mode)))
16911 and_op2 = XEXP (addr, 1);
16912 addr = find_replacement (&XEXP (addr, 0));
16915 /* If we aren't using a VSX load, save the PRE_MODIFY register and use it
16916 as the address later. */
16917 if (GET_CODE (addr) == PRE_MODIFY
16918 && ((ALTIVEC_OR_VSX_VECTOR_MODE (mode)
16919 && (rclass != FLOAT_REGS
16920 || (GET_MODE_SIZE (mode) != 4 && GET_MODE_SIZE (mode) != 8)))
16921 || and_op2 != NULL_RTX
16922 || !legitimate_indexed_address_p (XEXP (addr, 1), false)))
16924 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
16925 if (!legitimate_indirect_address_p (scratch_or_premodify, false))
16926 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16928 addr = find_replacement (&XEXP (addr, 1));
16929 if (GET_CODE (addr) != PLUS)
16930 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16933 if (legitimate_indirect_address_p (addr, false) /* reg */
16934 || legitimate_indexed_address_p (addr, false) /* reg+reg */
16935 || (GET_CODE (addr) == AND /* Altivec memory */
16936 && rclass == ALTIVEC_REGS
16937 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16938 && INTVAL (XEXP (addr, 1)) == -16
16939 && (legitimate_indirect_address_p (XEXP (addr, 0), false)
16940 || legitimate_indexed_address_p (XEXP (addr, 0), false))))
16943 else if (GET_CODE (addr) == PLUS)
16945 addr_op1 = XEXP (addr, 0);
16946 addr_op2 = XEXP (addr, 1);
16947 if (!REG_P (addr_op1))
16948 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16950 if (TARGET_DEBUG_ADDR)
16952 fprintf (stderr, "\nMove plus addr to register %s, mode = %s: ",
16953 rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode));
16954 debug_rtx (addr_op2);
16956 rs6000_emit_move (scratch, addr_op2, Pmode);
16957 emit_insn (gen_rtx_SET (VOIDmode,
16958 scratch_or_premodify,
16959 gen_rtx_PLUS (Pmode,
16960 addr_op1,
16961 scratch)));
16962 addr = scratch_or_premodify;
16963 scratch_or_premodify = scratch;
16966 else if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST
16967 || GET_CODE (addr) == CONST_INT || GET_CODE (addr) == LO_SUM
16968 || REG_P (addr))
16970 if (TARGET_DEBUG_ADDR)
16972 fprintf (stderr, "\nMove addr to register %s, mode = %s: ",
16973 rs6000_reg_names[REGNO (scratch_or_premodify)],
16974 GET_MODE_NAME (mode));
16975 debug_rtx (addr);
16978 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
16979 addr = scratch_or_premodify;
16980 scratch_or_premodify = scratch;
16983 else
16984 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16986 break;
16988 default:
16989 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16992 /* If the original address involved a pre-modify for which we couldn't use
16993 the VSX memory instruction with update, and which we haven't already
16994 taken care of, store the address in the pre-modify register and use
16995 that as the address. */
16996 if (scratch_or_premodify != scratch && scratch_or_premodify != addr)
16998 emit_insn (gen_rtx_SET (VOIDmode, scratch_or_premodify, addr));
16999 addr = scratch_or_premodify;
17002 /* If the original address involved an AND -16 and we couldn't use an ALTIVEC
17003 memory instruction, recreate the AND now, including the clobber which is
17004 generated by the general ANDSI3/ANDDI3 patterns for the
17005 andi. instruction. */
17006 if (and_op2 != NULL_RTX)
17008 if (! legitimate_indirect_address_p (addr, false))
17010 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17011 addr = scratch;
17014 if (TARGET_DEBUG_ADDR)
17016 fprintf (stderr, "\nAnd addr to register %s, mode = %s: ",
17017 rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode));
17018 debug_rtx (and_op2);
17021 and_rtx = gen_rtx_SET (VOIDmode,
17022 scratch,
17023 gen_rtx_AND (Pmode,
17024 addr,
17025 and_op2));
17027 cc_clobber = gen_rtx_CLOBBER (CCmode, gen_rtx_SCRATCH (CCmode));
17028 emit_insn (gen_rtx_PARALLEL (VOIDmode,
17029 gen_rtvec (2, and_rtx, cc_clobber)));
17030 addr = scratch;
17033 /* Adjust the address if it changed. */
17034 if (addr != XEXP (mem, 0))
17036 mem = replace_equiv_address_nv (mem, addr);
17037 if (TARGET_DEBUG_ADDR)
17038 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
17041 /* Now create the move. */
17042 if (store_p)
17043 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17044 else
17045 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17047 return;
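/* Sketch of the rewrite performed above (instructions and registers
   are illustrative): a VSX load from (plus (reg sp) (const_int 64))
   cannot use reg+offset addressing, so something like

       li     scratch,64          # move the offset into the scratch
       add    scratch,sp,scratch  # scratch = sp + 64
       lxvd2x vr,0,scratch        # indirect (reg+reg with rA == 0)

   is emitted instead -- the SP+offset to SP+reg conversion named in
   the comment before this function.  */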
17050 /* Convert reloads involving 64-bit gprs and misaligned offset
17051 addressing, or multiple 32-bit gprs and offsets that are too large,
17052 to use indirect addressing. */
17054 void
17055 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
17057 int regno = true_regnum (reg);
17058 enum reg_class rclass;
17059 rtx addr;
17060 rtx scratch_or_premodify = scratch;
17062 if (TARGET_DEBUG_ADDR)
17064 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
17065 store_p ? "store" : "load");
17066 fprintf (stderr, "reg:\n");
17067 debug_rtx (reg);
17068 fprintf (stderr, "mem:\n");
17069 debug_rtx (mem);
17070 fprintf (stderr, "scratch:\n");
17071 debug_rtx (scratch);
17074 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
17075 gcc_assert (GET_CODE (mem) == MEM);
17076 rclass = REGNO_REG_CLASS (regno);
17077 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
17078 addr = XEXP (mem, 0);
17080 if (GET_CODE (addr) == PRE_MODIFY)
17082 scratch_or_premodify = XEXP (addr, 0);
17083 gcc_assert (REG_P (scratch_or_premodify));
17084 addr = XEXP (addr, 1);
17086 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
17088 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
17090 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
17092 /* Now create the move. */
17093 if (store_p)
17094 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17095 else
17096 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17098 return;
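/* E.g. (illustrative): a DImode gpr load from (plus (reg 1)
   (const_int 5)) has an offset that is not a multiple of 4, which the
   DS-form displacement of ld cannot encode, so the address is moved
   into the scratch register and the access becomes indirect.  */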
17101 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
17102 this function has any SDmode references. If we are on a power7 or later, we
17103 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
17104 can load/store the value. */
17106 static void
17107 rs6000_alloc_sdmode_stack_slot (void)
17109 tree t;
17110 basic_block bb;
17111 gimple_stmt_iterator gsi;
17113 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
17114 /* We use a different approach for dealing with the secondary
17115 memory in LRA. */
17116 if (ira_use_lra_p)
17117 return;
17119 if (TARGET_NO_SDMODE_STACK)
17120 return;
17122 FOR_EACH_BB_FN (bb, cfun)
17123 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
17125 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
17126 if (ret)
17128 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17129 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17130 SDmode, 0);
17131 return;
17135 /* Check for any SDmode parameters of the function. */
17136 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
17138 if (TREE_TYPE (t) == error_mark_node)
17139 continue;
17141 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
17142 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
17144 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17145 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17146 SDmode, 0);
17147 return;
17152 static void
17153 rs6000_instantiate_decls (void)
17155 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
17156 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
17159 /* Given an rtx X being reloaded into a reg required to be
17160 in class CLASS, return the class of reg to actually use.
17161 In general this is just CLASS; but on some machines
17162 in some cases it is preferable to use a more restrictive class.
17164 On the RS/6000, we have to return NO_REGS when we want to reload a
17165 floating-point CONST_DOUBLE to force it to be copied to memory.
17167 We also don't want to reload integer values into floating-point
17168 registers if we can at all help it. In fact, this can
17169 cause reload to die, if it tries to generate a reload of CTR
17170 into a FP register and discovers it doesn't have the memory location
17171 required.
17173 ??? Would it be a good idea to have reload do the converse, that is
17174 try to reload floating modes into FP registers if possible?
17177 static enum reg_class
17178 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
17180 enum machine_mode mode = GET_MODE (x);
17182 if (TARGET_VSX && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
17183 return rclass;
17185 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
17186 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
17187 && easy_vector_constant (x, mode))
17188 return ALTIVEC_REGS;
17190 if ((CONSTANT_P (x) || GET_CODE (x) == PLUS))
17192 if (reg_class_subset_p (GENERAL_REGS, rclass))
17193 return GENERAL_REGS;
17194 if (reg_class_subset_p (BASE_REGS, rclass))
17195 return BASE_REGS;
17196 return NO_REGS;
17199 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
17200 return GENERAL_REGS;
17202 /* For VSX, prefer the traditional registers for 64-bit values because we can
17203 use the non-VSX loads. Prefer the Altivec registers if Altivec is
17204 handling the vector operations (i.e. V16QI, V8HI, and V4SI), or if we
17205 prefer Altivec loads. */
17206 if (rclass == VSX_REGS)
17208 if (MEM_P (x) && reg_addr[mode].scalar_in_vmx_p)
17210 rtx addr = XEXP (x, 0);
17211 if (rs6000_legitimate_offset_address_p (mode, addr, false, true)
17212 || legitimate_lo_sum_address_p (mode, addr, false))
17213 return FLOAT_REGS;
17215 else if (GET_MODE_SIZE (mode) <= 8 && !reg_addr[mode].scalar_in_vmx_p)
17216 return FLOAT_REGS;
17218 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
17219 || mode == V1TImode)
17220 return ALTIVEC_REGS;
17222 return rclass;
17225 return rclass;
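/* Illustrative behavior (hypothetical operands): reloading the easy
   vector constant (const_vector:V4SI [1 1 1 1]) into VSX_REGS prefers
   ALTIVEC_REGS, where a single vspltisw can synthesize it; a DFmode
   memory operand instead prefers FLOAT_REGS so that reg+offset
   addressing remains usable.  */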
17228 /* Debug version of rs6000_preferred_reload_class. */
17229 static enum reg_class
17230 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
17232 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
17234 fprintf (stderr,
17235 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
17236 "mode = %s, x:\n",
17237 reg_class_names[ret], reg_class_names[rclass],
17238 GET_MODE_NAME (GET_MODE (x)));
17239 debug_rtx (x);
17241 return ret;
17244 /* If we are copying between FP or AltiVec registers and anything else, we need
17245 a memory location. The exception is when we are targeting ppc64 and the
17246 instructions to move between the FPRs and GPRs are available. Also, under VSX, you
17247 can copy vector registers from the FP register set to the Altivec register
17248 set and vice versa. */
17250 static bool
17251 rs6000_secondary_memory_needed (enum reg_class from_class,
17252 enum reg_class to_class,
17253 enum machine_mode mode)
17255 enum rs6000_reg_type from_type, to_type;
17256 bool altivec_p = ((from_class == ALTIVEC_REGS)
17257 || (to_class == ALTIVEC_REGS));
17259 /* If a simple/direct move is available, we don't need secondary memory. */
17260 from_type = reg_class_to_reg_type[(int)from_class];
17261 to_type = reg_class_to_reg_type[(int)to_class];
17263 if (rs6000_secondary_reload_move (to_type, from_type, mode,
17264 (secondary_reload_info *)0, altivec_p))
17265 return false;
17267 /* If we have a floating point or vector register class, we need to use
17268 memory to transfer the data. */
17269 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
17270 return true;
17272 return false;
17275 /* Debug version of rs6000_secondary_memory_needed. */
17276 static bool
17277 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
17278 enum reg_class to_class,
17279 enum machine_mode mode)
17281 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
17283 fprintf (stderr,
17284 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
17285 "to_class = %s, mode = %s\n",
17286 ret ? "true" : "false",
17287 reg_class_names[from_class],
17288 reg_class_names[to_class],
17289 GET_MODE_NAME (mode));
17291 return ret;
17294 /* Return the register class of a scratch register needed to copy IN into
17295 or out of a register in RCLASS in MODE. If it can be done directly,
17296 NO_REGS is returned. */
17298 static enum reg_class
17299 rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode,
17300 rtx in)
17302 int regno;
17304 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
17305 #if TARGET_MACHO
17306 && MACHOPIC_INDIRECT
17307 #endif
17310 /* We cannot copy a symbolic operand directly into anything
17311 other than BASE_REGS for TARGET_ELF. So indicate that a
17312 register from BASE_REGS is needed as an intermediate
17313 register.
17315 On Darwin, pic addresses require a load from memory, which
17316 needs a base register. */
17317 if (rclass != BASE_REGS
17318 && (GET_CODE (in) == SYMBOL_REF
17319 || GET_CODE (in) == HIGH
17320 || GET_CODE (in) == LABEL_REF
17321 || GET_CODE (in) == CONST))
17322 return BASE_REGS;
17325 if (GET_CODE (in) == REG)
17327 regno = REGNO (in);
17328 if (regno >= FIRST_PSEUDO_REGISTER)
17330 regno = true_regnum (in);
17331 if (regno >= FIRST_PSEUDO_REGISTER)
17332 regno = -1;
17335 else if (GET_CODE (in) == SUBREG)
17337 regno = true_regnum (in);
17338 if (regno >= FIRST_PSEUDO_REGISTER)
17339 regno = -1;
17341 else
17342 regno = -1;
17344 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
17345 into anything. */
17346 if (rclass == GENERAL_REGS || rclass == BASE_REGS
17347 || (regno >= 0 && INT_REGNO_P (regno)))
17348 return NO_REGS;
17350 /* Constants, memory, and FP registers can go into FP registers. */
17351 if ((regno == -1 || FP_REGNO_P (regno))
17352 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
17353 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
17355 /* Memory, and FP/altivec registers can go into fp/altivec registers under
17356 VSX. However, for scalar variables, use the traditional floating point
17357 registers so that we can use offset+register addressing. */
17358 if (TARGET_VSX
17359 && (regno == -1 || VSX_REGNO_P (regno))
17360 && VSX_REG_CLASS_P (rclass))
17362 if (GET_MODE_SIZE (mode) < 16)
17363 return FLOAT_REGS;
17365 return NO_REGS;
17368 /* Memory, and AltiVec registers can go into AltiVec registers. */
17369 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
17370 && rclass == ALTIVEC_REGS)
17371 return NO_REGS;
17373 /* We can copy among the CR registers. */
17374 if ((rclass == CR_REGS || rclass == CR0_REGS)
17375 && regno >= 0 && CR_REGNO_P (regno))
17376 return NO_REGS;
17378 /* Otherwise, we need GENERAL_REGS. */
17379 return GENERAL_REGS;
17382 /* Debug version of rs6000_secondary_reload_class. */
17383 static enum reg_class
17384 rs6000_debug_secondary_reload_class (enum reg_class rclass,
17385 enum machine_mode mode, rtx in)
17387 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
17388 fprintf (stderr,
17389 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
17390 "mode = %s, input rtx:\n",
17391 reg_class_names[ret], reg_class_names[rclass],
17392 GET_MODE_NAME (mode));
17393 debug_rtx (in);
17395 return ret;
17398 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
17400 static bool
17401 rs6000_cannot_change_mode_class (enum machine_mode from,
17402 enum machine_mode to,
17403 enum reg_class rclass)
17405 unsigned from_size = GET_MODE_SIZE (from);
17406 unsigned to_size = GET_MODE_SIZE (to);
17408 if (from_size != to_size)
17410 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
17412 if (reg_classes_intersect_p (xclass, rclass))
17414 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
17415 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
17417 /* Don't allow 64-bit types to overlap with 128-bit types that take a
17418 single register under VSX because the scalar part of the register
17419 is in the upper 64-bits, and not the lower 64-bits. Types like
17420 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
17421 IEEE floating point can't overlap, and neither can small
17422 values. */
17424 if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
17425 return true;
17427 /* TDmode in floating-mode registers must always go into a register
17428 pair with the most significant word in the even-numbered register
17429 to match ISA requirements. In little-endian mode, this does not
17430 match subreg numbering, so we cannot allow subregs. */
17431 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
17432 return true;
17434 if (from_size < 8 || to_size < 8)
17435 return true;
17437 if (from_size == 8 && (8 * to_nregs) != to_size)
17438 return true;
17440 if (to_size == 8 && (8 * from_nregs) != from_size)
17441 return true;
17443 return false;
17445 else
17446 return false;
17449 if (TARGET_E500_DOUBLE
17450 && ((((to) == DFmode) + ((from) == DFmode)) == 1
17451 || (((to) == TFmode) + ((from) == TFmode)) == 1
17452 || (((to) == DDmode) + ((from) == DDmode)) == 1
17453 || (((to) == TDmode) + ((from) == TDmode)) == 1
17454 || (((to) == DImode) + ((from) == DImode)) == 1))
17455 return true;
17457 /* Since the VSX register set includes traditional floating point registers
17458 and altivec registers, just check for the size being different instead of
17459 trying to check whether the modes are vector modes. Otherwise it won't
17460 allow say DF and DI to change classes. For types like TFmode and TDmode
17461 that take 2 64-bit registers, rather than a single 128-bit register, don't
17462 allow subregs of those types to other 128 bit types. */
17463 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17465 unsigned num_regs = (from_size + 15) / 16;
17466 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
17467 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
17468 return true;
17470 return (from_size != 8 && from_size != 16);
17473 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
17474 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
17475 return true;
17477 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
17478 && reg_classes_intersect_p (GENERAL_REGS, rclass))
17479 return true;
17481 return false;
17484 /* Debug version of rs6000_cannot_change_mode_class. */
17485 static bool
17486 rs6000_debug_cannot_change_mode_class (enum machine_mode from,
17487 enum machine_mode to,
17488 enum reg_class rclass)
17490 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
17492 fprintf (stderr,
17493 "rs6000_cannot_change_mode_class, return %s, from = %s, "
17494 "to = %s, rclass = %s\n",
17495 ret ? "true" : "false",
17496 GET_MODE_NAME (from), GET_MODE_NAME (to),
17497 reg_class_names[rclass]);
17499 return ret;
17502 /* Return a string to do a move operation of 128 bits of data. */
17504 const char *
17505 rs6000_output_move_128bit (rtx operands[])
17507 rtx dest = operands[0];
17508 rtx src = operands[1];
17509 enum machine_mode mode = GET_MODE (dest);
17510 int dest_regno;
17511 int src_regno;
17512 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
17513 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
17515 if (REG_P (dest))
17517 dest_regno = REGNO (dest);
17518 dest_gpr_p = INT_REGNO_P (dest_regno);
17519 dest_fp_p = FP_REGNO_P (dest_regno);
17520 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
17521 dest_vsx_p = dest_fp_p | dest_vmx_p;
17523 else
17525 dest_regno = -1;
17526 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
17529 if (REG_P (src))
17531 src_regno = REGNO (src);
17532 src_gpr_p = INT_REGNO_P (src_regno);
17533 src_fp_p = FP_REGNO_P (src_regno);
17534 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
17535 src_vsx_p = src_fp_p | src_vmx_p;
17537 else
17539 src_regno = -1;
17540 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
17543 /* Register moves. */
17544 if (dest_regno >= 0 && src_regno >= 0)
17546 if (dest_gpr_p)
17548 if (src_gpr_p)
17549 return "#";
17551 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
17552 return "#";
17555 else if (TARGET_VSX && dest_vsx_p)
17557 if (src_vsx_p)
17558 return "xxlor %x0,%x1,%x1";
17560 else if (TARGET_DIRECT_MOVE && src_gpr_p)
17561 return "#";
17564 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
17565 return "vor %0,%1,%1";
17567 else if (dest_fp_p && src_fp_p)
17568 return "#";
17571 /* Loads. */
17572 else if (dest_regno >= 0 && MEM_P (src))
17574 if (dest_gpr_p)
17576 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
17577 return "lq %0,%1";
17578 else
17579 return "#";
17582 else if (TARGET_ALTIVEC && dest_vmx_p
17583 && altivec_indexed_or_indirect_operand (src, mode))
17584 return "lvx %0,%y1";
17586 else if (TARGET_VSX && dest_vsx_p)
17588 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
17589 return "lxvw4x %x0,%y1";
17590 else
17591 return "lxvd2x %x0,%y1";
17594 else if (TARGET_ALTIVEC && dest_vmx_p)
17595 return "lvx %0,%y1";
17597 else if (dest_fp_p)
17598 return "#";
17601 /* Stores. */
17602 else if (src_regno >= 0 && MEM_P (dest))
17604 if (src_gpr_p)
17606 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
17607 return "stq %1,%0";
17608 else
17609 return "#";
17612 else if (TARGET_ALTIVEC && src_vmx_p
17613 && altivec_indexed_or_indirect_operand (src, mode))
17614 return "stvx %1,%y0";
17616 else if (TARGET_VSX && src_vsx_p)
17618 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
17619 return "stxvw4x %x1,%y0";
17620 else
17621 return "stxvd2x %x1,%y0";
17624 else if (TARGET_ALTIVEC && src_vmx_p)
17625 return "stvx %1,%y0";
17627 else if (src_fp_p)
17628 return "#";
17631 /* Constants. */
17632 else if (dest_regno >= 0
17633 && (GET_CODE (src) == CONST_INT
17634 || GET_CODE (src) == CONST_DOUBLE
17635 || GET_CODE (src) == CONST_VECTOR))
17637 if (dest_gpr_p)
17638 return "#";
17640 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
17641 return "xxlxor %x0,%x0,%x0";
17643 else if (TARGET_ALTIVEC && dest_vmx_p)
17644 return output_vec_const_move (operands);
17647 if (TARGET_DEBUG_ADDR)
17649 fprintf (stderr, "\n===== Bad 128 bit move:\n");
17650 debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
17653 gcc_unreachable ();
17656 /* Validate a 128-bit move. */
17657 bool
17658 rs6000_move_128bit_ok_p (rtx operands[])
17660 enum machine_mode mode = GET_MODE (operands[0]);
17661 return (gpc_reg_operand (operands[0], mode)
17662 || gpc_reg_operand (operands[1], mode));
17665 /* Return true if a 128-bit move needs to be split. */
17666 bool
17667 rs6000_split_128bit_ok_p (rtx operands[])
17669 if (!reload_completed)
17670 return false;
17672 if (!gpr_or_gpr_p (operands[0], operands[1]))
17673 return false;
17675 if (quad_load_store_p (operands[0], operands[1]))
17676 return false;
17678 return true;
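/* For example (illustrative): after reload, a TImode copy between two
   GPR pairs -- for which rs6000_output_move_128bit returns "#" -- is
   split here into word-sized moves, while an lq/stq-capable quad
   memory access is left as a single insn.  */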
17682 /* Given a comparison operation, return the bit number in CCR to test. We
17683 know this is a valid comparison.
17685 SCC_P is 1 if this is for an scc. That means that %D will have been
17686 used instead of %C, so the bits will be in different places.
17688 Return -1 if OP isn't a valid comparison for some reason. */
17691 ccr_bit (rtx op, int scc_p)
17693 enum rtx_code code = GET_CODE (op);
17694 enum machine_mode cc_mode;
17695 int cc_regnum;
17696 int base_bit;
17697 rtx reg;
17699 if (!COMPARISON_P (op))
17700 return -1;
17702 reg = XEXP (op, 0);
17704 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
17706 cc_mode = GET_MODE (reg);
17707 cc_regnum = REGNO (reg);
17708 base_bit = 4 * (cc_regnum - CR0_REGNO);
17710 validate_condition_mode (code, cc_mode);
17712 /* When generating a sCOND operation, only positive conditions are
17713 allowed. */
17714 gcc_assert (!scc_p
17715 || code == EQ || code == GT || code == LT || code == UNORDERED
17716 || code == GTU || code == LTU);
17718 switch (code)
17720 case NE:
17721 return scc_p ? base_bit + 3 : base_bit + 2;
17722 case EQ:
17723 return base_bit + 2;
17724 case GT: case GTU: case UNLE:
17725 return base_bit + 1;
17726 case LT: case LTU: case UNGE:
17727 return base_bit;
17728 case ORDERED: case UNORDERED:
17729 return base_bit + 3;
17731 case GE: case GEU:
17732 /* If scc, we will have done a cror to put the bit in the
17733 unordered position. So test that bit. For integer, this is ! LT
17734 unless this is an scc insn. */
17735 return scc_p ? base_bit + 3 : base_bit;
17737 case LE: case LEU:
17738 return scc_p ? base_bit + 3 : base_bit + 1;
17740 default:
17741 gcc_unreachable ();
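/* Illustrative mapping (standard CR layout): each field cr0..cr7 is 4
   bits wide, base_bit == 4 * field, with bit order LT, GT, EQ,
   SO/UNORDERED.  So an EQ test against cr6 yields 4*6 + 2 == 26, and
   a GE scc test returns the unordered bit (base_bit + 3) that a
   preceding cror has set up.  */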
17745 /* Return the GOT register. */
17748 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
17750 /* The second flow pass currently (June 1999) can't update
17751 regs_ever_live without disturbing other parts of the compiler, so
17752 update it here to make the prolog/epilogue code happy. */
17753 if (!can_create_pseudo_p ()
17754 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
17755 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
17757 crtl->uses_pic_offset_table = 1;
17759 return pic_offset_table_rtx;
17762 static rs6000_stack_t stack_info;
17764 /* Function to init struct machine_function.
17765 This will be called, via a pointer variable,
17766 from push_function_context. */
17768 static struct machine_function *
17769 rs6000_init_machine_status (void)
17771 stack_info.reload_completed = 0;
17772 return ggc_alloc_cleared_machine_function ();
17775 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
17778 extract_MB (rtx op)
17780 int i;
17781 unsigned long val = INTVAL (op);
17783 /* If the high bit is zero, the value is the first 1 bit we find
17784 from the left. */
17785 if ((val & 0x80000000) == 0)
17787 gcc_assert (val & 0xffffffff);
17789 i = 1;
17790 while (((val <<= 1) & 0x80000000) == 0)
17791 ++i;
17792 return i;
17795 /* If the high bit is set and the low bit is not, or the mask is all
17796 1's, the value is zero. */
17797 if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
17798 return 0;
17800 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
17801 from the right. */
17802 i = 31;
17803 while (((val >>= 1) & 1) != 0)
17804 --i;
17806 return i;
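/* Worked example (illustrative): extract_MB (0x0000ffff) sees the high
   bit clear and counts left shifts until a 1 bit reaches the top,
   giving MB == 16 -- the mask's first 1 bit, numbering bits 0..31 from
   the most significant end.  */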
17810 extract_ME (rtx op)
17812 int i;
17813 unsigned long val = INTVAL (op);
17815 /* If the low bit is zero, the value is the first 1 bit we find from
17816 the right. */
17817 if ((val & 1) == 0)
17819 gcc_assert (val & 0xffffffff);
17821 i = 30;
17822 while (((val >>= 1) & 1) == 0)
17823 --i;
17825 return i;
17828 /* If the low bit is set and the high bit is not, or the mask is all
17829 1's, the value is 31. */
17830 if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
17831 return 31;
17833 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
17834 from the left. */
17835 i = 0;
17836 while (((val <<= 1) & 0x80000000) != 0)
17837 ++i;
17839 return i;
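/* Worked example for the wrap-around case (illustrative): for the mask
   0xff0000ff, extract_MB returns 24 (first 0 bit found scanning up
   from the low run of 1's) and extract_ME returns 7 (first 0 bit
   scanning from the left), i.e. the rotated mask covers bits 24..31
   wrapping around to bits 0..7 in big-endian numbering.  */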
17842 /* Locate some local-dynamic symbol still in use by this function
17843 so that we can print its name in some tls_ld pattern. */
17845 static const char *
17846 rs6000_get_some_local_dynamic_name (void)
17848 rtx insn;
17850 if (cfun->machine->some_ld_name)
17851 return cfun->machine->some_ld_name;
17853 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
17854 if (INSN_P (insn)
17855 && for_each_rtx (&PATTERN (insn),
17856 rs6000_get_some_local_dynamic_name_1, 0))
17857 return cfun->machine->some_ld_name;
17859 gcc_unreachable ();
17862 /* Helper function for rs6000_get_some_local_dynamic_name. */
17864 static int
17865 rs6000_get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
17867 rtx x = *px;
17869 if (GET_CODE (x) == SYMBOL_REF)
17871 const char *str = XSTR (x, 0);
17872 if (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
17874 cfun->machine->some_ld_name = str;
17875 return 1;
17879 return 0;
17882 /* Write out a function code label. */
17884 void
17885 rs6000_output_function_entry (FILE *file, const char *fname)
17887 if (fname[0] != '.')
17889 switch (DEFAULT_ABI)
17891 default:
17892 gcc_unreachable ();
17894 case ABI_AIX:
17895 if (DOT_SYMBOLS)
17896 putc ('.', file);
17897 else
17898 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
17899 break;
17901 case ABI_ELFv2:
17902 case ABI_V4:
17903 case ABI_DARWIN:
17904 break;
17908 RS6000_OUTPUT_BASENAME (file, fname);
17911 /* Print an operand. Recognize special options, documented below. */
17913 #if TARGET_ELF
17914 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
17915 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
17916 #else
17917 #define SMALL_DATA_RELOC "sda21"
17918 #define SMALL_DATA_REG 0
17919 #endif
17921 void
17922 print_operand (FILE *file, rtx x, int code)
17924 int i;
17925 unsigned HOST_WIDE_INT uval;
17927 switch (code)
17929 /* %a is output_address. */
17931 case 'b':
17932 /* If constant, low-order 16 bits of constant, unsigned.
17933 Otherwise, write normally. */
17934 if (INT_P (x))
17935 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
17936 else
17937 print_operand (file, x, 0);
17938 return;
17940 case 'B':
17941 /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
17942 for 64-bit mask direction. */
17943 putc (((INTVAL (x) & 1) == 0 ? 'r' : 'l'), file);
17944 return;
17946 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
17947 output_operand. */
17949 case 'D':
17950 /* Like 'J' but get to the GT bit only. */
17951 gcc_assert (REG_P (x));
17953 /* Bit 1 is GT bit. */
17954 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
17956 /* Add one for shift count in rlinm for scc. */
17957 fprintf (file, "%d", i + 1);
17958 return;
17960 case 'E':
17961 /* X is a CR register. Print the number of the EQ bit of the CR */
17962 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
17963 output_operand_lossage ("invalid %%E value");
17964 else
17965 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
17966 return;
17968 case 'f':
17969 /* X is a CR register. Print the shift count needed to move it
17970 to the high-order four bits. */
17971 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
17972 output_operand_lossage ("invalid %%f value");
17973 else
17974 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
17975 return;
17977 case 'F':
17978 /* Similar, but print the count for the rotate in the opposite
17979 direction. */
17980 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
17981 output_operand_lossage ("invalid %%F value");
17982 else
17983 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
17984 return;
17986 case 'G':
17987 /* X is a constant integer. If it is negative, print "m",
17988 otherwise print "z". This is to make an aze or ame insn. */
17989 if (GET_CODE (x) != CONST_INT)
17990 output_operand_lossage ("invalid %%G value");
17991 else if (INTVAL (x) >= 0)
17992 putc ('z', file);
17993 else
17994 putc ('m', file);
17995 return;
17997 case 'h':
17998 /* If constant, output low-order five bits. Otherwise, write
17999 normally. */
18000 if (INT_P (x))
18001 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
18002 else
18003 print_operand (file, x, 0);
18004 return;
18006 case 'H':
18007 /* If constant, output low-order six bits. Otherwise, write
18008 normally. */
18009 if (INT_P (x))
18010 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
18011 else
18012 print_operand (file, x, 0);
18013 return;
18015 case 'I':
18016 /* Print `i' if this is a constant, else nothing. */
18017 if (INT_P (x))
18018 putc ('i', file);
18019 return;
18021 case 'j':
18022 /* Write the bit number in CCR for jump. */
18023 i = ccr_bit (x, 0);
18024 if (i == -1)
18025 output_operand_lossage ("invalid %%j code");
18026 else
18027 fprintf (file, "%d", i);
18028 return;
18030 case 'J':
18031 /* Similar, but add one for shift count in rlinm for scc and pass
18032 scc flag to `ccr_bit'. */
18033 i = ccr_bit (x, 1);
18034 if (i == -1)
18035 output_operand_lossage ("invalid %%J code");
18036 else
18037 /* If we want bit 31, write a shift count of zero, not 32. */
18038 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18039 return;
18041 case 'k':
18042 /* X must be a constant. Write the 1's complement of the
18043 constant. */
18044 if (! INT_P (x))
18045 output_operand_lossage ("invalid %%k value");
18046 else
18047 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
18048 return;
18050 case 'K':
18051 /* X must be a symbolic constant on ELF. Write an
18052 expression suitable for an 'addi' that adds in the low 16
18053 bits of the MEM. */
18054 if (GET_CODE (x) == CONST)
18056 if (GET_CODE (XEXP (x, 0)) != PLUS
18057 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
18058 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
18059 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
18060 output_operand_lossage ("invalid %%K value");
18062 print_operand_address (file, x);
18063 fputs ("@l", file);
18064 return;
18066 /* %l is output_asm_label. */
18068 case 'L':
18069 /* Write second word of DImode or DFmode reference. Works on register
18070 or non-indexed memory only. */
18071 if (REG_P (x))
18072 fputs (reg_names[REGNO (x) + 1], file);
18073 else if (MEM_P (x))
18075 /* Handle possible auto-increment. Since it is pre-increment and
18076 we have already done it, we can just use an offset of one word. */
18077 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18078 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18079 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18080 UNITS_PER_WORD));
18081 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18082 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18083 UNITS_PER_WORD));
18084 else
18085 output_address (XEXP (adjust_address_nv (x, SImode,
18086 UNITS_PER_WORD),
18087 0));
18089 if (small_data_operand (x, GET_MODE (x)))
18090 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18091 reg_names[SMALL_DATA_REG]);
18093 return;
18095 case 'm':
18096 /* MB value for a mask operand. */
18097 if (! mask_operand (x, SImode))
18098 output_operand_lossage ("invalid %%m value");
18100 fprintf (file, "%d", extract_MB (x));
18101 return;
18103 case 'M':
18104 /* ME value for a mask operand. */
18105 if (! mask_operand (x, SImode))
18106 output_operand_lossage ("invalid %%M value");
18108 fprintf (file, "%d", extract_ME (x));
18109 return;
18111 /* %n outputs the negative of its operand. */
18113 case 'N':
18114 /* Write the number of elements in the vector times 4. */
18115 if (GET_CODE (x) != PARALLEL)
18116 output_operand_lossage ("invalid %%N value");
18117 else
18118 fprintf (file, "%d", XVECLEN (x, 0) * 4);
18119 return;
18121 case 'O':
18122 /* Similar, but subtract 1 first. */
18123 if (GET_CODE (x) != PARALLEL)
18124 output_operand_lossage ("invalid %%O value");
18125 else
18126 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
18127 return;
18129 case 'p':
18130 /* X is a CONST_INT that is a power of two. Output the logarithm. */
18131 if (! INT_P (x)
18132 || INTVAL (x) < 0
18133 || (i = exact_log2 (INTVAL (x))) < 0)
18134 output_operand_lossage ("invalid %%p value");
18135 else
18136 fprintf (file, "%d", i);
18137 return;
18139 case 'P':
18140 /* The operand must be an indirect memory reference. The result
18141 is the register name. */
18142 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
18143 || REGNO (XEXP (x, 0)) >= 32)
18144 output_operand_lossage ("invalid %%P value");
18145 else
18146 fputs (reg_names[REGNO (XEXP (x, 0))], file);
18147 return;
18149 case 'q':
18150 /* This outputs the logical code corresponding to a boolean
18151 expression. The expression may have one or both operands
18152 negated (if one, only the first one). For condition register
18153 logical operations, it will also treat the negated
18154 CR codes as NOTs, but not handle NOTs of them. */
18156 const char *const *t = 0;
18157 const char *s;
18158 enum rtx_code code = GET_CODE (x);
18159 static const char * const tbl[3][3] = {
18160 { "and", "andc", "nor" },
18161 { "or", "orc", "nand" },
18162 { "xor", "eqv", "xor" } };
18164 if (code == AND)
18165 t = tbl[0];
18166 else if (code == IOR)
18167 t = tbl[1];
18168 else if (code == XOR)
18169 t = tbl[2];
18170 else
18171 output_operand_lossage ("invalid %%q value");
18173 if (GET_CODE (XEXP (x, 0)) != NOT)
18174 s = t[0];
18175 else
18177 if (GET_CODE (XEXP (x, 1)) == NOT)
18178 s = t[2];
18179 else
18180 s = t[1];
18183 fputs (s, file);
18185 return;
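/* Examples (illustrative): (and (not r3) r4) prints "andc",
   (ior (not r3) (not r4)) prints "nand", and (xor (not r3) (not r4))
   prints plain "xor", since ~a XOR ~b == a XOR b.  */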
18187 case 'Q':
18188 if (! TARGET_MFCRF)
18189 return;
18190 fputc (',', file);
18191 /* FALLTHRU */
18193 case 'R':
18194 /* X is a CR register. Print the mask for `mtcrf'. */
18195 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18196 output_operand_lossage ("invalid %%R value");
18197 else
18198 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
18199 return;
18201 case 's':
18202 /* Low 5 bits of 32 - value */
18203 if (! INT_P (x))
18204 output_operand_lossage ("invalid %%s value");
18205 else
18206 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
18207 return;
18209 case 'S':
18210 /* PowerPC64 mask position. All 0's is excluded.
18211 CONST_INT 32-bit mask is considered sign-extended so any
18212 transition must occur within the CONST_INT, not on the boundary. */
18213 if (! mask64_operand (x, DImode))
18214 output_operand_lossage ("invalid %%S value");
18216 uval = INTVAL (x);
18218 if (uval & 1) /* Clear Left */
18220 #if HOST_BITS_PER_WIDE_INT > 64
18221 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18222 #endif
18223 i = 64;
18225 else /* Clear Right */
18227 uval = ~uval;
18228 #if HOST_BITS_PER_WIDE_INT > 64
18229 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18230 #endif
18231 i = 63;
18233 while (uval != 0)
18234 --i, uval >>= 1;
18235 gcc_assert (i >= 0);
18236 fprintf (file, "%d", i);
18237 return;
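/* Worked examples (illustrative): the "clear left" mask
   0x00000000000000ff (low bit set) yields 56, an MB value for rldicl;
   the "clear right" mask 0xffffffffffffff00 (low bit clear) yields 55,
   an ME value for rldicr.  */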
18239 case 't':
18240 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
18241 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
18243 /* Bit 3 is OV bit. */
18244 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
18246 /* If we want bit 31, write a shift count of zero, not 32. */
18247 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18248 return;
18250 case 'T':
18251 /* Print the symbolic name of a branch target register. */
18252 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
18253 && REGNO (x) != CTR_REGNO))
18254 output_operand_lossage ("invalid %%T value");
18255 else if (REGNO (x) == LR_REGNO)
18256 fputs ("lr", file);
18257 else
18258 fputs ("ctr", file);
18259 return;
18261 case 'u':
18262 /* High-order 16 bits of constant for use in unsigned operand. */
18263 if (! INT_P (x))
18264 output_operand_lossage ("invalid %%u value");
18265 else
18266 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
18267 (INTVAL (x) >> 16) & 0xffff);
18268 return;
18270 case 'v':
18271 /* High-order 16 bits of constant for use in signed operand. */
18272 if (! INT_P (x))
18273 output_operand_lossage ("invalid %%v value");
18274 else
18275 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
18276 (INTVAL (x) >> 16) & 0xffff);
18277 return;
18279 case 'U':
18280 /* Print `u' if this has an auto-increment or auto-decrement. */
18281 if (MEM_P (x)
18282 && (GET_CODE (XEXP (x, 0)) == PRE_INC
18283 || GET_CODE (XEXP (x, 0)) == PRE_DEC
18284 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
18285 putc ('u', file);
18286 return;
18288 case 'V':
18289 /* Print the trap code for this operand. */
18290 switch (GET_CODE (x))
18292 case EQ:
18293 fputs ("eq", file); /* 4 */
18294 break;
18295 case NE:
18296 fputs ("ne", file); /* 24 */
18297 break;
18298 case LT:
18299 fputs ("lt", file); /* 16 */
18300 break;
18301 case LE:
18302 fputs ("le", file); /* 20 */
18303 break;
18304 case GT:
18305 fputs ("gt", file); /* 8 */
18306 break;
18307 case GE:
18308 fputs ("ge", file); /* 12 */
18309 break;
18310 case LTU:
18311 fputs ("llt", file); /* 2 */
18312 break;
18313 case LEU:
18314 fputs ("lle", file); /* 6 */
18315 break;
18316 case GTU:
18317 fputs ("lgt", file); /* 1 */
18318 break;
18319 case GEU:
18320 fputs ("lge", file); /* 5 */
18321 break;
18322 default:
18323 gcc_unreachable ();
18325 break;
18327 case 'w':
18328 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
18329 normally. */
18330 if (INT_P (x))
18331 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
18332 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
18333 else
18334 print_operand (file, x, 0);
18335 return;
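/* Example (illustrative): for (const_int 0x12348000), %u prints
   0x1234 (the high halfword, as used by unsigned operands such as
   oris) and %w prints -32768 (the low halfword sign-extended, as used
   by signed operands such as addi).  */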
18337 case 'W':
18338 /* MB value for a PowerPC64 rldic operand. */
18339 i = clz_hwi (INTVAL (x));
18341 fprintf (file, "%d", i);
18342 return;
18344 case 'x':
18345 /* X is a FPR or Altivec register used in a VSX context. */
18346 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
18347 output_operand_lossage ("invalid %%x value");
18348 else
18350 int reg = REGNO (x);
18351 int vsx_reg = (FP_REGNO_P (reg)
18352 ? reg - 32
18353 : reg - FIRST_ALTIVEC_REGNO + 32);
18355 #ifdef TARGET_REGNAMES
18356 if (TARGET_REGNAMES)
18357 fprintf (file, "%%vs%d", vsx_reg);
18358 else
18359 #endif
18360 fprintf (file, "%d", vsx_reg);
18362 return;
18364 case 'X':
18365 if (MEM_P (x)
18366 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
18367 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
18368 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
18369 putc ('x', file);
18370 return;
18372 case 'Y':
18373 /* Like 'L', for third word of TImode/PTImode */
18374 if (REG_P (x))
18375 fputs (reg_names[REGNO (x) + 2], file);
18376 else if (MEM_P (x))
18378 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18379 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18380 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18381 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18382 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18383 else
18384 output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
18385 if (small_data_operand (x, GET_MODE (x)))
18386 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18387 reg_names[SMALL_DATA_REG]);
18389 return;
18391 case 'z':
18392 /* X is a SYMBOL_REF. Write out the name preceded by a
18393 period and without any trailing data in brackets. Used for function
18394 names. If we are configured for System V (or the embedded ABI) on
18395 the PowerPC, do not emit the period, since those systems do not use
18396 TOCs and the like. */
18397 gcc_assert (GET_CODE (x) == SYMBOL_REF);
18399 /* For macho, check to see if we need a stub. */
18400 if (TARGET_MACHO)
18402 const char *name = XSTR (x, 0);
18403 #if TARGET_MACHO
18404 if (darwin_emit_branch_islands
18405 && MACHOPIC_INDIRECT
18406 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
18407 name = machopic_indirection_name (x, /*stub_p=*/true);
18408 #endif
18409 assemble_name (file, name);
18411 else if (!DOT_SYMBOLS)
18412 assemble_name (file, XSTR (x, 0));
18413 else
18414 rs6000_output_function_entry (file, XSTR (x, 0));
18415 return;
18417 case 'Z':
18418 /* Like 'L', for last word of TImode/PTImode. */
18419 if (REG_P (x))
18420 fputs (reg_names[REGNO (x) + 3], file);
18421 else if (MEM_P (x))
18423 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18424 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18425 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18426 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18427 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18428 else
18429 output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
18430 if (small_data_operand (x, GET_MODE (x)))
18431 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18432 reg_names[SMALL_DATA_REG]);
18434 return;
18436 /* Print AltiVec or SPE memory operand. */
18437 case 'y':
18439 rtx tmp;
18441 gcc_assert (MEM_P (x));
18443 tmp = XEXP (x, 0);
18445 /* Ugly hack because %y is overloaded. */
18446 if ((TARGET_SPE || TARGET_E500_DOUBLE)
18447 && (GET_MODE_SIZE (GET_MODE (x)) == 8
18448 || GET_MODE (x) == TFmode
18449 || GET_MODE (x) == TImode
18450 || GET_MODE (x) == PTImode))
18452 /* Handle [reg]. */
18453 if (REG_P (tmp))
18455 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
18456 break;
18458 /* Handle [reg+UIMM]. */
18459 else if (GET_CODE (tmp) == PLUS &&
18460 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
18462 int x;
18464 gcc_assert (REG_P (XEXP (tmp, 0)));
18466 x = INTVAL (XEXP (tmp, 1));
18467 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
18468 break;
18471 /* Fall through. Must be [reg+reg]. */
18473 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
18474 && GET_CODE (tmp) == AND
18475 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
18476 && INTVAL (XEXP (tmp, 1)) == -16)
18477 tmp = XEXP (tmp, 0);
18478 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
18479 && GET_CODE (tmp) == PRE_MODIFY)
18480 tmp = XEXP (tmp, 1);
18481 if (REG_P (tmp))
18482 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
18483 else
18485 if (GET_CODE (tmp) != PLUS
18486 || !REG_P (XEXP (tmp, 0))
18487 || !REG_P (XEXP (tmp, 1)))
18489 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
18490 break;
18493 if (REGNO (XEXP (tmp, 0)) == 0)
18494 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
18495 reg_names[ REGNO (XEXP (tmp, 0)) ]);
18496 else
18497 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
18498 reg_names[ REGNO (XEXP (tmp, 1)) ]);
18500 break;
18503 case 0:
18504 if (REG_P (x))
18505 fprintf (file, "%s", reg_names[REGNO (x)]);
18506 else if (MEM_P (x))
18508 /* We need to handle PRE_INC and PRE_DEC here, since we need to
18509 know the width from the mode. */
18510 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
18511 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
18512 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18513 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
18514 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
18515 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18516 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18517 output_address (XEXP (XEXP (x, 0), 1));
18518 else
18519 output_address (XEXP (x, 0));
18521 else
18523 if (toc_relative_expr_p (x, false))
18524 /* This hack along with a corresponding hack in
18525 rs6000_output_addr_const_extra arranges to output addends
18526 where the assembler expects to find them. eg.
18527 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
18528 without this hack would be output as "x@toc+4". We
18529 want "x+4@toc". */
18530 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18531 else
18532 output_addr_const (file, x);
18534 return;
18536 case '&':
18537 assemble_name (file, rs6000_get_some_local_dynamic_name ());
18538 return;
18540 default:
18541 output_operand_lossage ("invalid %%xn code");
18545 /* Print the address of an operand. */
18547 void
18548 print_operand_address (FILE *file, rtx x)
18550 if (REG_P (x))
18551 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
18552 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
18553 || GET_CODE (x) == LABEL_REF)
18555 output_addr_const (file, x);
18556 if (small_data_operand (x, GET_MODE (x)))
18557 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18558 reg_names[SMALL_DATA_REG]);
18559 else
18560 gcc_assert (!TARGET_TOC);
18562 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
18563 && REG_P (XEXP (x, 1)))
18565 if (REGNO (XEXP (x, 0)) == 0)
18566 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
18567 reg_names[ REGNO (XEXP (x, 0)) ]);
18568 else
18569 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
18570 reg_names[ REGNO (XEXP (x, 1)) ]);
18572 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
18573 && GET_CODE (XEXP (x, 1)) == CONST_INT)
18574 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
18575 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
18576 #if TARGET_MACHO
18577 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
18578 && CONSTANT_P (XEXP (x, 1)))
18580 fprintf (file, "lo16(");
18581 output_addr_const (file, XEXP (x, 1));
18582 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
18584 #endif
18585 #if TARGET_ELF
18586 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
18587 && CONSTANT_P (XEXP (x, 1)))
18589 output_addr_const (file, XEXP (x, 1));
18590 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
18592 #endif
18593 else if (toc_relative_expr_p (x, false))
18595 /* This hack along with a corresponding hack in
18596 rs6000_output_addr_const_extra arranges to output addends
18597 where the assembler expects to find them. eg.
18598 (lo_sum (reg 9)
18599 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
18600 without this hack would be output as "x@toc+8@l(9)". We
18601 want "x+8@toc@l(9)". */
18602 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18603 if (GET_CODE (x) == LO_SUM)
18604 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
18605 else
18606 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
18608 else
18609 gcc_unreachable ();
18612 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
18614 static bool
18615 rs6000_output_addr_const_extra (FILE *file, rtx x)
18617 if (GET_CODE (x) == UNSPEC)
18618 switch (XINT (x, 1))
18620 case UNSPEC_TOCREL:
18621 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
18622 && REG_P (XVECEXP (x, 0, 1))
18623 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
18624 output_addr_const (file, XVECEXP (x, 0, 0));
18625 if (x == tocrel_base && tocrel_offset != const0_rtx)
18627 if (INTVAL (tocrel_offset) >= 0)
18628 fprintf (file, "+");
18629 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
18631 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
18633 putc ('-', file);
18634 assemble_name (file, toc_label_name);
18636 else if (TARGET_ELF)
18637 fputs ("@toc", file);
18638 return true;
18640 #if TARGET_MACHO
18641 case UNSPEC_MACHOPIC_OFFSET:
18642 output_addr_const (file, XVECEXP (x, 0, 0));
18643 putc ('-', file);
18644 machopic_output_function_base_name (file);
18645 return true;
18646 #endif
18648 return false;
18651 /* Target hook for assembling integer objects. The PowerPC version has
18652 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
18653 is defined. It also needs to handle DI-mode objects on 64-bit
18654 targets. */
18656 static bool
18657 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
18659 #ifdef RELOCATABLE_NEEDS_FIXUP
18660 /* Special handling for SI values. */
18661 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
18663 static int recurse = 0;
18665 /* For -mrelocatable, we mark all addresses that need to be fixed up in
18666 the .fixup section. Since the TOC section is already relocated, we
18667 don't need to mark it here. We used to skip the text section, but it
18668 should never be valid for relocated addresses to be placed in the text
18669 section. */
18670 if (TARGET_RELOCATABLE
18671 && in_section != toc_section
18672 && !recurse
18673 && GET_CODE (x) != CONST_INT
18674 && GET_CODE (x) != CONST_DOUBLE
18675 && CONSTANT_P (x))
18677 char buf[256];
18679 recurse = 1;
18680 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
18681 fixuplabelno++;
18682 ASM_OUTPUT_LABEL (asm_out_file, buf);
18683 fprintf (asm_out_file, "\t.long\t(");
18684 output_addr_const (asm_out_file, x);
18685 fprintf (asm_out_file, ")@fixup\n");
18686 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
18687 ASM_OUTPUT_ALIGN (asm_out_file, 2);
18688 fprintf (asm_out_file, "\t.long\t");
18689 assemble_name (asm_out_file, buf);
18690 fprintf (asm_out_file, "\n\t.previous\n");
18691 recurse = 0;
18692 return true;
18694 /* Remove initial .'s to turn a -mcall-aixdesc function
18695 address into the address of the descriptor, not the function
18696 itself. */
18697 else if (GET_CODE (x) == SYMBOL_REF
18698 && XSTR (x, 0)[0] == '.'
18699 && DEFAULT_ABI == ABI_AIX)
18701 const char *name = XSTR (x, 0);
18702 while (*name == '.')
18703 name++;
18705 fprintf (asm_out_file, "\t.long\t%s\n", name);
18706 return true;
18709 #endif /* RELOCATABLE_NEEDS_FIXUP */
18710 return default_assemble_integer (x, size, aligned_p);
18713 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
18714 /* Emit an assembler directive to set symbol visibility for DECL to
18715 VISIBILITY_TYPE. */
18717 static void
18718 rs6000_assemble_visibility (tree decl, int vis)
18720 if (TARGET_XCOFF)
18721 return;
18723 /* Functions need to have their entry point symbol visibility set as
18724 well as their descriptor symbol visibility. */
18725 if (DEFAULT_ABI == ABI_AIX
18726 && DOT_SYMBOLS
18727 && TREE_CODE (decl) == FUNCTION_DECL)
18729 static const char * const visibility_types[] = {
18730 NULL, "internal", "hidden", "protected"
18733 const char *name, *type;
18735 name = ((* targetm.strip_name_encoding)
18736 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
18737 type = visibility_types[vis];
18739 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
18740 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
18742 else
18743 default_assemble_visibility (decl, vis);
18745 #endif
18747 enum rtx_code
18748 rs6000_reverse_condition (enum machine_mode mode, enum rtx_code code)
18750 /* Reversal of FP compares needs care -- an ordered compare
18751 becomes an unordered compare and vice versa. */
18752 if (mode == CCFPmode
18753 && (!flag_finite_math_only
18754 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
18755 || code == UNEQ || code == LTGT))
18756 return reverse_condition_maybe_unordered (code);
18757 else
18758 return reverse_condition (code);
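/* Example (illustrative): under CCFPmode with NaNs honored, reversing
   LT yields UNGE, since !(a < b) is "a >= b or unordered"; plain
   reverse_condition would give GE, which mishandles NaN operands.  */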
18761 /* Generate a compare for CODE. Return a brand-new rtx that
18762 represents the result of the compare. */
18764 static rtx
18765 rs6000_generate_compare (rtx cmp, enum machine_mode mode)
18767 enum machine_mode comp_mode;
18768 rtx compare_result;
18769 enum rtx_code code = GET_CODE (cmp);
18770 rtx op0 = XEXP (cmp, 0);
18771 rtx op1 = XEXP (cmp, 1);
18773 if (FLOAT_MODE_P (mode))
18774 comp_mode = CCFPmode;
18775 else if (code == GTU || code == LTU
18776 || code == GEU || code == LEU)
18777 comp_mode = CCUNSmode;
18778 else if ((code == EQ || code == NE)
18779 && unsigned_reg_p (op0)
18780 && (unsigned_reg_p (op1)
18781 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
18782 /* These are unsigned values, perhaps there will be a later
18783 ordering compare that can be shared with this one. */
18784 comp_mode = CCUNSmode;
18785 else
18786 comp_mode = CCmode;
18788 /* If we have an unsigned compare, make sure we don't have a signed value as
18789 an immediate. */
18790 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
18791 && INTVAL (op1) < 0)
18793 op0 = copy_rtx_if_shared (op0);
18794 op1 = force_reg (GET_MODE (op0), op1);
18795 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
18798 /* First, the compare. */
18799 compare_result = gen_reg_rtx (comp_mode);
18801 /* E500 FP compare instructions on the GPRs. Yuck! */
18802 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
18803 && FLOAT_MODE_P (mode))
18805 rtx cmp, or_result, compare_result2;
18806 enum machine_mode op_mode = GET_MODE (op0);
18807 bool reverse_p;
18809 if (op_mode == VOIDmode)
18810 op_mode = GET_MODE (op1);
18812 /* First reverse the condition codes that aren't directly supported. */
18813 switch (code)
18815 case NE:
18816 case UNLT:
18817 case UNLE:
18818 case UNGT:
18819 case UNGE:
18820 code = reverse_condition_maybe_unordered (code);
18821 reverse_p = true;
18822 break;
18824 case EQ:
18825 case LT:
18826 case LE:
18827 case GT:
18828 case GE:
18829 reverse_p = false;
18830 break;
18832 default:
18833 gcc_unreachable ();
18836 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
18837 This explains the following mess. */
18839 switch (code)
18841 case EQ:
18842 switch (op_mode)
18844 case SFmode:
18845 cmp = (flag_finite_math_only && !flag_trapping_math)
18846 ? gen_tstsfeq_gpr (compare_result, op0, op1)
18847 : gen_cmpsfeq_gpr (compare_result, op0, op1);
18848 break;
18850 case DFmode:
18851 cmp = (flag_finite_math_only && !flag_trapping_math)
18852 ? gen_tstdfeq_gpr (compare_result, op0, op1)
18853 : gen_cmpdfeq_gpr (compare_result, op0, op1);
18854 break;
18856 case TFmode:
18857 cmp = (flag_finite_math_only && !flag_trapping_math)
18858 ? gen_tsttfeq_gpr (compare_result, op0, op1)
18859 : gen_cmptfeq_gpr (compare_result, op0, op1);
18860 break;
18862 default:
18863 gcc_unreachable ();
18865 break;
18867 case GT:
18868 case GE:
18869 switch (op_mode)
18871 case SFmode:
18872 cmp = (flag_finite_math_only && !flag_trapping_math)
18873 ? gen_tstsfgt_gpr (compare_result, op0, op1)
18874 : gen_cmpsfgt_gpr (compare_result, op0, op1);
18875 break;
18877 case DFmode:
18878 cmp = (flag_finite_math_only && !flag_trapping_math)
18879 ? gen_tstdfgt_gpr (compare_result, op0, op1)
18880 : gen_cmpdfgt_gpr (compare_result, op0, op1);
18881 break;
18883 case TFmode:
18884 cmp = (flag_finite_math_only && !flag_trapping_math)
18885 ? gen_tsttfgt_gpr (compare_result, op0, op1)
18886 : gen_cmptfgt_gpr (compare_result, op0, op1);
18887 break;
18889 default:
18890 gcc_unreachable ();
18892 break;
18894 case LT:
18895 case LE:
18896 switch (op_mode)
18898 case SFmode:
18899 cmp = (flag_finite_math_only && !flag_trapping_math)
18900 ? gen_tstsflt_gpr (compare_result, op0, op1)
18901 : gen_cmpsflt_gpr (compare_result, op0, op1);
18902 break;
18904 case DFmode:
18905 cmp = (flag_finite_math_only && !flag_trapping_math)
18906 ? gen_tstdflt_gpr (compare_result, op0, op1)
18907 : gen_cmpdflt_gpr (compare_result, op0, op1);
18908 break;
18910 case TFmode:
18911 cmp = (flag_finite_math_only && !flag_trapping_math)
18912 ? gen_tsttflt_gpr (compare_result, op0, op1)
18913 : gen_cmptflt_gpr (compare_result, op0, op1);
18914 break;
18916 default:
18917 gcc_unreachable ();
18919 break;
18921 default:
18922 gcc_unreachable ();
18925 /* Synthesize LE and GE from LT/GT || EQ. */
18926 if (code == LE || code == GE)
18928 emit_insn (cmp);
18930 compare_result2 = gen_reg_rtx (CCFPmode);
18932 /* Do the EQ. */
18933 switch (op_mode)
18935 case SFmode:
18936 cmp = (flag_finite_math_only && !flag_trapping_math)
18937 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
18938 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
18939 break;
18941 case DFmode:
18942 cmp = (flag_finite_math_only && !flag_trapping_math)
18943 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
18944 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
18945 break;
18947 case TFmode:
18948 cmp = (flag_finite_math_only && !flag_trapping_math)
18949 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
18950 : gen_cmptfeq_gpr (compare_result2, op0, op1);
18951 break;
18953 default:
18954 gcc_unreachable ();
18957 emit_insn (cmp);
18959 /* OR them together. */
18960 or_result = gen_reg_rtx (CCFPmode);
18961 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
18962 compare_result2);
18963 compare_result = or_result;
18966 code = reverse_p ? NE : EQ;
18968 emit_insn (cmp);
18970 else
18972 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
18973 CLOBBERs to match cmptf_internal2 pattern. */
18974 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
18975 && GET_MODE (op0) == TFmode
18976 && !TARGET_IEEEQUAD
18977 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
18978 emit_insn (gen_rtx_PARALLEL (VOIDmode,
18979 gen_rtvec (10,
18980 gen_rtx_SET (VOIDmode,
18981 compare_result,
18982 gen_rtx_COMPARE (comp_mode, op0, op1)),
18983 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
18984 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
18985 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
18986 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
18987 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
18988 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
18989 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
18990 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
18991 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
18992 else if (GET_CODE (op1) == UNSPEC
18993 && XINT (op1, 1) == UNSPEC_SP_TEST)
18995 rtx op1b = XVECEXP (op1, 0, 0);
18996 comp_mode = CCEQmode;
18997 compare_result = gen_reg_rtx (CCEQmode);
18998 if (TARGET_64BIT)
18999 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
19000 else
19001 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
19003 else
19004 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
19005 gen_rtx_COMPARE (comp_mode, op0, op1)));
19008 /* Some kinds of FP comparisons need an OR operation;
19009 under flag_finite_math_only we don't bother. */
19010 if (FLOAT_MODE_P (mode)
19011 && !flag_finite_math_only
19012 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
19013 && (code == LE || code == GE
19014 || code == UNEQ || code == LTGT
19015 || code == UNGT || code == UNLT))
19017 enum rtx_code or1, or2;
19018 rtx or1_rtx, or2_rtx, compare2_rtx;
19019 rtx or_result = gen_reg_rtx (CCEQmode);
19021 switch (code)
19023 case LE: or1 = LT; or2 = EQ; break;
19024 case GE: or1 = GT; or2 = EQ; break;
19025 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
19026 case LTGT: or1 = LT; or2 = GT; break;
19027 case UNGT: or1 = UNORDERED; or2 = GT; break;
19028 case UNLT: or1 = UNORDERED; or2 = LT; break;
19029 default: gcc_unreachable ();
19031 validate_condition_mode (or1, comp_mode);
19032 validate_condition_mode (or2, comp_mode);
19033 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
19034 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
19035 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
19036 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
19037 const_true_rtx);
19038 emit_insn (gen_rtx_SET (VOIDmode, or_result, compare2_rtx));
19040 compare_result = or_result;
19041 code = EQ;
19044 validate_condition_mode (code, GET_MODE (compare_result));
19046 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
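/* Illustrative sketch (not part of the upstream source): for
   (gtu:SI r3 r4) this emits a CCUNSmode compare into a fresh CR
   register and returns (gtu CRn 0), ready for use by a conditional
   branch or setcc expansion.  */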
19050 /* Emit the RTL for an sISEL pattern. */
19052 void
19053 rs6000_emit_sISEL (enum machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
19055 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
19058 void
19059 rs6000_emit_sCOND (enum machine_mode mode, rtx operands[])
19061 rtx condition_rtx;
19062 enum machine_mode op_mode;
19063 enum rtx_code cond_code;
19064 rtx result = operands[0];
19066 if (TARGET_ISEL && (mode == SImode || mode == DImode))
19068 rs6000_emit_sISEL (mode, operands);
19069 return;
19072 condition_rtx = rs6000_generate_compare (operands[1], mode);
19073 cond_code = GET_CODE (condition_rtx);
19075 if (FLOAT_MODE_P (mode)
19076 && !TARGET_FPRS && TARGET_HARD_FLOAT)
19078 rtx t;
19080 PUT_MODE (condition_rtx, SImode);
19081 t = XEXP (condition_rtx, 0);
19083 gcc_assert (cond_code == NE || cond_code == EQ);
19085 if (cond_code == NE)
19086 emit_insn (gen_e500_flip_gt_bit (t, t));
19088 emit_insn (gen_move_from_CR_gt_bit (result, t));
19089 return;
19092 if (cond_code == NE
19093 || cond_code == GE || cond_code == LE
19094 || cond_code == GEU || cond_code == LEU
19095 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
19097 rtx not_result = gen_reg_rtx (CCEQmode);
19098 rtx not_op, rev_cond_rtx;
19099 enum machine_mode cc_mode;
19101 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
19103 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
19104 SImode, XEXP (condition_rtx, 0), const0_rtx);
19105 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
19106 emit_insn (gen_rtx_SET (VOIDmode, not_result, not_op));
19107 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
19110 op_mode = GET_MODE (XEXP (operands[1], 0));
19111 if (op_mode == VOIDmode)
19112 op_mode = GET_MODE (XEXP (operands[1], 1));
19114 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
19116 PUT_MODE (condition_rtx, DImode);
19117 convert_move (result, condition_rtx, 0);
19119 else
19121 PUT_MODE (condition_rtx, SImode);
19122 emit_insn (gen_rtx_SET (VOIDmode, result, condition_rtx));
19126 /* Emit a branch of kind CODE to location LOC. */
19128 void
19129 rs6000_emit_cbranch (enum machine_mode mode, rtx operands[])
19131 rtx condition_rtx, loc_ref;
19133 condition_rtx = rs6000_generate_compare (operands[0], mode);
19134 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
19135 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
19136 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
19137 loc_ref, pc_rtx)));
19140 /* Return the string to output a conditional branch to LABEL, which is
19141 the operand template of the label, or NULL if the branch is really a
19142 conditional return.
19144 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
19145 condition code register and its mode specifies what kind of
19146 comparison we made.
19148 REVERSED is nonzero if we should reverse the sense of the comparison.
19150 INSN is the insn. */
19152 char *
19153 output_cbranch (rtx op, const char *label, int reversed, rtx insn)
19155 static char string[64];
19156 enum rtx_code code = GET_CODE (op);
19157 rtx cc_reg = XEXP (op, 0);
19158 enum machine_mode mode = GET_MODE (cc_reg);
19159 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
19160 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
19161 int really_reversed = reversed ^ need_longbranch;
19162 char *s = string;
19163 const char *ccode;
19164 const char *pred;
19165 rtx note;
19167 validate_condition_mode (code, mode);
19169 /* Work out which way this really branches. We could use
19170 reverse_condition_maybe_unordered here always but this
19171 makes the resulting assembler clearer. */
19172 if (really_reversed)
19174 /* Reversal of FP compares needs care -- an ordered compare
19175 becomes an unordered compare and vice versa. */
19176 if (mode == CCFPmode)
19177 code = reverse_condition_maybe_unordered (code);
19178 else
19179 code = reverse_condition (code);
19182 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
19184 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
19185 to the GT bit. */
19186 switch (code)
19188 case EQ:
19189 /* Opposite of GT. */
19190 code = GT;
19191 break;
19193 case NE:
19194 code = UNLE;
19195 break;
19197 default:
19198 gcc_unreachable ();
19202 switch (code)
19204 /* Not all of these are actually distinct opcodes, but
19205 we distinguish them for clarity of the resulting assembler. */
19206 case NE: case LTGT:
19207 ccode = "ne"; break;
19208 case EQ: case UNEQ:
19209 ccode = "eq"; break;
19210 case GE: case GEU:
19211 ccode = "ge"; break;
19212 case GT: case GTU: case UNGT:
19213 ccode = "gt"; break;
19214 case LE: case LEU:
19215 ccode = "le"; break;
19216 case LT: case LTU: case UNLT:
19217 ccode = "lt"; break;
19218 case UNORDERED: ccode = "un"; break;
19219 case ORDERED: ccode = "nu"; break;
19220 case UNGE: ccode = "nl"; break;
19221 case UNLE: ccode = "ng"; break;
19222 default:
19223 gcc_unreachable ();
19226 /* Maybe we have a guess as to how likely the branch is. */
19227 pred = "";
19228 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
19229 if (note != NULL_RTX)
19231 /* PROB is the difference from 50%. */
19232 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
19234 /* Only hint for highly probable/improbable branches on newer
19235 cpus as static prediction overrides processor dynamic
19236 prediction. For older cpus we may as well always hint, but
19237 assume not taken for branches that are very close to 50% as a
19238 mispredicted taken branch is more expensive than a
19239 mispredicted not-taken branch. */
19240 if (rs6000_always_hint
19241 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
19242 && br_prob_note_reliable_p (note)))
19244 if (abs (prob) > REG_BR_PROB_BASE / 20
19245 && ((prob > 0) ^ need_longbranch))
19246 pred = "+";
19247 else
19248 pred = "-";
19252 if (label == NULL)
19253 s += sprintf (s, "b%slr%s ", ccode, pred);
19254 else
19255 s += sprintf (s, "b%s%s ", ccode, pred);
19257 /* We need to escape any '%' characters in the reg_names string.
19258 Assume they'd only be the first character.... */
19259 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
19260 *s++ = '%';
19261 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
19263 if (label != NULL)
19265 /* If the branch distance was too far, we may have to use an
19266 unconditional branch to go the distance. */
19267 if (need_longbranch)
19268 s += sprintf (s, ",$+8\n\tb %s", label);
19269 else
19270 s += sprintf (s, ",%s", label);
19273 return string;
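/* Examples (illustrative, assuming the default numeric CR register
   names): for (eq cr0 0) and label "L2" this produces "beq 0,L2", or
   "beq+ 0,L2" with a strong taken hint; if the target is out of
   conditional-branch range, the sense is reversed and
   "bne 0,$+8\n\tb L2" is produced instead.  */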
19276 /* Return the string to flip the GT bit on a CR. */
19277 char *
19278 output_e500_flip_gt_bit (rtx dst, rtx src)
19280 static char string[64];
19281 int a, b;
19283 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
19284 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
19286 /* GT bit. */
19287 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
19288 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
19290 sprintf (string, "crnot %d,%d", a, b);
19291 return string;
19294 /* Return insn for VSX or Altivec comparisons. */
19296 static rtx
19297 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
19299 rtx mask;
19300 enum machine_mode mode = GET_MODE (op0);
19302 switch (code)
19304 default:
19305 break;
19307 case GE:
19308 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19309 return NULL_RTX;
19311 case EQ:
19312 case GT:
19313 case GTU:
19314 case ORDERED:
19315 case UNORDERED:
19316 case UNEQ:
19317 case LTGT:
19318 mask = gen_reg_rtx (mode);
19319 emit_insn (gen_rtx_SET (VOIDmode,
19320 mask,
19321 gen_rtx_fmt_ee (code, mode, op0, op1)));
19322 return mask;
19325 return NULL_RTX;
19328 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
19329 DMODE is expected destination mode. This is a recursive function. */
19331 static rtx
19332 rs6000_emit_vector_compare (enum rtx_code rcode,
19333 rtx op0, rtx op1,
19334 enum machine_mode dmode)
19336 rtx mask;
19337 bool swap_operands = false;
19338 bool try_again = false;
19340 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
19341 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
19343 /* See if the comparison works as is. */
19344 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19345 if (mask)
19346 return mask;
19348 switch (rcode)
19350 case LT:
19351 rcode = GT;
19352 swap_operands = true;
19353 try_again = true;
19354 break;
19355 case LTU:
19356 rcode = GTU;
19357 swap_operands = true;
19358 try_again = true;
19359 break;
19360 case NE:
19361 case UNLE:
19362 case UNLT:
19363 case UNGE:
19364 case UNGT:
19365 /* Invert condition and try again.
19366 e.g., A != B becomes ~(A==B). */
19368 enum rtx_code rev_code;
19369 enum insn_code nor_code;
19370 rtx mask2;
19372 rev_code = reverse_condition_maybe_unordered (rcode);
19373 if (rev_code == UNKNOWN)
19374 return NULL_RTX;
19376 nor_code = optab_handler (one_cmpl_optab, dmode);
19377 if (nor_code == CODE_FOR_nothing)
19378 return NULL_RTX;
19380 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
19381 if (!mask2)
19382 return NULL_RTX;
19384 mask = gen_reg_rtx (dmode);
19385 emit_insn (GEN_FCN (nor_code) (mask, mask2));
19386 return mask;
19388 break;
19389 case GE:
19390 case GEU:
19391 case LE:
19392 case LEU:
19393 /* Try GT/GTU/LT/LTU OR EQ */
19395 rtx c_rtx, eq_rtx;
19396 enum insn_code ior_code;
19397 enum rtx_code new_code;
19399 switch (rcode)
19401 case GE:
19402 new_code = GT;
19403 break;
19405 case GEU:
19406 new_code = GTU;
19407 break;
19409 case LE:
19410 new_code = LT;
19411 break;
19413 case LEU:
19414 new_code = LTU;
19415 break;
19417 default:
19418 gcc_unreachable ();
19421 ior_code = optab_handler (ior_optab, dmode);
19422 if (ior_code == CODE_FOR_nothing)
19423 return NULL_RTX;
19425 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
19426 if (!c_rtx)
19427 return NULL_RTX;
19429 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
19430 if (!eq_rtx)
19431 return NULL_RTX;
19433 mask = gen_reg_rtx (dmode);
19434 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
19435 return mask;
19437 break;
19438 default:
19439 return NULL_RTX;
19442 if (try_again)
19444 if (swap_operands)
19446 rtx tmp;
19447 tmp = op0;
19448 op0 = op1;
19449 op1 = tmp;
19452 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19453 if (mask)
19454 return mask;
19457 /* You only get two chances. */
19458 return NULL_RTX;
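/* Illustrative example: on an Altivec V4SI target, LE is synthesized
   by this recursion as (LT OR EQ), and the inner LT is in turn handled
   by swapping the operands and using GT (e.g. vcmpgtsw), so a <= b
   ends up computed as (b > a) | (a == b).  */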
19461 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
19462 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
19463 operands for the relation operation COND. */
19465 int
19466 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
19467 rtx cond, rtx cc_op0, rtx cc_op1)
19469 enum machine_mode dest_mode = GET_MODE (dest);
19470 enum machine_mode mask_mode = GET_MODE (cc_op0);
19471 enum rtx_code rcode = GET_CODE (cond);
19472 enum machine_mode cc_mode = CCmode;
19473 rtx mask;
19474 rtx cond2;
19475 rtx tmp;
19476 bool invert_move = false;
19478 if (VECTOR_UNIT_NONE_P (dest_mode))
19479 return 0;
19481 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
19482 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
19484 switch (rcode)
19486 /* Swap operands if we can, and fall back to doing the operation as
19487 specified, and doing a NOR to invert the test. */
19488 case NE:
19489 case UNLE:
19490 case UNLT:
19491 case UNGE:
19492 case UNGT:
19493 /* Invert condition and try again.
19494 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
19495 invert_move = true;
19496 rcode = reverse_condition_maybe_unordered (rcode);
19497 if (rcode == UNKNOWN)
19498 return 0;
19499 break;
19501 /* Mark unsigned tests with CCUNSmode. */
19502 case GTU:
19503 case GEU:
19504 case LTU:
19505 case LEU:
19506 cc_mode = CCUNSmode;
19507 break;
19509 default:
19510 break;
19513 /* Get the vector mask for the given relational operations. */
19514 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
19516 if (!mask)
19517 return 0;
19519 if (invert_move)
19521 tmp = op_true;
19522 op_true = op_false;
19523 op_false = tmp;
19526 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
19527 CONST0_RTX (dest_mode));
19528 emit_insn (gen_rtx_SET (VOIDmode,
19529 dest,
19530 gen_rtx_IF_THEN_ELSE (dest_mode,
19531 cond2,
19532 op_true,
19533 op_false)));
19534 return 1;
19537 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
19538 operands of the last comparison is nonzero/true, FALSE_COND if it
19539 is zero/false. Return 0 if the hardware has no such operation. */
19541 int
19542 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
19544 enum rtx_code code = GET_CODE (op);
19545 rtx op0 = XEXP (op, 0);
19546 rtx op1 = XEXP (op, 1);
19547 REAL_VALUE_TYPE c1;
19548 enum machine_mode compare_mode = GET_MODE (op0);
19549 enum machine_mode result_mode = GET_MODE (dest);
19550 rtx temp;
19551 bool is_against_zero;
19553 /* These modes should always match. */
19554 if (GET_MODE (op1) != compare_mode
19555 /* In the isel case however, we can use a compare immediate, so
19556 op1 may be a small constant. */
19557 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
19558 return 0;
19559 if (GET_MODE (true_cond) != result_mode)
19560 return 0;
19561 if (GET_MODE (false_cond) != result_mode)
19562 return 0;
19564 /* Don't allow using floating point comparisons for integer results for
19565 now. */
19566 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
19567 return 0;
19569 /* First, work out if the hardware can do this at all, or
19570 if it's too slow.... */
19571 if (!FLOAT_MODE_P (compare_mode))
19573 if (TARGET_ISEL)
19574 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
19575 return 0;
19577 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
19578 && SCALAR_FLOAT_MODE_P (compare_mode))
19579 return 0;
19581 is_against_zero = op1 == CONST0_RTX (compare_mode);
19583 /* A floating-point subtract might overflow, underflow, or produce
19584 an inexact result, thus changing the floating-point flags, so it
19585 can't be generated if we care about that. It's safe if one side
19586 of the construct is zero, since then no subtract will be
19587 generated. */
19588 if (SCALAR_FLOAT_MODE_P (compare_mode)
19589 && flag_trapping_math && ! is_against_zero)
19590 return 0;
19592 /* Eliminate half of the comparisons by switching operands, this
19593 makes the remaining code simpler. */
19594 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
19595 || code == LTGT || code == LT || code == UNLE)
19597 code = reverse_condition_maybe_unordered (code);
19598 temp = true_cond;
19599 true_cond = false_cond;
19600 false_cond = temp;
19603 /* UNEQ and LTGT take four instructions for a comparison with zero,
19604 so it'll probably be faster to use a branch here too. */
19605 if (code == UNEQ && HONOR_NANS (compare_mode))
19606 return 0;
19608 if (GET_CODE (op1) == CONST_DOUBLE)
19609 REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);
19611 /* We're going to try to implement comparisons by performing
19612 a subtract, then comparing against zero. Unfortunately,
19613 Inf - Inf is NaN which is not zero, and so if we don't
19614 know that the operand is finite and the comparison
19615 would treat EQ different to UNORDERED, we can't do it. */
19616 if (HONOR_INFINITIES (compare_mode)
19617 && code != GT && code != UNGE
19618 && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
19619 /* Constructs of the form (a OP b ? a : b) are safe. */
19620 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
19621 || (! rtx_equal_p (op0, true_cond)
19622 && ! rtx_equal_p (op1, true_cond))))
19623 return 0;
19625 /* At this point we know we can use fsel. */
19627 /* Reduce the comparison to a comparison against zero. */
19628 if (! is_against_zero)
19630 temp = gen_reg_rtx (compare_mode);
19631 emit_insn (gen_rtx_SET (VOIDmode, temp,
19632 gen_rtx_MINUS (compare_mode, op0, op1)));
19633 op0 = temp;
19634 op1 = CONST0_RTX (compare_mode);
19637 /* If we don't care about NaNs we can reduce some of the comparisons
19638 down to faster ones. */
19639 if (! HONOR_NANS (compare_mode))
19640 switch (code)
19642 case GT:
19643 code = LE;
19644 temp = true_cond;
19645 true_cond = false_cond;
19646 false_cond = temp;
19647 break;
19648 case UNGE:
19649 code = GE;
19650 break;
19651 case UNEQ:
19652 code = EQ;
19653 break;
19654 default:
19655 break;
19658 /* Now, reduce everything down to a GE. */
19659 switch (code)
19661 case GE:
19662 break;
19664 case LE:
19665 temp = gen_reg_rtx (compare_mode);
19666 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
19667 op0 = temp;
19668 break;
19670 case ORDERED:
19671 temp = gen_reg_rtx (compare_mode);
19672 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_ABS (compare_mode, op0)));
19673 op0 = temp;
19674 break;
19676 case EQ:
19677 temp = gen_reg_rtx (compare_mode);
19678 emit_insn (gen_rtx_SET (VOIDmode, temp,
19679 gen_rtx_NEG (compare_mode,
19680 gen_rtx_ABS (compare_mode, op0))));
19681 op0 = temp;
19682 break;
19684 case UNGE:
19685 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
19686 temp = gen_reg_rtx (result_mode);
19687 emit_insn (gen_rtx_SET (VOIDmode, temp,
19688 gen_rtx_IF_THEN_ELSE (result_mode,
19689 gen_rtx_GE (VOIDmode,
19690 op0, op1),
19691 true_cond, false_cond)));
19692 false_cond = true_cond;
19693 true_cond = temp;
19695 temp = gen_reg_rtx (compare_mode);
19696 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
19697 op0 = temp;
19698 break;
19700 case GT:
19701 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
19702 temp = gen_reg_rtx (result_mode);
19703 emit_insn (gen_rtx_SET (VOIDmode, temp,
19704 gen_rtx_IF_THEN_ELSE (result_mode,
19705 gen_rtx_GE (VOIDmode,
19706 op0, op1),
19707 true_cond, false_cond)));
19708 true_cond = false_cond;
19709 false_cond = temp;
19711 temp = gen_reg_rtx (compare_mode);
19712 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
19713 op0 = temp;
19714 break;
19716 default:
19717 gcc_unreachable ();
19720 emit_insn (gen_rtx_SET (VOIDmode, dest,
19721 gen_rtx_IF_THEN_ELSE (result_mode,
19722 gen_rtx_GE (VOIDmode,
19723 op0, op1),
19724 true_cond, false_cond)));
19725 return 1;
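/* Illustrative note: the final pattern maps directly onto fsel, whose
   semantics are fD = (fA >= 0.0) ? fC : fB; everything above exists to
   rewrite the original comparison into that GE-against-zero form.  */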
19728 /* Same as above, but for ints (isel). */
19730 static int
19731 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
19733 rtx condition_rtx, cr;
19734 enum machine_mode mode = GET_MODE (dest);
19735 enum rtx_code cond_code;
19736 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
19737 bool signedp;
19739 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
19740 return 0;
19742 /* We still have to do the compare, because isel doesn't do a
19743 compare, it just looks at the CRx bits set by a previous compare
19744 instruction. */
19745 condition_rtx = rs6000_generate_compare (op, mode);
19746 cond_code = GET_CODE (condition_rtx);
19747 cr = XEXP (condition_rtx, 0);
19748 signedp = GET_MODE (cr) == CCmode;
19750 isel_func = (mode == SImode
19751 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
19752 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
19754 switch (cond_code)
19756 case LT: case GT: case LTU: case GTU: case EQ:
19757 /* isel handles these directly. */
19758 break;
19760 default:
19761 /* We need to swap the sense of the comparison. */
19763 rtx t = true_cond;
19764 true_cond = false_cond;
19765 false_cond = t;
19766 PUT_CODE (condition_rtx, reverse_condition (cond_code));
19768 break;
19771 false_cond = force_reg (mode, false_cond);
19772 if (true_cond != const0_rtx)
19773 true_cond = force_reg (mode, true_cond);
19775 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
19777 return 1;
19780 const char *
19781 output_isel (rtx *operands)
19783 enum rtx_code code;
19785 code = GET_CODE (operands[1]);
19787 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
19789 gcc_assert (GET_CODE (operands[2]) == REG
19790 && GET_CODE (operands[3]) == REG);
19791 PUT_CODE (operands[1], reverse_condition (code));
19792 return "isel %0,%3,%2,%j1";
19795 return "isel %0,%2,%3,%j1";
19798 void
19799 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
19801 enum machine_mode mode = GET_MODE (op0);
19802 enum rtx_code c;
19803 rtx target;
19805 /* VSX/altivec have direct min/max insns. */
19806 if ((code == SMAX || code == SMIN)
19807 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
19808 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
19810 emit_insn (gen_rtx_SET (VOIDmode,
19811 dest,
19812 gen_rtx_fmt_ee (code, mode, op0, op1)));
19813 return;
19816 if (code == SMAX || code == SMIN)
19817 c = GE;
19818 else
19819 c = GEU;
19821 if (code == SMAX || code == UMAX)
19822 target = emit_conditional_move (dest, c, op0, op1, mode,
19823 op0, op1, mode, 0);
19824 else
19825 target = emit_conditional_move (dest, c, op0, op1, mode,
19826 op1, op0, mode, 0);
19827 gcc_assert (target);
19828 if (target != dest)
19829 emit_move_insn (dest, target);
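/* Example (illustrative): smax d,a,b on a target without vector
   min/max insns becomes a conditional move, d = (a >= b) ? a : b, via
   emit_conditional_move; smin swaps the move arms rather than the
   comparison.  */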
19832 /* A subroutine of the atomic operation splitters. Jump to LABEL if
19833 COND is true. Mark the jump as unlikely to be taken. */
19835 static void
19836 emit_unlikely_jump (rtx cond, rtx label)
19838 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
19839 rtx x;
19841 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
19842 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
19843 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
19846 /* A subroutine of the atomic operation splitters. Emit a load-locked
19847 instruction in MODE. For QI/HImode, possibly use a pattern that includes
19848 the zero_extend operation. */
19850 static void
19851 emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
19853 rtx (*fn) (rtx, rtx) = NULL;
19855 switch (mode)
19857 case QImode:
19858 fn = gen_load_lockedqi;
19859 break;
19860 case HImode:
19861 fn = gen_load_lockedhi;
19862 break;
19863 case SImode:
19864 if (GET_MODE (mem) == QImode)
19865 fn = gen_load_lockedqi_si;
19866 else if (GET_MODE (mem) == HImode)
19867 fn = gen_load_lockedhi_si;
19868 else
19869 fn = gen_load_lockedsi;
19870 break;
19871 case DImode:
19872 fn = gen_load_lockeddi;
19873 break;
19874 case TImode:
19875 fn = gen_load_lockedti;
19876 break;
19877 default:
19878 gcc_unreachable ();
19880 emit_insn (fn (reg, mem));
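/* These patterns correspond to the PowerPC load-and-reserve
   instructions lbarx, lharx, lwarx, ldarx and lqarx; the *_si
   variants zero-extend a sub-word reservation into an SImode
   register.  */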
19883 /* A subroutine of the atomic operation splitters. Emit a store-conditional
19884 instruction in MODE. */
19886 static void
19887 emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
19889 rtx (*fn) (rtx, rtx, rtx) = NULL;
19891 switch (mode)
19893 case QImode:
19894 fn = gen_store_conditionalqi;
19895 break;
19896 case HImode:
19897 fn = gen_store_conditionalhi;
19898 break;
19899 case SImode:
19900 fn = gen_store_conditionalsi;
19901 break;
19902 case DImode:
19903 fn = gen_store_conditionaldi;
19904 break;
19905 case TImode:
19906 fn = gen_store_conditionalti;
19907 break;
19908 default:
19909 gcc_unreachable ();
19912 /* Emit sync before stwcx. to address PPC405 Erratum. */
19913 if (PPC405_ERRATUM77)
19914 emit_insn (gen_hwsync ());
19916 emit_insn (fn (res, mem, val));
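/* Likewise these map to stbcx., sthcx., stwcx., stdcx. and stqcx.;
   RES receives the CR0 result of the store-conditional, which the
   callers test against zero.  */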
19919 /* Expand barriers before and after a load_locked/store_cond sequence. */
19921 static rtx
19922 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
19924 rtx addr = XEXP (mem, 0);
19925 int strict_p = (reload_in_progress || reload_completed);
19927 if (!legitimate_indirect_address_p (addr, strict_p)
19928 && !legitimate_indexed_address_p (addr, strict_p))
19930 addr = force_reg (Pmode, addr);
19931 mem = replace_equiv_address_nv (mem, addr);
19934 switch (model)
19936 case MEMMODEL_RELAXED:
19937 case MEMMODEL_CONSUME:
19938 case MEMMODEL_ACQUIRE:
19939 break;
19940 case MEMMODEL_RELEASE:
19941 case MEMMODEL_ACQ_REL:
19942 emit_insn (gen_lwsync ());
19943 break;
19944 case MEMMODEL_SEQ_CST:
19945 emit_insn (gen_hwsync ());
19946 break;
19947 default:
19948 gcc_unreachable ();
19950 return mem;
19953 static void
19954 rs6000_post_atomic_barrier (enum memmodel model)
19956 switch (model)
19958 case MEMMODEL_RELAXED:
19959 case MEMMODEL_CONSUME:
19960 case MEMMODEL_RELEASE:
19961 break;
19962 case MEMMODEL_ACQUIRE:
19963 case MEMMODEL_ACQ_REL:
19964 case MEMMODEL_SEQ_CST:
19965 emit_insn (gen_isync ());
19966 break;
19967 default:
19968 gcc_unreachable ();
19972 /* A subroutine of the various atomic expanders. For sub-word operations,
19973 we must adjust things to operate on SImode. Given the original MEM,
19974 return a new aligned memory. Also build and return the quantities by
19975 which to shift and mask. */
19977 static rtx
19978 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
19980 rtx addr, align, shift, mask, mem;
19981 HOST_WIDE_INT shift_mask;
19982 enum machine_mode mode = GET_MODE (orig_mem);
19984 /* For smaller modes, we have to implement this via SImode. */
19985 shift_mask = (mode == QImode ? 0x18 : 0x10);
19987 addr = XEXP (orig_mem, 0);
19988 addr = force_reg (GET_MODE (addr), addr);
19990 /* Aligned memory containing subword. Generate a new memory. We
19991 do not want any of the existing MEM_ATTR data, as we're now
19992 accessing memory outside the original object. */
19993 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
19994 NULL_RTX, 1, OPTAB_LIB_WIDEN);
19995 mem = gen_rtx_MEM (SImode, align);
19996 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
19997 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
19998 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
20000 /* Shift amount for subword relative to aligned word. */
20001 shift = gen_reg_rtx (SImode);
20002 addr = gen_lowpart (SImode, addr);
20003 emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
20004 if (WORDS_BIG_ENDIAN)
20005 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
20006 shift, 1, OPTAB_LIB_WIDEN);
20007 *pshift = shift;
20009 /* Mask for insertion. */
20010 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
20011 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
20012 *pmask = mask;
20014 return mem;
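/* Worked example (little-endian, HImode): for a halfword at address
   0x1002, ALIGN is 0x1002 & -4 = 0x1000, SHIFT is
   (0x1002 << 3) & 0x10 = 16, and MASK is 0xffff << 16, i.e. the
   halfword lives in bits 16..31 of the aligned SImode word.  On
   big-endian targets the XOR above flips SHIFT to count from the
   other end of the word.  */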
20017 /* A subroutine of the various atomic expanders. For sub-word operands,
20018    combine OLDVAL and NEWVAL via MASK.  Returns a new pseudo.  */
20020 static rtx
20021 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
20023 rtx x;
20025 x = gen_reg_rtx (SImode);
20026 emit_insn (gen_rtx_SET (VOIDmode, x,
20027 gen_rtx_AND (SImode,
20028 gen_rtx_NOT (SImode, mask),
20029 oldval)));
20031 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
20033 return x;
20036 /* A subroutine of the various atomic expanders. For sub-word operands,
20037 extract WIDE to NARROW via SHIFT. */
20039 static void
20040 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
20042 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
20043 wide, 1, OPTAB_LIB_WIDEN);
20044 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
20047 /* Expand an atomic compare and swap operation. */
20049 void
20050 rs6000_expand_atomic_compare_and_swap (rtx operands[])
20052 rtx boolval, retval, mem, oldval, newval, cond;
20053 rtx label1, label2, x, mask, shift;
20054 enum machine_mode mode, orig_mode;
20055 enum memmodel mod_s, mod_f;
20056 bool is_weak;
20058 boolval = operands[0];
20059 retval = operands[1];
20060 mem = operands[2];
20061 oldval = operands[3];
20062 newval = operands[4];
20063 is_weak = (INTVAL (operands[5]) != 0);
20064 mod_s = (enum memmodel) INTVAL (operands[6]);
20065 mod_f = (enum memmodel) INTVAL (operands[7]);
20066 orig_mode = mode = GET_MODE (mem);
20068 mask = shift = NULL_RTX;
20069 if (mode == QImode || mode == HImode)
20071 /* Before power8, we didn't have access to lbarx/lharx, so generate a
20072 lwarx and shift/mask operations. With power8, we need to do the
20073 comparison in SImode, but the store is still done in QI/HImode. */
20074 oldval = convert_modes (SImode, mode, oldval, 1);
20076 if (!TARGET_SYNC_HI_QI)
20078 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20080      /* Shift and mask OLDVAL into position within the word.  */
20081 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
20082 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20084 /* Shift and mask NEWVAL into position within the word. */
20085 newval = convert_modes (SImode, mode, newval, 1);
20086 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
20087 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20090 /* Prepare to adjust the return value. */
20091 retval = gen_reg_rtx (SImode);
20092 mode = SImode;
20094 else if (reg_overlap_mentioned_p (retval, oldval))
20095 oldval = copy_to_reg (oldval);
20097 mem = rs6000_pre_atomic_barrier (mem, mod_s);
20099 label1 = NULL_RTX;
20100 if (!is_weak)
20102 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20103 emit_label (XEXP (label1, 0));
20105 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20107 emit_load_locked (mode, retval, mem);
20109 x = retval;
20110 if (mask)
20112 x = expand_simple_binop (SImode, AND, retval, mask,
20113 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20116 cond = gen_reg_rtx (CCmode);
20117 /* If we have TImode, synthesize a comparison. */
20118 if (mode != TImode)
20119 x = gen_rtx_COMPARE (CCmode, x, oldval);
20120 else
20122 rtx xor1_result = gen_reg_rtx (DImode);
20123 rtx xor2_result = gen_reg_rtx (DImode);
20124 rtx or_result = gen_reg_rtx (DImode);
20125 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
20126 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
20127 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
20128 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
20130 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
20131 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
20132 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
20133 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
20136 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
20138 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20139 emit_unlikely_jump (x, label2);
20141 x = newval;
20142 if (mask)
20143 x = rs6000_mask_atomic_subword (retval, newval, mask);
20145 emit_store_conditional (orig_mode, cond, mem, x);
20147 if (!is_weak)
20149 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20150 emit_unlikely_jump (x, label1);
20153 if (mod_f != MEMMODEL_RELAXED)
20154 emit_label (XEXP (label2, 0));
20156 rs6000_post_atomic_barrier (mod_s);
20158 if (mod_f == MEMMODEL_RELAXED)
20159 emit_label (XEXP (label2, 0));
20161 if (shift)
20162 rs6000_finish_atomic_subword (operands[1], retval, shift);
20163 else if (mode != GET_MODE (operands[1]))
20164 convert_move (operands[1], retval, 1);
20166 /* In all cases, CR0 contains EQ on success, and NE on failure. */
20167 x = gen_rtx_EQ (SImode, cond, const0_rtx);
20168 emit_insn (gen_rtx_SET (VOIDmode, boolval, x));
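/* For a full-word strong compare-and-swap the net effect is the
   classic reservation loop, roughly (a sketch, not the exact
   emitted sequence):
       loop: lwarx   r,0,mem
             cmpw    cr0,r,oldval
             bne-    cr0,fail
             stwcx.  newval,0,mem
             bne-    cr0,loop
       fail:
   bracketed by whatever barriers the memory model selected above.  */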
20171 /* Expand an atomic exchange operation. */
20173 void
20174 rs6000_expand_atomic_exchange (rtx operands[])
20176 rtx retval, mem, val, cond;
20177 enum machine_mode mode;
20178 enum memmodel model;
20179 rtx label, x, mask, shift;
20181 retval = operands[0];
20182 mem = operands[1];
20183 val = operands[2];
20184 model = (enum memmodel) INTVAL (operands[3]);
20185 mode = GET_MODE (mem);
20187 mask = shift = NULL_RTX;
20188 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
20190 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20192      /* Shift and mask VAL into position within the word.  */
20193 val = convert_modes (SImode, mode, val, 1);
20194 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20195 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20197 /* Prepare to adjust the return value. */
20198 retval = gen_reg_rtx (SImode);
20199 mode = SImode;
20202 mem = rs6000_pre_atomic_barrier (mem, model);
20204 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20205 emit_label (XEXP (label, 0));
20207 emit_load_locked (mode, retval, mem);
20209 x = val;
20210 if (mask)
20211 x = rs6000_mask_atomic_subword (retval, val, mask);
20213 cond = gen_reg_rtx (CCmode);
20214 emit_store_conditional (mode, cond, mem, x);
20216 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20217 emit_unlikely_jump (x, label);
20219 rs6000_post_atomic_barrier (model);
20221 if (shift)
20222 rs6000_finish_atomic_subword (operands[0], retval, shift);
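/* The exchange loop has the same shape as the compare-and-swap loop
   without the compare: load the old value with a reservation, try to
   store the new one, and retry until the store-conditional succeeds.  */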
20225 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
20226 to perform. MEM is the memory on which to operate. VAL is the second
20227 operand of the binary operator. BEFORE and AFTER are optional locations to
20228    return the value of MEM either before or after the operation.  MODEL_RTX
20229 is a CONST_INT containing the memory model to use. */
20231 void
20232 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
20233 rtx orig_before, rtx orig_after, rtx model_rtx)
20235 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
20236 enum machine_mode mode = GET_MODE (mem);
20237 enum machine_mode store_mode = mode;
20238 rtx label, x, cond, mask, shift;
20239 rtx before = orig_before, after = orig_after;
20241 mask = shift = NULL_RTX;
20242 /* On power8, we want to use SImode for the operation. On previous systems,
20243 use the operation in a subword and shift/mask to get the proper byte or
20244 halfword. */
20245 if (mode == QImode || mode == HImode)
20247 if (TARGET_SYNC_HI_QI)
20249 val = convert_modes (SImode, mode, val, 1);
20251 /* Prepare to adjust the return value. */
20252 before = gen_reg_rtx (SImode);
20253 if (after)
20254 after = gen_reg_rtx (SImode);
20255 mode = SImode;
20257 else
20259 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20261      /* Shift and mask VAL into position within the word.  */
20262 val = convert_modes (SImode, mode, val, 1);
20263 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20264 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20266 switch (code)
20268 case IOR:
20269 case XOR:
20270 /* We've already zero-extended VAL. That is sufficient to
20271 make certain that it does not affect other bits. */
20272 mask = NULL;
20273 break;
20275 case AND:
20276      /* If we make certain that all of the other bits in VAL are
20277      set, that will be sufficient to not affect other bits.  For
example, an AND on the low byte widens VAL to 0xffffff00 | VAL, so
the other three bytes of the word pass through unchanged.  */
20278 x = gen_rtx_NOT (SImode, mask);
20279 x = gen_rtx_IOR (SImode, x, val);
20280 emit_insn (gen_rtx_SET (VOIDmode, val, x));
20281 mask = NULL;
20282 break;
20284 case NOT:
20285 case PLUS:
20286 case MINUS:
20287 /* These will all affect bits outside the field and need
20288 adjustment via MASK within the loop. */
20289 break;
20291 default:
20292 gcc_unreachable ();
20295 /* Prepare to adjust the return value. */
20296 before = gen_reg_rtx (SImode);
20297 if (after)
20298 after = gen_reg_rtx (SImode);
20299 store_mode = mode = SImode;
20303 mem = rs6000_pre_atomic_barrier (mem, model);
20305 label = gen_label_rtx ();
20306 emit_label (label);
20307 label = gen_rtx_LABEL_REF (VOIDmode, label);
20309 if (before == NULL_RTX)
20310 before = gen_reg_rtx (mode);
20312 emit_load_locked (mode, before, mem);
20314 if (code == NOT)
20316 x = expand_simple_binop (mode, AND, before, val,
20317 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20318 after = expand_simple_unop (mode, NOT, x, after, 1);
20320 else
20322 after = expand_simple_binop (mode, code, before, val,
20323 after, 1, OPTAB_LIB_WIDEN);
20326 x = after;
20327 if (mask)
20329 x = expand_simple_binop (SImode, AND, after, mask,
20330 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20331 x = rs6000_mask_atomic_subword (before, x, mask);
20333 else if (store_mode != mode)
20334 x = convert_modes (store_mode, mode, x, 1);
20336 cond = gen_reg_rtx (CCmode);
20337 emit_store_conditional (store_mode, cond, mem, x);
20339 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20340 emit_unlikely_jump (x, label);
20342 rs6000_post_atomic_barrier (model);
20344 if (shift)
20346 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
20347    then do the calculations in an SImode register.  */
20348 if (orig_before)
20349 rs6000_finish_atomic_subword (orig_before, before, shift);
20350 if (orig_after)
20351 rs6000_finish_atomic_subword (orig_after, after, shift);
20353 else if (store_mode != mode)
20355 /* QImode/HImode on machines with lbarx/lharx where we do the native
20356    operation and then do the calculations in an SImode register.  */
20357 if (orig_before)
20358 convert_move (orig_before, before, 1);
20359 if (orig_after)
20360 convert_move (orig_after, after, 1);
20362 else if (orig_after && after != orig_after)
20363 emit_move_insn (orig_after, after);
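/* Note that CODE == NOT is expanded as AND followed by complement,
   i.e. after = ~(before & val); this is how the __atomic_fetch_nand
   family reaches this function.  */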
20366 /* Emit instructions to move SRC to DST. Called by splitters for
20367 multi-register moves. It will emit at most one instruction for
20368 each register that is accessed; that is, it won't emit li/lis pairs
20369 (or equivalent for 64-bit code). One of SRC or DST must be a hard
20370 register. */
20372 void
20373 rs6000_split_multireg_move (rtx dst, rtx src)
20375 /* The register number of the first register being moved. */
20376 int reg;
20377 /* The mode that is to be moved. */
20378 enum machine_mode mode;
20379 /* The mode that the move is being done in, and its size. */
20380 enum machine_mode reg_mode;
20381 int reg_mode_size;
20382 /* The number of registers that will be moved. */
20383 int nregs;
20385 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
20386 mode = GET_MODE (dst);
20387 nregs = hard_regno_nregs[reg][mode];
20388 if (FP_REGNO_P (reg))
20389 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
20390 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
20391 else if (ALTIVEC_REGNO_P (reg))
20392 reg_mode = V16QImode;
20393 else if (TARGET_E500_DOUBLE && mode == TFmode)
20394 reg_mode = DFmode;
20395 else
20396 reg_mode = word_mode;
20397 reg_mode_size = GET_MODE_SIZE (reg_mode);
20399 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
20401 /* TDmode residing in FP registers is special, since the ISA requires that
20402 the lower-numbered word of a register pair is always the most significant
20403 word, even in little-endian mode. This does not match the usual subreg
20404    semantics, so we cannot use simplify_gen_subreg in those cases.  Access
20405 the appropriate constituent registers "by hand" in little-endian mode.
20407 Note we do not need to check for destructive overlap here since TDmode
20408 can only reside in even/odd register pairs. */
20409 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
20411 rtx p_src, p_dst;
20412 int i;
20414 for (i = 0; i < nregs; i++)
20416 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
20417 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
20418 else
20419 p_src = simplify_gen_subreg (reg_mode, src, mode,
20420 i * reg_mode_size);
20422 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
20423 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
20424 else
20425 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
20426 i * reg_mode_size);
20428 emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
20431 return;
20434 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
20436 /* Move register range backwards, if we might have destructive
20437 overlap. */
20438 int i;
20439 for (i = nregs - 1; i >= 0; i--)
20440 emit_insn (gen_rtx_SET (VOIDmode,
20441 simplify_gen_subreg (reg_mode, dst, mode,
20442 i * reg_mode_size),
20443 simplify_gen_subreg (reg_mode, src, mode,
20444 i * reg_mode_size)));
20446 else
20448 int i;
20449 int j = -1;
20450 bool used_update = false;
20451 rtx restore_basereg = NULL_RTX;
20453 if (MEM_P (src) && INT_REGNO_P (reg))
20455 rtx breg;
20457 if (GET_CODE (XEXP (src, 0)) == PRE_INC
20458 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
20460 rtx delta_rtx;
20461 breg = XEXP (XEXP (src, 0), 0);
20462 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
20463 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
20464 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
20465 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20466 src = replace_equiv_address (src, breg);
20468 else if (! rs6000_offsettable_memref_p (src, reg_mode))
20470 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
20472 rtx basereg = XEXP (XEXP (src, 0), 0);
20473 if (TARGET_UPDATE)
20475 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
20476 emit_insn (gen_rtx_SET (VOIDmode, ndst,
20477 gen_rtx_MEM (reg_mode, XEXP (src, 0))));
20478 used_update = true;
20480 else
20481 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20482 XEXP (XEXP (src, 0), 1)));
20483 src = replace_equiv_address (src, basereg);
20485 else
20487 rtx basereg = gen_rtx_REG (Pmode, reg);
20488 emit_insn (gen_rtx_SET (VOIDmode, basereg, XEXP (src, 0)));
20489 src = replace_equiv_address (src, basereg);
20493 breg = XEXP (src, 0);
20494 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
20495 breg = XEXP (breg, 0);
20497 /* If the base register we are using to address memory is
20498 also a destination reg, then change that register last. */
20499 if (REG_P (breg)
20500 && REGNO (breg) >= REGNO (dst)
20501 && REGNO (breg) < REGNO (dst) + nregs)
20502 j = REGNO (breg) - REGNO (dst);
20504 else if (MEM_P (dst) && INT_REGNO_P (reg))
20506 rtx breg;
20508 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
20509 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
20511 rtx delta_rtx;
20512 breg = XEXP (XEXP (dst, 0), 0);
20513 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
20514 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
20515 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
20517 /* We have to update the breg before doing the store.
20518 Use store with update, if available. */
20520 if (TARGET_UPDATE)
20522 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20523 emit_insn (TARGET_32BIT
20524 ? (TARGET_POWERPC64
20525 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
20526 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
20527 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
20528 used_update = true;
20530 else
20531 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20532 dst = replace_equiv_address (dst, breg);
20534 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
20535 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
20537 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
20539 rtx basereg = XEXP (XEXP (dst, 0), 0);
20540 if (TARGET_UPDATE)
20542 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20543 emit_insn (gen_rtx_SET (VOIDmode,
20544 gen_rtx_MEM (reg_mode, XEXP (dst, 0)), nsrc));
20545 used_update = true;
20547 else
20548 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20549 XEXP (XEXP (dst, 0), 1)));
20550 dst = replace_equiv_address (dst, basereg);
20552 else
20554 rtx basereg = XEXP (XEXP (dst, 0), 0);
20555 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
20556 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
20557 && REG_P (basereg)
20558 && REG_P (offsetreg)
20559 && REGNO (basereg) != REGNO (offsetreg));
20560 if (REGNO (basereg) == 0)
20562 rtx tmp = offsetreg;
20563 offsetreg = basereg;
20564 basereg = tmp;
20566 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
20567 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
20568 dst = replace_equiv_address (dst, basereg);
20571 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
20572 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
20575 for (i = 0; i < nregs; i++)
20577 /* Calculate index to next subword. */
20578 ++j;
20579 if (j == nregs)
20580 j = 0;
20582 /* If compiler already emitted move of first word by
20583 store with update, no need to do anything. */
20584 if (j == 0 && used_update)
20585 continue;
20587 emit_insn (gen_rtx_SET (VOIDmode,
20588 simplify_gen_subreg (reg_mode, dst, mode,
20589 j * reg_mode_size),
20590 simplify_gen_subreg (reg_mode, src, mode,
20591 j * reg_mode_size)));
20593 if (restore_basereg != NULL_RTX)
20594 emit_insn (restore_basereg);
20599 /* This page contains routines that are used to determine what the
20600 function prologue and epilogue code will do and write them out. */
20602 static inline bool
20603 save_reg_p (int r)
20605 return !call_used_regs[r] && df_regs_ever_live_p (r);
20608 /* Return the first fixed-point register that is required to be
20609 saved. 32 if none. */
20611 static int
20612 first_reg_to_save (void)
20614 int first_reg;
20616 /* Find lowest numbered live register. */
20617 for (first_reg = 13; first_reg <= 31; first_reg++)
20618 if (save_reg_p (first_reg))
20619 break;
20621 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
20622 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
20623 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
20624 || (TARGET_TOC && TARGET_MINIMAL_TOC))
20625 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20626 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
20628 #if TARGET_MACHO
20629 if (flag_pic
20630 && crtl->uses_pic_offset_table
20631 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
20632 return RS6000_PIC_OFFSET_TABLE_REGNUM;
20633 #endif
20635 return first_reg;
20638 /* Similar, for FP regs. */
20640 static int
20641 first_fp_reg_to_save (void)
20643 int first_reg;
20645 /* Find lowest numbered live register. */
20646 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
20647 if (save_reg_p (first_reg))
20648 break;
20650 return first_reg;
20653 /* Similar, for AltiVec regs. */
20655 static int
20656 first_altivec_reg_to_save (void)
20658 int i;
20660 /* Stack frame remains as is unless we are in AltiVec ABI. */
20661 if (! TARGET_ALTIVEC_ABI)
20662 return LAST_ALTIVEC_REGNO + 1;
20664 /* On Darwin, the unwind routines are compiled without
20665 TARGET_ALTIVEC, and use save_world to save/restore the
20666 altivec registers when necessary. */
20667 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
20668 && ! TARGET_ALTIVEC)
20669 return FIRST_ALTIVEC_REGNO + 20;
20671 /* Find lowest numbered live register. */
20672 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
20673 if (save_reg_p (i))
20674 break;
20676 return i;
20679 /* Return a 32-bit mask of the AltiVec registers we need to set in
20680    VRSAVE.  Bit n of the return value is 1 if Vn is live.  Bits are
20681    numbered from the most-significant end: V0 maps to the MSB of the 32-bit word.  */
20683 static unsigned int
20684 compute_vrsave_mask (void)
20686 unsigned int i, mask = 0;
20688 /* On Darwin, the unwind routines are compiled without
20689 TARGET_ALTIVEC, and use save_world to save/restore the
20690 call-saved altivec registers when necessary. */
20691 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
20692 && ! TARGET_ALTIVEC)
20693 mask |= 0xFFF;
20695 /* First, find out if we use _any_ altivec registers. */
20696 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
20697 if (df_regs_ever_live_p (i))
20698 mask |= ALTIVEC_REG_BIT (i);
20700 if (mask == 0)
20701 return mask;
20703 /* Next, remove the argument registers from the set. These must
20704 be in the VRSAVE mask set by the caller, so we don't need to add
20705 them in again. More importantly, the mask we compute here is
20706 used to generate CLOBBERs in the set_vrsave insn, and we do not
20707 wish the argument registers to die. */
20708 for (i = crtl->args.info.vregno - 1; i >= ALTIVEC_ARG_MIN_REG; --i)
20709 mask &= ~ALTIVEC_REG_BIT (i);
20711 /* Similarly, remove the return value from the set. */
20713 bool yes = false;
20714 diddle_return_value (is_altivec_return_reg, &yes);
20715 if (yes)
20716 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
20719 return mask;
20722 /* For a very restricted set of circumstances, we can cut down the
20723 size of prologues/epilogues by calling our own save/restore-the-world
20724 routines. */
20726 static void
20727 compute_save_world_info (rs6000_stack_t *info_ptr)
20729 info_ptr->world_save_p = 1;
20730 info_ptr->world_save_p
20731 = (WORLD_SAVE_P (info_ptr)
20732 && DEFAULT_ABI == ABI_DARWIN
20733 && !cfun->has_nonlocal_label
20734 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
20735 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
20736 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
20737 && info_ptr->cr_save_p);
20739 /* This will not work in conjunction with sibcalls. Make sure there
20740 are none. (This check is expensive, but seldom executed.) */
20741 if (WORLD_SAVE_P (info_ptr))
20743 rtx insn;
20744 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
20745 if (CALL_P (insn) && SIBLING_CALL_P (insn))
20747 info_ptr->world_save_p = 0;
20748 break;
20752 if (WORLD_SAVE_P (info_ptr))
20754 /* Even if we're not touching VRsave, make sure there's room on the
20755 stack for it, if it looks like we're calling SAVE_WORLD, which
20756 will attempt to save it. */
20757 info_ptr->vrsave_size = 4;
20759 /* If we are going to save the world, we need to save the link register too. */
20760 info_ptr->lr_save_p = 1;
20762 /* "Save" the VRsave register too if we're saving the world. */
20763 if (info_ptr->vrsave_mask == 0)
20764 info_ptr->vrsave_mask = compute_vrsave_mask ();
20766 /* Because the Darwin register save/restore routines only handle
20767 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
20768 check. */
20769 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
20770 && (info_ptr->first_altivec_reg_save
20771 >= FIRST_SAVED_ALTIVEC_REGNO));
20773 return;
20777 static void
20778 is_altivec_return_reg (rtx reg, void *xyes)
20780 bool *yes = (bool *) xyes;
20781 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
20782 *yes = true;
20786 /* Look for user-defined global regs in the range FIRST to LAST-1.
20787 We should not restore these, and so cannot use lmw or out-of-line
20788 restore functions if there are any. We also can't save them
20789 (well, emit frame notes for them), because frame unwinding during
20790 exception handling will restore saved registers. */
20792 static bool
20793 global_regs_p (unsigned first, unsigned last)
20795 while (first < last)
20796 if (global_regs[first++])
20797 return true;
20798 return false;
20801 /* Determine the strategy for saving/restoring registers.  */
20803 enum {
20804 SAVRES_MULTIPLE = 0x1,
20805 SAVE_INLINE_FPRS = 0x2,
20806 SAVE_INLINE_GPRS = 0x4,
20807 REST_INLINE_FPRS = 0x8,
20808 REST_INLINE_GPRS = 0x10,
20809 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
20810 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
20811 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
20812 SAVE_INLINE_VRS = 0x100,
20813 REST_INLINE_VRS = 0x200
20816 static int
20817 rs6000_savres_strategy (rs6000_stack_t *info,
20818 bool using_static_chain_p)
20820 int strategy = 0;
20821 bool lr_save_p;
20823 if (TARGET_MULTIPLE
20824 && !TARGET_POWERPC64
20825 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
20826 && info->first_gp_reg_save < 31
20827 && !global_regs_p (info->first_gp_reg_save, 32))
20828 strategy |= SAVRES_MULTIPLE;
20830 if (crtl->calls_eh_return
20831 || cfun->machine->ra_need_lr)
20832 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
20833 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
20834 | SAVE_INLINE_VRS | REST_INLINE_VRS);
20836 if (info->first_fp_reg_save == 64
20837 /* The out-of-line FP routines use double-precision stores;
20838 we can't use those routines if we don't have such stores. */
20839 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
20840 || global_regs_p (info->first_fp_reg_save, 64))
20841 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
20843 if (info->first_gp_reg_save == 32
20844 || (!(strategy & SAVRES_MULTIPLE)
20845 && global_regs_p (info->first_gp_reg_save, 32)))
20846 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
20848 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
20849 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
20850 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
20852 /* Define cutoff for using out-of-line functions to save registers. */
20853 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
20855 if (!optimize_size)
20857 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
20858 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
20859 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
20861 else
20863 /* Prefer out-of-line restore if it will exit. */
20864 if (info->first_fp_reg_save > 61)
20865 strategy |= SAVE_INLINE_FPRS;
20866 if (info->first_gp_reg_save > 29)
20868 if (info->first_fp_reg_save == 64)
20869 strategy |= SAVE_INLINE_GPRS;
20870 else
20871 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
20873 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
20874 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
20877 else if (DEFAULT_ABI == ABI_DARWIN)
20879 if (info->first_fp_reg_save > 60)
20880 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
20881 if (info->first_gp_reg_save > 29)
20882 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
20883 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
20885 else
20887 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
20888 if (info->first_fp_reg_save > 61)
20889 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
20890 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
20891 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
20894 /* Don't bother to try to save things out-of-line if r11 is occupied
20895 by the static chain. It would require too much fiddling and the
20896 static chain is rarely used anyway. FPRs are saved w.r.t the stack
20897 pointer on Darwin, and AIX uses r1 or r12. */
20898 if (using_static_chain_p
20899 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
20900 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
20901 | SAVE_INLINE_GPRS
20902 | SAVE_INLINE_VRS | REST_INLINE_VRS);
20904 /* We can only use the out-of-line routines to restore if we've
20905 saved all the registers from first_fp_reg_save in the prologue.
20906 Otherwise, we risk loading garbage. */
20907 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
20909 int i;
20911 for (i = info->first_fp_reg_save; i < 64; i++)
20912 if (!save_reg_p (i))
20914 strategy |= REST_INLINE_FPRS;
20915 break;
20919 /* If we are going to use store multiple, then don't even bother
20920 with the out-of-line routines, since the store-multiple
20921 instruction will always be smaller. */
20922 if ((strategy & SAVRES_MULTIPLE))
20923 strategy |= SAVE_INLINE_GPRS;
20925 /* info->lr_save_p isn't yet set if the only reason lr needs to be
20926 saved is an out-of-line save or restore. Set up the value for
20927 the next test (excluding out-of-line gpr restore). */
20928 lr_save_p = (info->lr_save_p
20929 || !(strategy & SAVE_INLINE_GPRS)
20930 || !(strategy & SAVE_INLINE_FPRS)
20931 || !(strategy & SAVE_INLINE_VRS)
20932 || !(strategy & REST_INLINE_FPRS)
20933 || !(strategy & REST_INLINE_VRS));
20935 /* The situation is more complicated with load multiple. We'd
20936 prefer to use the out-of-line routines for restores, since the
20937 "exit" out-of-line routines can handle the restore of LR and the
20938    frame teardown.  However, it doesn't make sense to use the
20939    out-of-line routine if that is the only reason we'd need to save
20940    LR, and we can't use the "exit" out-of-line gpr restore if we
20941    have saved some fprs; in those cases it is advantageous to use
20942 load multiple when available. */
20943 if ((strategy & SAVRES_MULTIPLE)
20944 && (!lr_save_p
20945 || info->first_fp_reg_save != 64))
20946 strategy |= REST_INLINE_GPRS;
20948 /* Saving CR interferes with the exit routines used on the SPE, so
20949 just punt here. */
20950 if (TARGET_SPE_ABI
20951 && info->spe_64bit_regs_used
20952 && info->cr_save_p)
20953 strategy |= REST_INLINE_GPRS;
20955 /* We can only use load multiple or the out-of-line routines to
20956 restore if we've used store multiple or out-of-line routines
20957 in the prologue, i.e. if we've saved all the registers from
20958 first_gp_reg_save. Otherwise, we risk loading garbage. */
20959 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
20960 == SAVE_INLINE_GPRS)
20962 int i;
20964 for (i = info->first_gp_reg_save; i < 32; i++)
20965 if (!save_reg_p (i))
20967 strategy |= REST_INLINE_GPRS;
20968 break;
20972 if (TARGET_ELF && TARGET_64BIT)
20974 if (!(strategy & SAVE_INLINE_FPRS))
20975 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
20976 else if (!(strategy & SAVE_INLINE_GPRS)
20977 && info->first_fp_reg_save == 64)
20978 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
20980 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
20981 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
20983 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
20984 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
20986 return strategy;
20989 /* Calculate the stack information for the current function. This is
20990 complicated by having two separate calling sequences, the AIX calling
20991 sequence and the V.4 calling sequence.
20993 AIX (and Darwin/Mac OS X) stack frames look like:
20994 32-bit 64-bit
20995 SP----> +---------------------------------------+
20996 | back chain to caller | 0 0
20997 +---------------------------------------+
20998 | saved CR | 4 8 (8-11)
20999 +---------------------------------------+
21000 | saved LR | 8 16
21001 +---------------------------------------+
21002 | reserved for compilers | 12 24
21003 +---------------------------------------+
21004 | reserved for binders | 16 32
21005 +---------------------------------------+
21006 | saved TOC pointer | 20 40
21007 +---------------------------------------+
21008 | Parameter save area (P) | 24 48
21009 +---------------------------------------+
21010 | Alloca space (A) | 24+P etc.
21011 +---------------------------------------+
21012 | Local variable space (L) | 24+P+A
21013 +---------------------------------------+
21014 | Float/int conversion temporary (X) | 24+P+A+L
21015 +---------------------------------------+
21016 | Save area for AltiVec registers (W) | 24+P+A+L+X
21017 +---------------------------------------+
21018 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
21019 +---------------------------------------+
21020 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
21021 +---------------------------------------+
21022         | Save area for GP registers (G)       | 24+P+A+L+X+W+Y+Z
21023         +---------------------------------------+
21024         | Save area for FP registers (F)       | 24+P+A+L+X+W+Y+Z+G
21025 +---------------------------------------+
21026 old SP->| back chain to caller's caller |
21027 +---------------------------------------+
21029 The required alignment for AIX configurations is two words (i.e., 8
21030 or 16 bytes).
21032 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
21034 SP----> +---------------------------------------+
21035 | Back chain to caller | 0
21036 +---------------------------------------+
21037 | Save area for CR | 8
21038 +---------------------------------------+
21039 | Saved LR | 16
21040 +---------------------------------------+
21041 | Saved TOC pointer | 24
21042 +---------------------------------------+
21043 | Parameter save area (P) | 32
21044 +---------------------------------------+
21045 | Alloca space (A) | 32+P
21046 +---------------------------------------+
21047 | Local variable space (L) | 32+P+A
21048 +---------------------------------------+
21049 | Save area for AltiVec registers (W) | 32+P+A+L
21050 +---------------------------------------+
21051 | AltiVec alignment padding (Y) | 32+P+A+L+W
21052 +---------------------------------------+
21053 | Save area for GP registers (G) | 32+P+A+L+W+Y
21054 +---------------------------------------+
21055 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
21056 +---------------------------------------+
21057 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
21058 +---------------------------------------+
21061 V.4 stack frames look like:
21063 SP----> +---------------------------------------+
21064 | back chain to caller | 0
21065 +---------------------------------------+
21066 | caller's saved LR | 4
21067 +---------------------------------------+
21068 | Parameter save area (P) | 8
21069 +---------------------------------------+
21070 | Alloca space (A) | 8+P
21071 +---------------------------------------+
21072 | Varargs save area (V) | 8+P+A
21073 +---------------------------------------+
21074 | Local variable space (L) | 8+P+A+V
21075 +---------------------------------------+
21076 | Float/int conversion temporary (X) | 8+P+A+V+L
21077 +---------------------------------------+
21078 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
21079 +---------------------------------------+
21080 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
21081 +---------------------------------------+
21082 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
21083 +---------------------------------------+
21084 | SPE: area for 64-bit GP registers |
21085 +---------------------------------------+
21086 | SPE alignment padding |
21087 +---------------------------------------+
21088 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
21089 +---------------------------------------+
21090 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
21091 +---------------------------------------+
21092 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
21093 +---------------------------------------+
21094 old SP->| back chain to caller's caller |
21095 +---------------------------------------+
21097 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
21098 given. (But note below and in sysv4.h that we require only 8 and
21099    may round up the size of our stack frame anyway.  The historical
21100 reason is early versions of powerpc-linux which didn't properly
21101 align the stack at program startup. A happy side-effect is that
21102 -mno-eabi libraries can be used with -meabi programs.)
21104 The EABI configuration defaults to the V.4 layout. However,
21105 the stack alignment requirements may differ. If -mno-eabi is not
21106 given, the required stack alignment is 8 bytes; if -mno-eabi is
21107 given, the required alignment is 16 bytes. (But see V.4 comment
21108 above.) */
21110 #ifndef ABI_STACK_BOUNDARY
21111 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
21112 #endif
21114 static rs6000_stack_t *
21115 rs6000_stack_info (void)
21117 rs6000_stack_t *info_ptr = &stack_info;
21118 int reg_size = TARGET_32BIT ? 4 : 8;
21119 int ehrd_size;
21120 int ehcr_size;
21121 int save_align;
21122 int first_gp;
21123 HOST_WIDE_INT non_fixed_size;
21124 bool using_static_chain_p;
21126 if (reload_completed && info_ptr->reload_completed)
21127 return info_ptr;
21129 memset (info_ptr, 0, sizeof (*info_ptr));
21130 info_ptr->reload_completed = reload_completed;
21132 if (TARGET_SPE)
21134 /* Cache value so we don't rescan instruction chain over and over. */
21135 if (cfun->machine->insn_chain_scanned_p == 0)
21136 cfun->machine->insn_chain_scanned_p
21137 = spe_func_has_64bit_regs_p () + 1;
21138 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
21141 /* Select which calling sequence. */
21142 info_ptr->abi = DEFAULT_ABI;
21144 /* Calculate which registers need to be saved & save area size. */
21145 info_ptr->first_gp_reg_save = first_reg_to_save ();
21146 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
21147 even if it currently looks like we won't. Reload may need it to
21148 get at a constant; if so, it will have already created a constant
21149 pool entry for it. */
21150 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
21151 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
21152 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
21153 && crtl->uses_const_pool
21154 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
21155 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
21156 else
21157 first_gp = info_ptr->first_gp_reg_save;
21159 info_ptr->gp_size = reg_size * (32 - first_gp);
21161 /* For the SPE, we have an additional upper 32-bits on each GPR.
21162 Ideally we should save the entire 64-bits only when the upper
21163 half is used in SIMD instructions. Since we only record
21164 registers live (not the size they are used in), this proves
21165 difficult because we'd have to traverse the instruction chain at
21166 the right time, taking reload into account. This is a real pain,
21167      so we opt to save the GPRs in 64 bits whenever even one register
21168      is used in 64 bits.  Otherwise, all the registers in the frame
21169      get saved in 32 bits.
21171      So, since we save all GPRs (except the SP) in 64 bits, the
21172      traditional GP save area will be empty.  */
21173 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21174 info_ptr->gp_size = 0;
21176 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
21177 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
21179 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
21180 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
21181 - info_ptr->first_altivec_reg_save);
21183 /* Does this function call anything? */
21184 info_ptr->calls_p = (! crtl->is_leaf
21185 || cfun->machine->ra_needs_full_frame);
21187 /* Determine if we need to save the condition code registers. */
21188 if (df_regs_ever_live_p (CR2_REGNO)
21189 || df_regs_ever_live_p (CR3_REGNO)
21190 || df_regs_ever_live_p (CR4_REGNO))
21192 info_ptr->cr_save_p = 1;
21193 if (DEFAULT_ABI == ABI_V4)
21194 info_ptr->cr_size = reg_size;
21197 /* If the current function calls __builtin_eh_return, then we need
21198 to allocate stack space for registers that will hold data for
21199 the exception handler. */
21200 if (crtl->calls_eh_return)
21202 unsigned int i;
21203 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
21204 continue;
21206 /* SPE saves EH registers in 64-bits. */
21207 ehrd_size = i * (TARGET_SPE_ABI
21208 && info_ptr->spe_64bit_regs_used != 0
21209 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
21211 else
21212 ehrd_size = 0;
21214 /* In the ELFv2 ABI, we also need to allocate space for separate
21215 CR field save areas if the function calls __builtin_eh_return. */
21216 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
21218 /* This hard-codes that we have three call-saved CR fields. */
21219 ehcr_size = 3 * reg_size;
21220 /* We do *not* use the regular CR save mechanism. */
21221 info_ptr->cr_save_p = 0;
21223 else
21224 ehcr_size = 0;
21226 /* Determine various sizes. */
21227 info_ptr->reg_size = reg_size;
21228 info_ptr->fixed_size = RS6000_SAVE_AREA;
21229 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
21230 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
21231 TARGET_ALTIVEC ? 16 : 8);
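/* RS6000_ALIGN rounds its first argument up to the given power-of-two
   boundary (assuming its usual definition, (n + a - 1) & ~(a - 1));
   e.g. RS6000_ALIGN (20, 16) == 32.  */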
21232 if (FRAME_GROWS_DOWNWARD)
21233 info_ptr->vars_size
21234 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
21235 + info_ptr->parm_size,
21236 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
21237 - (info_ptr->fixed_size + info_ptr->vars_size
21238 + info_ptr->parm_size);
21240 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21241 info_ptr->spe_gp_size = 8 * (32 - first_gp);
21242 else
21243 info_ptr->spe_gp_size = 0;
21245 if (TARGET_ALTIVEC_ABI)
21246 info_ptr->vrsave_mask = compute_vrsave_mask ();
21247 else
21248 info_ptr->vrsave_mask = 0;
21250 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
21251 info_ptr->vrsave_size = 4;
21252 else
21253 info_ptr->vrsave_size = 0;
21255 compute_save_world_info (info_ptr);
21257 /* Calculate the offsets. */
21258 switch (DEFAULT_ABI)
21260 case ABI_NONE:
21261 default:
21262 gcc_unreachable ();
21264 case ABI_AIX:
21265 case ABI_ELFv2:
21266 case ABI_DARWIN:
21267 info_ptr->fp_save_offset = - info_ptr->fp_size;
21268 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21270 if (TARGET_ALTIVEC_ABI)
21272 info_ptr->vrsave_save_offset
21273 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
21275 /* Align stack so vector save area is on a quadword boundary.
21276 The padding goes above the vectors. */
21277 if (info_ptr->altivec_size != 0)
21278 info_ptr->altivec_padding_size
21279 = info_ptr->vrsave_save_offset & 0xF;
21280 else
21281 info_ptr->altivec_padding_size = 0;
21283 info_ptr->altivec_save_offset
21284 = info_ptr->vrsave_save_offset
21285 - info_ptr->altivec_padding_size
21286 - info_ptr->altivec_size;
21287 gcc_assert (info_ptr->altivec_size == 0
21288 || info_ptr->altivec_save_offset % 16 == 0);
21290 /* Adjust for AltiVec case. */
21291 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
21293 else
21294 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
21296 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
21297 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
21298 info_ptr->lr_save_offset = 2*reg_size;
21299 break;
21301 case ABI_V4:
21302 info_ptr->fp_save_offset = - info_ptr->fp_size;
21303 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21304 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
21306 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21308 /* Align stack so SPE GPR save area is aligned on a
21309 double-word boundary. */
21310 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
21311 info_ptr->spe_padding_size
21312 = 8 - (-info_ptr->cr_save_offset % 8);
21313 else
21314 info_ptr->spe_padding_size = 0;
21316 info_ptr->spe_gp_save_offset
21317 = info_ptr->cr_save_offset
21318 - info_ptr->spe_padding_size
21319 - info_ptr->spe_gp_size;
21321 /* Adjust for SPE case. */
21322 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
21324 else if (TARGET_ALTIVEC_ABI)
21326 info_ptr->vrsave_save_offset
21327 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
21329 /* Align stack so vector save area is on a quadword boundary. */
21330 if (info_ptr->altivec_size != 0)
21331 info_ptr->altivec_padding_size
21332 = 16 - (-info_ptr->vrsave_save_offset % 16);
21333 else
21334 info_ptr->altivec_padding_size = 0;
21336 info_ptr->altivec_save_offset
21337 = info_ptr->vrsave_save_offset
21338 - info_ptr->altivec_padding_size
21339 - info_ptr->altivec_size;
21341 /* Adjust for AltiVec case. */
21342 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
21344 else
21345 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
21346 info_ptr->ehrd_offset -= ehrd_size;
21347 info_ptr->lr_save_offset = reg_size;
21348 break;
21351 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
21352 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
21353 + info_ptr->gp_size
21354 + info_ptr->altivec_size
21355 + info_ptr->altivec_padding_size
21356 + info_ptr->spe_gp_size
21357 + info_ptr->spe_padding_size
21358 + ehrd_size
21359 + ehcr_size
21360 + info_ptr->cr_size
21361 + info_ptr->vrsave_size,
21362 save_align);
21364 non_fixed_size = (info_ptr->vars_size
21365 + info_ptr->parm_size
21366 + info_ptr->save_size);
21368 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
21369 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
21371 /* Determine if we need to save the link register. */
21372 if (info_ptr->calls_p
21373 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21374 && crtl->profile
21375 && !TARGET_PROFILE_KERNEL)
21376 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
21377 #ifdef TARGET_RELOCATABLE
21378 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
21379 #endif
21380 || rs6000_ra_ever_killed ())
21381 info_ptr->lr_save_p = 1;
21383 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
21384 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
21385 && call_used_regs[STATIC_CHAIN_REGNUM]);
21386 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
21387 using_static_chain_p);
21389 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
21390 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
21391 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
21392 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
21393 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
21394 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
21395 info_ptr->lr_save_p = 1;
21397 if (info_ptr->lr_save_p)
21398 df_set_regs_ever_live (LR_REGNO, true);
21400 /* Determine if we need to allocate any stack frame:
21402 For AIX we need to push the stack if a frame pointer is needed
21403 (because the stack might be dynamically adjusted), if we are
21404 debugging, if we make calls, or if the sum of fp_save, gp_save,
21405 and local variables are more than the space needed to save all
21406 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
21407 + 18*8 = 288 (GPR13 reserved).
21409 For V.4 we don't have the stack cushion that AIX uses, but assume
21410 that the debugger can handle stackless frames. */
21412 if (info_ptr->calls_p)
21413 info_ptr->push_p = 1;
21415 else if (DEFAULT_ABI == ABI_V4)
21416 info_ptr->push_p = non_fixed_size != 0;
21418 else if (frame_pointer_needed)
21419 info_ptr->push_p = 1;
21421 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
21422 info_ptr->push_p = 1;
21424 else
21425 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
21427 /* Zero offsets if we're not saving those registers. */
21428 if (info_ptr->fp_size == 0)
21429 info_ptr->fp_save_offset = 0;
21431 if (info_ptr->gp_size == 0)
21432 info_ptr->gp_save_offset = 0;
21434 if (! TARGET_ALTIVEC_ABI || info_ptr->altivec_size == 0)
21435 info_ptr->altivec_save_offset = 0;
21437 /* Zero VRSAVE offset if not saved and restored. */
21438 if (! TARGET_ALTIVEC_VRSAVE || info_ptr->vrsave_mask == 0)
21439 info_ptr->vrsave_save_offset = 0;
21441 if (! TARGET_SPE_ABI
21442 || info_ptr->spe_64bit_regs_used == 0
21443 || info_ptr->spe_gp_size == 0)
21444 info_ptr->spe_gp_save_offset = 0;
21446 if (! info_ptr->lr_save_p)
21447 info_ptr->lr_save_offset = 0;
21449 if (! info_ptr->cr_save_p)
21450 info_ptr->cr_save_offset = 0;
21452 return info_ptr;
21455 /* Return true if the current function uses any GPRs in 64-bit SIMD
21456 mode. */
21458 static bool
21459 spe_func_has_64bit_regs_p (void)
21461 rtx insns, insn;
21463 /* Functions that save and restore all the call-saved registers will
21464 need to save/restore the registers in 64-bits. */
21465 if (crtl->calls_eh_return
21466 || cfun->calls_setjmp
21467 || crtl->has_nonlocal_goto)
21468 return true;
21470 insns = get_insns ();
21472 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
21474 if (INSN_P (insn))
21476 rtx i;
21478 /* FIXME: This should be implemented with attributes...
21480 (set_attr "spe64" "true")....then,
21481 if (get_spe64(insn)) return true;
21483 It's the only reliable way to do the stuff below. */
21485 i = PATTERN (insn);
21486 if (GET_CODE (i) == SET)
21488 enum machine_mode mode = GET_MODE (SET_SRC (i));
21490 if (SPE_VECTOR_MODE (mode))
21491 return true;
21492 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
21493 return true;
21498 return false;
21501 static void
21502 debug_stack_info (rs6000_stack_t *info)
21504 const char *abi_string;
21506 if (! info)
21507 info = rs6000_stack_info ();
21509 fprintf (stderr, "\nStack information for function %s:\n",
21510 ((current_function_decl && DECL_NAME (current_function_decl))
21511 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
21512 : "<unknown>"));
21514 switch (info->abi)
21516 default: abi_string = "Unknown"; break;
21517 case ABI_NONE: abi_string = "NONE"; break;
21518 case ABI_AIX: abi_string = "AIX"; break;
21519 case ABI_ELFv2: abi_string = "ELFv2"; break;
21520 case ABI_DARWIN: abi_string = "Darwin"; break;
21521 case ABI_V4: abi_string = "V.4"; break;
21524 fprintf (stderr, "\tABI = %5s\n", abi_string);
21526 if (TARGET_ALTIVEC_ABI)
21527 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
21529 if (TARGET_SPE_ABI)
21530 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
21532 if (info->first_gp_reg_save != 32)
21533 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
21535 if (info->first_fp_reg_save != 64)
21536 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
21538 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
21539 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
21540 info->first_altivec_reg_save);
21542 if (info->lr_save_p)
21543 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
21545 if (info->cr_save_p)
21546 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
21548 if (info->vrsave_mask)
21549 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
21551 if (info->push_p)
21552 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
21554 if (info->calls_p)
21555 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
21557 if (info->gp_save_offset)
21558 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
21560 if (info->fp_save_offset)
21561 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
21563 if (info->altivec_save_offset)
21564 fprintf (stderr, "\taltivec_save_offset = %5d\n",
21565 info->altivec_save_offset);
21567 if (info->spe_gp_save_offset)
21568 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
21569 info->spe_gp_save_offset);
21571 if (info->vrsave_save_offset)
21572 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
21573 info->vrsave_save_offset);
21575 if (info->lr_save_offset)
21576 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
21578 if (info->cr_save_offset)
21579 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
21581 if (info->varargs_save_offset)
21582 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
21584 if (info->total_size)
21585 fprintf (stderr, "\ttotal_size = "HOST_WIDE_INT_PRINT_DEC"\n",
21586 info->total_size);
21588 if (info->vars_size)
21589 fprintf (stderr, "\tvars_size = "HOST_WIDE_INT_PRINT_DEC"\n",
21590 info->vars_size);
21592 if (info->parm_size)
21593 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
21595 if (info->fixed_size)
21596 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
21598 if (info->gp_size)
21599 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
21601 if (info->spe_gp_size)
21602 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
21604 if (info->fp_size)
21605 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
21607 if (info->altivec_size)
21608 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
21610 if (info->vrsave_size)
21611 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
21613 if (info->altivec_padding_size)
21614 fprintf (stderr, "\taltivec_padding_size= %5d\n",
21615 info->altivec_padding_size);
21617 if (info->spe_padding_size)
21618 fprintf (stderr, "\tspe_padding_size = %5d\n",
21619 info->spe_padding_size);
21621 if (info->cr_size)
21622 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
21624 if (info->save_size)
21625 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
21627 if (info->reg_size != 4)
21628 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
21630 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
21632 fprintf (stderr, "\n");
21635 rtx
21636 rs6000_return_addr (int count, rtx frame)
21638   /* Currently we don't optimize very well between prolog and body
21639      code, and for PIC code the generated code can actually be quite
21640      bad, so don't try to be too clever here.  */
21641 if (count != 0
21642 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
21644 cfun->machine->ra_needs_full_frame = 1;
21646 return
21647 gen_rtx_MEM
21648 (Pmode,
21649 memory_address
21650 (Pmode,
21651 plus_constant (Pmode,
21652 copy_to_reg
21653 (gen_rtx_MEM (Pmode,
21654 memory_address (Pmode, frame))),
21655 RETURN_ADDRESS_OFFSET)));
21658 cfun->machine->ra_need_lr = 1;
21659 return get_hard_reg_initial_val (Pmode, LR_REGNO);
21662 /* Say whether a function is a candidate for sibcall handling or not. */
21664 static bool
21665 rs6000_function_ok_for_sibcall (tree decl, tree exp)
21667 tree fntype;
21669 if (decl)
21670 fntype = TREE_TYPE (decl);
21671 else
21672 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
21674 /* We can't do it if the called function has more vector parameters
21675 than the current function; there's nowhere to put the VRsave code. */
21676 if (TARGET_ALTIVEC_ABI
21677 && TARGET_ALTIVEC_VRSAVE
21678 && !(decl && decl == current_function_decl))
21680 function_args_iterator args_iter;
21681 tree type;
21682 int nvreg = 0;
21684 /* Functions with vector parameters are required to have a
21685 prototype, so the argument type info must be available
21686 here. */
21687 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
21688 if (TREE_CODE (type) == VECTOR_TYPE
21689 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
21690 nvreg++;
21692 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
21693 if (TREE_CODE (type) == VECTOR_TYPE
21694 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
21695 nvreg--;
21697 if (nvreg > 0)
21698 return false;
21701 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
21702 functions, because the callee may have a different TOC pointer to
21703 the caller and there's no way to ensure we restore the TOC when
21704 we return. With the secure-plt SYSV ABI we can't make non-local
21705 calls when -fpic/PIC because the plt call stubs use r30. */
21706 if (DEFAULT_ABI == ABI_DARWIN
21707 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21708 && decl
21709 && !DECL_EXTERNAL (decl)
21710 && (*targetm.binds_local_p) (decl))
21711 || (DEFAULT_ABI == ABI_V4
21712 && (!TARGET_SECURE_PLT
21713 || !flag_pic
21714 || (decl
21715 && (*targetm.binds_local_p) (decl)))))
21717 tree attr_list = TYPE_ATTRIBUTES (fntype);
21719 if (!lookup_attribute ("longcall", attr_list)
21720 || lookup_attribute ("shortcall", attr_list))
21721 return true;
21724 return false;
21727 static int
21728 rs6000_ra_ever_killed (void)
21730 rtx top;
21731 rtx reg;
21732 rtx insn;
21734 if (cfun->is_thunk)
21735 return 0;
21737 if (cfun->machine->lr_save_state)
21738 return cfun->machine->lr_save_state - 1;
21740 /* regs_ever_live has LR marked as used if any sibcalls are present,
21741 but this should not force saving and restoring in the
21742 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
21743 clobbers LR, so that is inappropriate. */
21745 /* Also, the prologue can generate a store into LR that
21746 doesn't really count, like this:
21748 move LR->R0
21749 bcl to set PIC register
21750 move LR->R31
21751 move R0->LR
21753 When we're called from the epilogue, we need to avoid counting
21754 this as a store. */
21756 push_topmost_sequence ();
21757 top = get_insns ();
21758 pop_topmost_sequence ();
21759 reg = gen_rtx_REG (Pmode, LR_REGNO);
21761 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
21763 if (INSN_P (insn))
21765 if (CALL_P (insn))
21767 if (!SIBLING_CALL_P (insn))
21768 return 1;
21770 else if (find_regno_note (insn, REG_INC, LR_REGNO))
21771 return 1;
21772 else if (set_of (reg, insn) != NULL_RTX
21773 && !prologue_epilogue_contains (insn))
21774 return 1;
21777 return 0;
21780 /* Emit instructions needed to load the TOC register.
21781 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
21782 and there is a constant pool, or for SVR4 -fpic. */
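/* For illustration only: with 32-bit SVR4, -fPIC and a secure PLT,
   the sequence built in the first arm below amounts to roughly

	bcl 20,31,.LCF0
   .LCF0:
	mflr %rTMP1
	addis %rTMP2,%rTMP1,(.LCTOC1-.LCF0)@ha
	addi %rDEST,%rTMP2,(.LCTOC1-.LCF0)@l

   where .LCTOC1 stands in for the GOT/TOC symbol chosen from flag_pic.
   The label and register names are made up for the sketch; the real
   insns come from the load_toc_v4_PIC_* patterns.  */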
21784 void
21785 rs6000_emit_load_toc_table (int fromprolog)
21787 rtx dest;
21788 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
21790 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
21792 char buf[30];
21793 rtx lab, tmp1, tmp2, got;
21795 lab = gen_label_rtx ();
21796 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
21797 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
21798 if (flag_pic == 2)
21799 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
21800 else
21801 got = rs6000_got_sym ();
21802 tmp1 = tmp2 = dest;
21803 if (!fromprolog)
21805 tmp1 = gen_reg_rtx (Pmode);
21806 tmp2 = gen_reg_rtx (Pmode);
21808 emit_insn (gen_load_toc_v4_PIC_1 (lab));
21809 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
21810 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
21811 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
21813 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
21815 emit_insn (gen_load_toc_v4_pic_si ());
21816 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
21818 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
21820 char buf[30];
21821 rtx temp0 = (fromprolog
21822 ? gen_rtx_REG (Pmode, 0)
21823 : gen_reg_rtx (Pmode));
21825 if (fromprolog)
21827 rtx symF, symL;
21829 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21830 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
21832 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
21833 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
21835 emit_insn (gen_load_toc_v4_PIC_1 (symF));
21836 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
21837 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
21839 else
21841 rtx tocsym, lab;
21843 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
21844 lab = gen_label_rtx ();
21845 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
21846 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
21847 if (TARGET_LINK_STACK)
21848 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
21849 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
21851 emit_insn (gen_addsi3 (dest, temp0, dest));
21853 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
21855 /* This is for AIX code running in non-PIC ELF32. */
21856 char buf[30];
21857 rtx realsym;
21858 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
21859 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
21861 emit_insn (gen_elf_high (dest, realsym));
21862 emit_insn (gen_elf_low (dest, dest, realsym));
21864 else
21866 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
21868 if (TARGET_32BIT)
21869 emit_insn (gen_load_toc_aix_si (dest));
21870 else
21871 emit_insn (gen_load_toc_aix_di (dest));
21875 /* Emit instructions to restore the link register after determining where
21876 its value has been stored. */
21878 void
21879 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
21881 rs6000_stack_t *info = rs6000_stack_info ();
21882 rtx operands[2];
21884 operands[0] = source;
21885 operands[1] = scratch;
21887 if (info->lr_save_p)
21889 rtx frame_rtx = stack_pointer_rtx;
21890 HOST_WIDE_INT sp_offset = 0;
21891 rtx tmp;
21893 if (frame_pointer_needed
21894 || cfun->calls_alloca
21895 || info->total_size > 32767)
21897 tmp = gen_frame_mem (Pmode, frame_rtx);
21898 emit_move_insn (operands[1], tmp);
21899 frame_rtx = operands[1];
21901 else if (info->push_p)
21902 sp_offset = info->total_size;
21904 tmp = plus_constant (Pmode, frame_rtx,
21905 info->lr_save_offset + sp_offset);
21906 tmp = gen_frame_mem (Pmode, tmp);
21907 emit_move_insn (tmp, operands[0]);
21909 else
21910 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
21912 /* Freeze lr_save_p. We've just emitted rtl that depends on the
21913 state of lr_save_p so any change from here on would be a bug. In
21914 particular, stop rs6000_ra_ever_killed from considering the SET
21915 of lr we may have added just above. */
21916 cfun->machine->lr_save_state = info->lr_save_p + 1;
21919 static GTY(()) alias_set_type set = -1;
21921 alias_set_type
21922 get_TOC_alias_set (void)
21924 if (set == -1)
21925 set = new_alias_set ();
21926 return set;
21929 /* This returns nonzero if the current function uses the TOC. This is
21930 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
21931 is generated by the ABI_V4 load_toc_* patterns. */
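/* Illustrative shape of what the scan below looks for:

     (parallel [ ...
		 (use (unspec [ ... ] UNSPEC_TOC))
		 ... ])

   i.e. a USE of an UNSPEC_TOC nested inside a PARALLEL.  (A sketch;
   the unspec operands are whatever the load_toc_* patterns supplied.)  */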
21932 #if TARGET_ELF
21933 static int
21934 uses_TOC (void)
21936 rtx insn;
21938 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21939 if (INSN_P (insn))
21941 rtx pat = PATTERN (insn);
21942 int i;
21944 if (GET_CODE (pat) == PARALLEL)
21945 for (i = 0; i < XVECLEN (pat, 0); i++)
21947 rtx sub = XVECEXP (pat, 0, i);
21948 if (GET_CODE (sub) == USE)
21950 sub = XEXP (sub, 0);
21951 if (GET_CODE (sub) == UNSPEC
21952 && XINT (sub, 1) == UNSPEC_TOC)
21953 return 1;
21957 return 0;
21959 #endif
21961 rtx
21962 create_TOC_reference (rtx symbol, rtx largetoc_reg)
21964 rtx tocrel, tocreg, hi;
21966 if (TARGET_DEBUG_ADDR)
21968 if (GET_CODE (symbol) == SYMBOL_REF)
21969 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
21970 XSTR (symbol, 0));
21971 else
21973 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
21974 GET_RTX_NAME (GET_CODE (symbol)));
21975 debug_rtx (symbol);
21979 if (!can_create_pseudo_p ())
21980 df_set_regs_ever_live (TOC_REGISTER, true);
21982 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
21983 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
21984 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
21985 return tocrel;
21987 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
21988 if (largetoc_reg != NULL)
21990 emit_move_insn (largetoc_reg, hi);
21991 hi = largetoc_reg;
21993 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
21996 /* Issue assembly directives that create a reference to the given DWARF
21997 FRAME_TABLE_LABEL from the current function section. */
21998 void
21999 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
22001 fprintf (asm_out_file, "\t.ref %s\n",
22002 (* targetm.strip_name_encoding) (frame_table_label));
22005 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
22006 and the change to the stack pointer. */
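/* Roughly, each tie emitted below is a dummy store of the form

     (set (mem:BLK (reg R)) (const_int 0))

   for the stack pointer and, when needed, the hard frame pointer and
   FP.  The store has no runtime effect; the dependency it creates is
   what keeps the scheduler from moving frame accesses across the
   stack pointer update.  */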
22008 static void
22009 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
22011 rtvec p;
22012 int i;
22013 rtx regs[3];
22015 i = 0;
22016 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22017 if (hard_frame_needed)
22018 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
22019 if (!(REGNO (fp) == STACK_POINTER_REGNUM
22020 || (hard_frame_needed
22021 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
22022 regs[i++] = fp;
22024 p = rtvec_alloc (i);
22025 while (--i >= 0)
22027 rtx mem = gen_frame_mem (BLKmode, regs[i]);
22028 RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, mem, const0_rtx);
22031 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
22034 /* Emit the correct code for allocating stack space, as insns.
22035 If COPY_REG, make sure a copy of the old frame is left there.
22036 The generated code may use hard register 0 as a temporary. */
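/* A sketch of the usual output (32-bit shown; sizes are examples):

	stwu 1,-SIZE(1)			# SIZE <= 32767

   or, for larger frames, first materializing -SIZE in r0:

	lis 0,-SIZE@ha
	addi 0,0,-SIZE@l
	stwux 1,1,0

   The actual insns come from the mov*_update_stack patterns, and the
   stack-limit checking below may add a trap sequence in front.  */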
22038 static void
22039 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
22041 rtx insn;
22042 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22043 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
22044 rtx todec = gen_int_mode (-size, Pmode);
22045 rtx par, set, mem;
22047 if (INTVAL (todec) != -size)
22049 warning (0, "stack frame too large");
22050 emit_insn (gen_trap ());
22051 return;
22054 if (crtl->limit_stack)
22056 if (REG_P (stack_limit_rtx)
22057 && REGNO (stack_limit_rtx) > 1
22058 && REGNO (stack_limit_rtx) <= 31)
22060 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
22061 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22062 const0_rtx));
22064 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
22065 && TARGET_32BIT
22066 && DEFAULT_ABI == ABI_V4)
22068 rtx toload = gen_rtx_CONST (VOIDmode,
22069 gen_rtx_PLUS (Pmode,
22070 stack_limit_rtx,
22071 GEN_INT (size)));
22073 emit_insn (gen_elf_high (tmp_reg, toload));
22074 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
22075 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22076 const0_rtx));
22078 else
22079 warning (0, "stack limit expression is not supported");
22082 if (copy_reg)
22084 if (copy_off != 0)
22085 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
22086 else
22087 emit_move_insn (copy_reg, stack_reg);
22090 if (size > 32767)
22092 /* Need a note here so that try_split doesn't get confused. */
22093 if (get_last_insn () == NULL_RTX)
22094 emit_note (NOTE_INSN_DELETED);
22095 insn = emit_move_insn (tmp_reg, todec);
22096 try_split (PATTERN (insn), insn, 0);
22097 todec = tmp_reg;
22100 insn = emit_insn (TARGET_32BIT
22101 ? gen_movsi_update_stack (stack_reg, stack_reg,
22102 todec, stack_reg)
22103 : gen_movdi_di_update_stack (stack_reg, stack_reg,
22104 todec, stack_reg));
22105 /* Since we didn't use gen_frame_mem to generate the MEM, grab
22106 it now and set the alias set/attributes. The above gen_*_update
22107 calls will generate a PARALLEL with the MEM set being the first
22108 operation. */
22109 par = PATTERN (insn);
22110 gcc_assert (GET_CODE (par) == PARALLEL);
22111 set = XVECEXP (par, 0, 0);
22112 gcc_assert (GET_CODE (set) == SET);
22113 mem = SET_DEST (set);
22114 gcc_assert (MEM_P (mem));
22115 MEM_NOTRAP_P (mem) = 1;
22116 set_mem_alias_set (mem, get_frame_alias_set ());
22118 RTX_FRAME_RELATED_P (insn) = 1;
22119 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
22120 gen_rtx_SET (VOIDmode, stack_reg,
22121 gen_rtx_PLUS (Pmode, stack_reg,
22122 GEN_INT (-size))));
22125 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22127 #if PROBE_INTERVAL > 32768
22128 #error Cannot use indexed addressing mode for stack probing
22129 #endif
22131 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22132 inclusive. These are offsets from the current stack pointer. */
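/* Worked example, assuming the default PROBE_INTERVAL of 4096 bytes
   (STACK_CHECK_PROBE_INTERVAL_EXP == 12): for FIRST == 16384 and
   SIZE == 10000 we take the small-count path below and probe at
   sp-20480, sp-24576 and finally sp-26384 (== FIRST + SIZE).  */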
22134 static void
22135 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
22137 /* See if we have a constant small number of probes to generate. If so,
22138 that's the easy case. */
22139 if (first + size <= 32768)
22141 HOST_WIDE_INT i;
22143 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
22144 it exceeds SIZE. If only one probe is needed, this will not
22145 generate any code. Then probe at FIRST + SIZE. */
22146 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
22147 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22148 -(first + i)));
22150 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22151 -(first + size)));
22154 /* Otherwise, do the same as above, but in a loop. Note that we must be
22155 extra careful with variables wrapping around because we might be at
22156 the very top (or the very bottom) of the address space and we have
22157 to be able to handle this case properly; in particular, we use an
22158 equality test for the loop condition. */
22159 else
22161 HOST_WIDE_INT rounded_size;
22162 rtx r12 = gen_rtx_REG (Pmode, 12);
22163 rtx r0 = gen_rtx_REG (Pmode, 0);
22165 /* Sanity check for the addressing mode we're going to use. */
22166 gcc_assert (first <= 32768);
22168 /* Step 1: round SIZE to the previous multiple of the interval. */
22170 rounded_size = size & -PROBE_INTERVAL;
22173 /* Step 2: compute initial and final value of the loop counter. */
22175 /* TEST_ADDR = SP + FIRST. */
22176 emit_insn (gen_rtx_SET (VOIDmode, r12,
22177 plus_constant (Pmode, stack_pointer_rtx,
22178 -first)));
22180 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22181 if (rounded_size > 32768)
22183 emit_move_insn (r0, GEN_INT (-rounded_size));
22184 emit_insn (gen_rtx_SET (VOIDmode, r0,
22185 gen_rtx_PLUS (Pmode, r12, r0)));
22187 else
22188 emit_insn (gen_rtx_SET (VOIDmode, r0,
22189 plus_constant (Pmode, r12, -rounded_size)));
22192 /* Step 3: the loop
22194 while (TEST_ADDR != LAST_ADDR)
22196 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
22197 probe at TEST_ADDR
22200 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
22201 until it is equal to ROUNDED_SIZE. */
22203 if (TARGET_64BIT)
22204 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
22205 else
22206 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
22209 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22210 that SIZE is equal to ROUNDED_SIZE. */
22212 if (size != rounded_size)
22213 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
22217 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22218 absolute addresses. */
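/* The loop printed below looks roughly like this (32-bit, with the
   default 4096-byte PROBE_INTERVAL; reg1/reg2 are the r12/r0 set up
   by rs6000_emit_probe_stack_range above):

   .LPSRL0:
	cmpw 0,%r12,%r0		# TEST_ADDR == LAST_ADDR ?
	beq 0,.LPSRE0
	addi %r12,%r12,-4096
	stw %r0,0(%r12)		# the probe itself
	b .LPSRL0
   .LPSRE0:

   This is a sketch; the exact text is produced by the code below.  */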
22220 const char *
22221 output_probe_stack_range (rtx reg1, rtx reg2)
22223 static int labelno = 0;
22224 char loop_lab[32], end_lab[32];
22225 rtx xops[2];
22227 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
22228 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
22230 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22232 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
22233 xops[0] = reg1;
22234 xops[1] = reg2;
22235 if (TARGET_64BIT)
22236 output_asm_insn ("cmpd 0,%0,%1", xops);
22237 else
22238 output_asm_insn ("cmpw 0,%0,%1", xops);
22240 fputs ("\tbeq 0,", asm_out_file);
22241 assemble_name_raw (asm_out_file, end_lab);
22242 fputc ('\n', asm_out_file);
22244 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22245 xops[1] = GEN_INT (-PROBE_INTERVAL);
22246 output_asm_insn ("addi %0,%0,%1", xops);
22248 /* Probe at TEST_ADDR and branch. */
22249 xops[1] = gen_rtx_REG (Pmode, 0);
22250 output_asm_insn ("stw %1,0(%0)", xops);
22251 fprintf (asm_out_file, "\tb ");
22252 assemble_name_raw (asm_out_file, loop_lab);
22253 fputc ('\n', asm_out_file);
22255 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
22257 return "";
22260 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
22261 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
22262 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
22263 deduce these equivalences by itself so it wasn't necessary to hold
22264 its hand so much. Don't be tempted to always supply d2_f_d_e with
22265 the actual CFA register, i.e. r31 when we are using a hard frame
22266 pointer. That fails when saving regs off r1, and sched moves the
22267 r31 setup past the reg saves. */
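/* Worked example: if INSN stores a register at offset 8 from REG, and
   REG currently equals r1 + VAL, the note attached below rewrites the
   store as if it had been

     (set (mem (plus (reg 1) (const_int VAL+8))) (reg N))

   so the unwinder always sees an sp-relative save.  (Illustrative;
   the rewrite is done textually by replace_rtx and simplify_rtx.)  */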
22269 static rtx
22270 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
22271 rtx reg2, rtx rreg, rtx split_reg)
22273 rtx real, temp;
22275 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
22277 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
22278 int i;
22280 gcc_checking_assert (val == 0);
22281 real = PATTERN (insn);
22282 if (GET_CODE (real) == PARALLEL)
22283 for (i = 0; i < XVECLEN (real, 0); i++)
22284 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22286 rtx set = XVECEXP (real, 0, i);
22288 RTX_FRAME_RELATED_P (set) = 1;
22290 RTX_FRAME_RELATED_P (insn) = 1;
22291 return insn;
22294 /* copy_rtx will not make unique copies of registers, so we need to
22295 ensure we don't have unwanted sharing here. */
22296 if (reg == reg2)
22297 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22299 if (reg == rreg)
22300 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22302 real = copy_rtx (PATTERN (insn));
22304 if (reg2 != NULL_RTX)
22305 real = replace_rtx (real, reg2, rreg);
22307 if (REGNO (reg) == STACK_POINTER_REGNUM)
22308 gcc_checking_assert (val == 0);
22309 else
22310 real = replace_rtx (real, reg,
22311 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
22312 STACK_POINTER_REGNUM),
22313 GEN_INT (val)));
22315 /* We expect that 'real' is either a SET or a PARALLEL containing
22316 SETs (and possibly other stuff). In a PARALLEL, all the SETs
22317 are important so they all have to be marked RTX_FRAME_RELATED_P. */
22319 if (GET_CODE (real) == SET)
22321 rtx set = real;
22323 temp = simplify_rtx (SET_SRC (set));
22324 if (temp)
22325 SET_SRC (set) = temp;
22326 temp = simplify_rtx (SET_DEST (set));
22327 if (temp)
22328 SET_DEST (set) = temp;
22329 if (GET_CODE (SET_DEST (set)) == MEM)
22331 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22332 if (temp)
22333 XEXP (SET_DEST (set), 0) = temp;
22336 else
22338 int i;
22340 gcc_assert (GET_CODE (real) == PARALLEL);
22341 for (i = 0; i < XVECLEN (real, 0); i++)
22342 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22344 rtx set = XVECEXP (real, 0, i);
22346 temp = simplify_rtx (SET_SRC (set));
22347 if (temp)
22348 SET_SRC (set) = temp;
22349 temp = simplify_rtx (SET_DEST (set));
22350 if (temp)
22351 SET_DEST (set) = temp;
22352 if (GET_CODE (SET_DEST (set)) == MEM)
22354 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22355 if (temp)
22356 XEXP (SET_DEST (set), 0) = temp;
22358 RTX_FRAME_RELATED_P (set) = 1;
22362 /* If a store insn has been split into multiple insns, the
22363 true source register is given by split_reg. */
22364 if (split_reg != NULL_RTX)
22365 real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
22367 RTX_FRAME_RELATED_P (insn) = 1;
22368 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
22370 return insn;
22373 /* Returns an insn that has a vrsave set operation with the
22374 appropriate CLOBBERs. */
22376 static rtx
22377 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
22379 int nclobs, i;
22380 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
22381 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
22383 clobs[0]
22384 = gen_rtx_SET (VOIDmode,
22385 vrsave,
22386 gen_rtx_UNSPEC_VOLATILE (SImode,
22387 gen_rtvec (2, reg, vrsave),
22388 UNSPECV_SET_VRSAVE));
22390 nclobs = 1;
22392 /* We need to clobber the registers in the mask so the scheduler
22393 does not move sets to VRSAVE before sets of AltiVec registers.
22395 However, if the function receives nonlocal gotos, reload will set
22396 all call saved registers live. We will end up with:
22398 (set (reg 999) (mem))
22399 (parallel [ (set (reg vrsave) (unspec blah))
22400 (clobber (reg 999))])
22402 The clobber will cause the store into reg 999 to be dead, and
22403 flow will attempt to delete an epilogue insn. In this case, we
22404 need an unspec use/set of the register. */
22406 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
22407 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22409 if (!epiloguep || call_used_regs [i])
22410 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
22411 gen_rtx_REG (V4SImode, i));
22412 else
22414 rtx reg = gen_rtx_REG (V4SImode, i);
22416 clobs[nclobs++]
22417 = gen_rtx_SET (VOIDmode,
22418 reg,
22419 gen_rtx_UNSPEC (V4SImode,
22420 gen_rtvec (1, reg), 27));
22424 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
22426 for (i = 0; i < nclobs; ++i)
22427 XVECEXP (insn, 0, i) = clobs[i];
22429 return insn;
22432 static rtx
22433 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
22435 rtx addr, mem;
22437 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
22438 mem = gen_frame_mem (GET_MODE (reg), addr);
22439 return gen_rtx_SET (VOIDmode, store ? mem : reg, store ? reg : mem);
22442 static rtx
22443 gen_frame_load (rtx reg, rtx frame_reg, int offset)
22445 return gen_frame_set (reg, frame_reg, offset, false);
22448 static rtx
22449 gen_frame_store (rtx reg, rtx frame_reg, int offset)
22451 return gen_frame_set (reg, frame_reg, offset, true);
22454 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
22455 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
22457 static rtx
22458 emit_frame_save (rtx frame_reg, enum machine_mode mode,
22459 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
22461 rtx reg, insn;
22463 /* Some cases that need register indexed addressing. */
22464 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
22465 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22466 || (TARGET_E500_DOUBLE && mode == DFmode)
22467 || (TARGET_SPE_ABI
22468 && SPE_VECTOR_MODE (mode)
22469 && !SPE_CONST_OFFSET_OK (offset))));
22471 reg = gen_rtx_REG (mode, regno);
22472 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
22473 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
22474 NULL_RTX, NULL_RTX, NULL_RTX);
22477 /* Emit an offset memory reference suitable for a frame store, while
22478 converting to a valid addressing mode. */
22480 static rtx
22481 gen_frame_mem_offset (enum machine_mode mode, rtx reg, int offset)
22483 rtx int_rtx, offset_rtx;
22485 int_rtx = GEN_INT (offset);
22487 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
22488 || (TARGET_E500_DOUBLE && mode == DFmode))
22490 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
22491 emit_move_insn (offset_rtx, int_rtx);
22493 else
22494 offset_rtx = int_rtx;
22496 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
22499 #ifndef TARGET_FIX_AND_CONTINUE
22500 #define TARGET_FIX_AND_CONTINUE 0
22501 #endif
22503 /* It's really GPR 13 or 14, FPR 14 and VR 20. We need the smallest. */
22504 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
22505 #define LAST_SAVRES_REGISTER 31
22506 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
22508 enum {
22509 SAVRES_LR = 0x1,
22510 SAVRES_SAVE = 0x2,
22511 SAVRES_REG = 0x0c,
22512 SAVRES_GPR = 0,
22513 SAVRES_FPR = 4,
22514 SAVRES_VR = 8
22515 };
22517 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
22519 /* Temporary holding space for an out-of-line register save/restore
22520 routine name. */
22521 static char savres_routine_name[30];
22523 /* Return the name for an out-of-line register save/restore routine.
22524 We are saving/restoring GPRs if GPR is true. */
22526 static char *
22527 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
22529 const char *prefix = "";
22530 const char *suffix = "";
22532 /* Different targets are supposed to define
22533 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
22534 routine name could be defined with:
22536 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
22538 This is a nice idea in principle, but in reality, things are
22539 complicated in several ways:
22541 - ELF targets have save/restore routines for GPRs.
22543 - SPE targets use different prefixes for 32/64-bit registers, and
22544 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
22546 - PPC64 ELF targets have routines for save/restore of GPRs that
22547 differ in what they do with the link register, so having a set
22548 prefix doesn't work. (We only use one of the save routines at
22549 the moment, though.)
22551 - PPC32 elf targets have "exit" versions of the restore routines
22552 that restore the link register and can save some extra space.
22553 These require an extra suffix. (There are also "tail" versions
22554 of the restore routines and "GOT" versions of the save routines,
22555 but we don't generate those at present. Same problems apply,
22556 though.)
22558 We deal with all this by synthesizing our own prefix/suffix and
22559 using that for the simple sprintf call shown above. */
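/* For example (assuming the usual prefixes): saving GPRs starting at
   r28 out of line uses "_savegpr_28" on 32-bit SVR4 but "_savegpr0_28"
   or "_savegpr1_28" on AIX/ELFv2, depending on whether the routine
   also stores LR; "_rest32gpr_28_x" would be an SPE "exit" restore.  */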
22560 if (TARGET_SPE)
22562 /* No floating point saves on the SPE. */
22563 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
22565 if ((sel & SAVRES_SAVE))
22566 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
22567 else
22568 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
22570 if ((sel & SAVRES_LR))
22571 suffix = "_x";
22573 else if (DEFAULT_ABI == ABI_V4)
22575 if (TARGET_64BIT)
22576 goto aix_names;
22578 if ((sel & SAVRES_REG) == SAVRES_GPR)
22579 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
22580 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22581 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
22582 else if ((sel & SAVRES_REG) == SAVRES_VR)
22583 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
22584 else
22585 abort ();
22587 if ((sel & SAVRES_LR))
22588 suffix = "_x";
22590 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22592 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
22593 /* No out-of-line save/restore routines for GPRs on AIX. */
22594 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
22595 #endif
22597 aix_names:
22598 if ((sel & SAVRES_REG) == SAVRES_GPR)
22599 prefix = ((sel & SAVRES_SAVE)
22600 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
22601 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
22602 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22604 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
22605 if ((sel & SAVRES_LR))
22606 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
22607 else
22608 #endif
22610 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
22611 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
22614 else if ((sel & SAVRES_REG) == SAVRES_VR)
22615 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
22616 else
22617 abort ();
22620 if (DEFAULT_ABI == ABI_DARWIN)
22622 /* The Darwin approach is (slightly) different, in order to be
22623 compatible with code generated by the system toolchain. There is a
22624 single symbol for the start of save sequence, and the code here
22625 embeds an offset into that code on the basis of the first register
22626 to be saved. */
22627 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
22628 if ((sel & SAVRES_REG) == SAVRES_GPR)
22629 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
22630 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
22631 (regno - 13) * 4, prefix, regno);
22632 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22633 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
22634 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
22635 else if ((sel & SAVRES_REG) == SAVRES_VR)
22636 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
22637 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
22638 else
22639 abort ();
22641 else
22642 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
22644 return savres_routine_name;
22647 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
22648 We are saving/restoring GPRs if GPR is true. */
22650 static rtx
22651 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
22653 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
22654 ? info->first_gp_reg_save
22655 : (sel & SAVRES_REG) == SAVRES_FPR
22656 ? info->first_fp_reg_save - 32
22657 : (sel & SAVRES_REG) == SAVRES_VR
22658 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
22659 : -1);
22660 rtx sym;
22661 int select = sel;
22663 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
22664 versions of the gpr routines. */
22665 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
22666 && info->spe_64bit_regs_used)
22667 select ^= SAVRES_FPR ^ SAVRES_GPR;
22669 /* Don't generate bogus routine names. */
22670 gcc_assert (FIRST_SAVRES_REGISTER <= regno
22671 && regno <= LAST_SAVRES_REGISTER
22672 && select >= 0 && select <= 12);
22674 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
22676 if (sym == NULL)
22678 char *name;
22680 name = rs6000_savres_routine_name (info, regno, sel);
22682 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
22683 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
22684 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
22687 return sym;
22690 /* Emit a sequence of insns, including a stack tie if needed, for
22691 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
22692 reset the stack pointer, but move the base of the frame into
22693 reg UPDT_REGNO for use by out-of-line register restore routines. */
22695 static rtx
22696 rs6000_emit_stack_reset (rs6000_stack_t *info,
22697 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
22698 unsigned updt_regno)
22700 rtx updt_reg_rtx;
22702 /* This blockage is needed so that sched doesn't decide to move
22703 the sp change before the register restores. */
22704 if (DEFAULT_ABI == ABI_V4
22705 || (TARGET_SPE_ABI
22706 && info->spe_64bit_regs_used != 0
22707 && info->first_gp_reg_save != 32))
22708 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
22710 /* If we are restoring registers out-of-line, we will be using the
22711 "exit" variants of the restore routines, which will reset the
22712 stack for us. But we do need to point updt_reg into the
22713 right place for those routines. */
22714 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
22716 if (frame_off != 0)
22717 return emit_insn (gen_add3_insn (updt_reg_rtx,
22718 frame_reg_rtx, GEN_INT (frame_off)));
22719 else if (REGNO (frame_reg_rtx) != updt_regno)
22720 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
22722 return NULL_RTX;
22725 /* Return the register number used as a pointer by out-of-line
22726 save/restore functions. */
22728 static inline unsigned
22729 ptr_regno_for_savres (int sel)
22731 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22732 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
22733 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
22736 /* Construct a parallel rtx describing the effect of a call to an
22737 out-of-line register save/restore routine, and emit the insn
22738 or jump_insn as appropriate. */
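/* A sketch of the PARALLEL built here, for an out-of-line GPR save
   starting at r28 (register numbers and names are examples):

     (parallel [(clobber (reg:P LR_REGNO))
		(use (symbol_ref "_savegpr0_28"))
		(use (reg:P PTR_REGNO))
		(set (mem (plus PTR OFF+0)) (reg 28))
		...
		(set (mem (plus PTR OFF+12)) (reg 31))])

   Restores that return get a leading (return), and the SAVRES_LR save
   variants append a store of r0 (holding LR) at LR_OFFSET.  */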
22740 static rtx
22741 rs6000_emit_savres_rtx (rs6000_stack_t *info,
22742 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
22743 enum machine_mode reg_mode, int sel)
22745 int i;
22746 int offset, start_reg, end_reg, n_regs, use_reg;
22747 int reg_size = GET_MODE_SIZE (reg_mode);
22748 rtx sym;
22749 rtvec p;
22750 rtx par, insn;
22752 offset = 0;
22753 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
22754 ? info->first_gp_reg_save
22755 : (sel & SAVRES_REG) == SAVRES_FPR
22756 ? info->first_fp_reg_save
22757 : (sel & SAVRES_REG) == SAVRES_VR
22758 ? info->first_altivec_reg_save
22759 : -1);
22760 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
22761 ? 32
22762 : (sel & SAVRES_REG) == SAVRES_FPR
22763 ? 64
22764 : (sel & SAVRES_REG) == SAVRES_VR
22765 ? LAST_ALTIVEC_REGNO + 1
22766 : -1);
22767 n_regs = end_reg - start_reg;
22768 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
22769 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
22770 + n_regs);
22772 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
22773 RTVEC_ELT (p, offset++) = ret_rtx;
22775 RTVEC_ELT (p, offset++)
22776 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
22778 sym = rs6000_savres_routine_sym (info, sel);
22779 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
22781 use_reg = ptr_regno_for_savres (sel);
22782 if ((sel & SAVRES_REG) == SAVRES_VR)
22784 /* Vector regs are saved/restored using [reg+reg] addressing. */
22785 RTVEC_ELT (p, offset++)
22786 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
22787 RTVEC_ELT (p, offset++)
22788 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
22790 else
22791 RTVEC_ELT (p, offset++)
22792 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
22794 for (i = 0; i < end_reg - start_reg; i++)
22795 RTVEC_ELT (p, i + offset)
22796 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
22797 frame_reg_rtx, save_area_offset + reg_size * i,
22798 (sel & SAVRES_SAVE) != 0);
22800 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
22801 RTVEC_ELT (p, i + offset)
22802 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
22804 par = gen_rtx_PARALLEL (VOIDmode, p);
22806 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
22808 insn = emit_jump_insn (par);
22809 JUMP_LABEL (insn) = ret_rtx;
22811 else
22812 insn = emit_insn (par);
22813 return insn;
22816 /* Emit code to store CR fields that need to be saved into REG. */
22818 static void
22819 rs6000_emit_move_from_cr (rtx reg)
22821 /* Only the ELFv2 ABI allows saving just the selected fields. */
22822 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
22824 int i, cr_reg[8], count = 0;
22826 /* Collect CR fields that must be saved. */
22827 for (i = 0; i < 8; i++)
22828 if (save_reg_p (CR0_REGNO + i))
22829 cr_reg[count++] = i;
22831 /* If it's just a single one, use mfcrf. */
22832 if (count == 1)
22834 rtvec p = rtvec_alloc (1);
22835 rtvec r = rtvec_alloc (2);
22836 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
22837 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
22838 RTVEC_ELT (p, 0)
22839 = gen_rtx_SET (VOIDmode, reg,
22840 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
22842 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
22843 return;
22846 /* ??? It might be better to handle count == 2 / 3 cases here
22847 as well, using logical operations to combine the values. */
22850 emit_insn (gen_movesi_from_cr (reg));
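/* (For reference: mfcrf/mfocrf copies just the selected CR field into
   the GPR, selected by the FXM mask 1 << (7 - field) built above,
   while the mfcr behind movesi_from_cr copies all eight fields.)  */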
22853 /* Determine whether the gp REG is really used. */
22855 static bool
22856 rs6000_reg_live_or_pic_offset_p (int reg)
22858 /* If the function calls eh_return, claim used all the registers that would
22859 be checked for liveness otherwise. This is required for the PIC offset
22860 register with -mminimal-toc on AIX, as it is advertised as "fixed" for
22861 register allocation purposes in this case. */
22863 return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
22864 && (!call_used_regs[reg]
22865 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
22866 && !TARGET_SINGLE_PIC_BASE
22867 && TARGET_TOC && TARGET_MINIMAL_TOC)))
22868 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
22869 && !TARGET_SINGLE_PIC_BASE
22870 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
22871 || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
22874 /* Emit function prologue as insns. */
22876 void
22877 rs6000_emit_prologue (void)
22879 rs6000_stack_t *info = rs6000_stack_info ();
22880 enum machine_mode reg_mode = Pmode;
22881 int reg_size = TARGET_32BIT ? 4 : 8;
22882 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22883 rtx frame_reg_rtx = sp_reg_rtx;
22884 unsigned int cr_save_regno;
22885 rtx cr_save_rtx = NULL_RTX;
22886 rtx insn;
22887 int strategy;
22888 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
22889 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
22890 && call_used_regs[STATIC_CHAIN_REGNUM]);
22891 /* Offset to top of frame for frame_reg and sp respectively. */
22892 HOST_WIDE_INT frame_off = 0;
22893 HOST_WIDE_INT sp_off = 0;
22895 #ifdef ENABLE_CHECKING
22896 /* Track and check usage of r0, r11, r12. */
22897 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
22898 #define START_USE(R) do \
22900 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
22901 reg_inuse |= 1 << (R); \
22902 } while (0)
22903 #define END_USE(R) do \
22905 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
22906 reg_inuse &= ~(1 << (R)); \
22907 } while (0)
22908 #define NOT_INUSE(R) do \
22910 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
22911 } while (0)
22912 #else
22913 #define START_USE(R) do {} while (0)
22914 #define END_USE(R) do {} while (0)
22915 #define NOT_INUSE(R) do {} while (0)
22916 #endif
22918 if (DEFAULT_ABI == ABI_ELFv2)
22920 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
22922 /* With -mminimal-toc we may generate an extra use of r2 below. */
22923 if (!TARGET_SINGLE_PIC_BASE
22924 && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
22925 cfun->machine->r2_setup_needed = true;
22929 if (flag_stack_usage_info)
22930 current_function_static_stack_size = info->total_size;
22932 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
22934 HOST_WIDE_INT size = info->total_size;
22936 if (crtl->is_leaf && !cfun->calls_alloca)
22938 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
22939 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
22940 size - STACK_CHECK_PROTECT);
22942 else if (size > 0)
22943 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
22946 if (TARGET_FIX_AND_CONTINUE)
22948 /* gdb on darwin arranges to forward a function from the old
22949 address by modifying the first 5 instructions of the function
22950 to branch to the overriding function. This is necessary to
22951 permit function pointers that point to the old function to
22952 actually forward to the new function. */
22953 emit_insn (gen_nop ());
22954 emit_insn (gen_nop ());
22955 emit_insn (gen_nop ());
22956 emit_insn (gen_nop ());
22957 emit_insn (gen_nop ());
22960 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
22962 reg_mode = V2SImode;
22963 reg_size = 8;
22966 /* Handle world saves specially here. */
22967 if (WORLD_SAVE_P (info))
22969 int i, j, sz;
22970 rtx treg;
22971 rtvec p;
22972 rtx reg0;
22974 /* save_world expects lr in r0. */
22975 reg0 = gen_rtx_REG (Pmode, 0);
22976 if (info->lr_save_p)
22978 insn = emit_move_insn (reg0,
22979 gen_rtx_REG (Pmode, LR_REGNO));
22980 RTX_FRAME_RELATED_P (insn) = 1;
22983 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
22984 assumptions about the offsets of various bits of the stack
22985 frame. */
22986 gcc_assert (info->gp_save_offset == -220
22987 && info->fp_save_offset == -144
22988 && info->lr_save_offset == 8
22989 && info->cr_save_offset == 4
22990 && info->push_p
22991 && info->lr_save_p
22992 && (!crtl->calls_eh_return
22993 || info->ehrd_offset == -432)
22994 && info->vrsave_save_offset == -224
22995 && info->altivec_save_offset == -416);
22997 treg = gen_rtx_REG (SImode, 11);
22998 emit_move_insn (treg, GEN_INT (-info->total_size));
23000 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
23001 in R11. It also clobbers R12, so beware! */
23003 /* Preserve CR2 for save_world prologues. */
23004 sz = 5;
23005 sz += 32 - info->first_gp_reg_save;
23006 sz += 64 - info->first_fp_reg_save;
23007 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
23008 p = rtvec_alloc (sz);
23009 j = 0;
23010 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
23011 gen_rtx_REG (SImode,
23012 LR_REGNO));
23013 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
23014 gen_rtx_SYMBOL_REF (Pmode,
23015 "*save_world"));
23016 /* We do floats first so that the instruction pattern matches
23017 properly. */
23018 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23019 RTVEC_ELT (p, j++)
23020 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23021 ? DFmode : SFmode,
23022 info->first_fp_reg_save + i),
23023 frame_reg_rtx,
23024 info->fp_save_offset + frame_off + 8 * i);
23025 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
23026 RTVEC_ELT (p, j++)
23027 = gen_frame_store (gen_rtx_REG (V4SImode,
23028 info->first_altivec_reg_save + i),
23029 frame_reg_rtx,
23030 info->altivec_save_offset + frame_off + 16 * i);
23031 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23032 RTVEC_ELT (p, j++)
23033 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23034 frame_reg_rtx,
23035 info->gp_save_offset + frame_off + reg_size * i);
23037 /* CR register traditionally saved as CR2. */
23038 RTVEC_ELT (p, j++)
23039 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
23040 frame_reg_rtx, info->cr_save_offset + frame_off);
23041 /* Explain the use of R0. */
23042 if (info->lr_save_p)
23043 RTVEC_ELT (p, j++)
23044 = gen_frame_store (reg0,
23045 frame_reg_rtx, info->lr_save_offset + frame_off);
23046 /* Explain what happens to the stack pointer. */
23048 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
23049 RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval);
23052 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23053 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23054 treg, GEN_INT (-info->total_size), NULL_RTX);
23055 sp_off = frame_off = info->total_size;
23058 strategy = info->savres_strategy;
23060 /* For V.4, update stack before we do any saving and set back pointer. */
23061 if (! WORLD_SAVE_P (info)
23062 && info->push_p
23063 && (DEFAULT_ABI == ABI_V4
23064 || crtl->calls_eh_return))
23066 bool need_r11 = (TARGET_SPE
23067 ? (!(strategy & SAVE_INLINE_GPRS)
23068 && info->spe_64bit_regs_used == 0)
23069 : (!(strategy & SAVE_INLINE_FPRS)
23070 || !(strategy & SAVE_INLINE_GPRS)
23071 || !(strategy & SAVE_INLINE_VRS)));
23072 int ptr_regno = -1;
23073 rtx ptr_reg = NULL_RTX;
23074 int ptr_off = 0;
23076 if (info->total_size < 32767)
23077 frame_off = info->total_size;
23078 else if (need_r11)
23079 ptr_regno = 11;
23080 else if (info->cr_save_p
23081 || info->lr_save_p
23082 || info->first_fp_reg_save < 64
23083 || info->first_gp_reg_save < 32
23084 || info->altivec_size != 0
23085 || info->vrsave_mask != 0
23086 || crtl->calls_eh_return)
23087 ptr_regno = 12;
23088 else
23090 /* The prologue won't be saving any regs so there is no need
23091 to set up a frame register to access any frame save area.
23092 We also won't be using frame_off anywhere below, but set
23093 the correct value anyway to protect against future
23094 changes to this function. */
23095 frame_off = info->total_size;
23097 if (ptr_regno != -1)
23099 /* Set up the frame offset to that needed by the first
23100 out-of-line save function. */
23101 START_USE (ptr_regno);
23102 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23103 frame_reg_rtx = ptr_reg;
23104 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
23105 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
23106 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
23107 ptr_off = info->gp_save_offset + info->gp_size;
23108 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
23109 ptr_off = info->altivec_save_offset + info->altivec_size;
23110 frame_off = -ptr_off;
23112 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23113 sp_off = info->total_size;
23114 if (frame_reg_rtx != sp_reg_rtx)
23115 rs6000_emit_stack_tie (frame_reg_rtx, false);
23118 /* If we use the link register, get it into r0. */
23119 if (!WORLD_SAVE_P (info) && info->lr_save_p)
23121 rtx addr, reg, mem;
23123 reg = gen_rtx_REG (Pmode, 0);
23124 START_USE (0);
23125 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
23126 RTX_FRAME_RELATED_P (insn) = 1;
23128 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
23129 | SAVE_NOINLINE_FPRS_SAVES_LR)))
23131 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23132 GEN_INT (info->lr_save_offset + frame_off));
23133 mem = gen_rtx_MEM (Pmode, addr);
23134 /* This should not be of rs6000_sr_alias_set, because of
23135 __builtin_return_address. */
23137 insn = emit_move_insn (mem, reg);
23138 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23139 NULL_RTX, NULL_RTX, NULL_RTX);
23140 END_USE (0);
23144 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
23145 r12 will be needed by out-of-line gpr restore. */
23146 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23147 && !(strategy & (SAVE_INLINE_GPRS
23148 | SAVE_NOINLINE_GPRS_SAVES_LR))
23149 ? 11 : 12);
23150 if (!WORLD_SAVE_P (info)
23151 && info->cr_save_p
23152 && REGNO (frame_reg_rtx) != cr_save_regno
23153 && !(using_static_chain_p && cr_save_regno == 11))
23155 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
23156 START_USE (cr_save_regno);
23157 rs6000_emit_move_from_cr (cr_save_rtx);
23160 /* Do any required saving of FPRs. If there are only one or two to
23161 save, do it ourselves. Otherwise, call an out-of-line routine. */
23162 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
23164 int i;
23165 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23166 if (save_reg_p (info->first_fp_reg_save + i))
23167 emit_frame_save (frame_reg_rtx,
23168 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23169 ? DFmode : SFmode),
23170 info->first_fp_reg_save + i,
23171 info->fp_save_offset + frame_off + 8 * i,
23172 sp_off - frame_off);
23174 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
23176 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
23177 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
23178 unsigned ptr_regno = ptr_regno_for_savres (sel);
23179 rtx ptr_reg = frame_reg_rtx;
23181 if (REGNO (frame_reg_rtx) == ptr_regno)
23182 gcc_checking_assert (frame_off == 0);
23183 else
23185 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23186 NOT_INUSE (ptr_regno);
23187 emit_insn (gen_add3_insn (ptr_reg,
23188 frame_reg_rtx, GEN_INT (frame_off)));
23190 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23191 info->fp_save_offset,
23192 info->lr_save_offset,
23193 DFmode, sel);
23194 rs6000_frame_related (insn, ptr_reg, sp_off,
23195 NULL_RTX, NULL_RTX, NULL_RTX);
23196 if (lr)
23197 END_USE (0);
23200 /* Save GPRs. This is done as a PARALLEL if we are using
23201 the store-multiple instructions. */
23202 if (!WORLD_SAVE_P (info)
23203 && TARGET_SPE_ABI
23204 && info->spe_64bit_regs_used != 0
23205 && info->first_gp_reg_save != 32)
23207 int i;
23208 rtx spe_save_area_ptr;
23209 HOST_WIDE_INT save_off;
23210 int ool_adjust = 0;
23212 /* Determine whether we can address all of the registers that need
23213 to be saved with an offset from frame_reg_rtx that fits in
23214 the small const field for SPE memory instructions. */
23215 int spe_regs_addressable
23216 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
23217 + reg_size * (32 - info->first_gp_reg_save - 1))
23218 && (strategy & SAVE_INLINE_GPRS));
23220 if (spe_regs_addressable)
23222 spe_save_area_ptr = frame_reg_rtx;
23223 save_off = frame_off;
23225 else
23227 /* Make r11 point to the start of the SPE save area. We need
23228 to be careful here if r11 is holding the static chain. If
23229 it is, then temporarily save it in r0. */
23230 HOST_WIDE_INT offset;
23232 if (!(strategy & SAVE_INLINE_GPRS))
23233 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
23234 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
23235 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
23236 save_off = frame_off - offset;
23238 if (using_static_chain_p)
23240 rtx r0 = gen_rtx_REG (Pmode, 0);
23242 START_USE (0);
23243 gcc_assert (info->first_gp_reg_save > 11);
23245 emit_move_insn (r0, spe_save_area_ptr);
23247 else if (REGNO (frame_reg_rtx) != 11)
23248 START_USE (11);
23250 emit_insn (gen_addsi3 (spe_save_area_ptr,
23251 frame_reg_rtx, GEN_INT (offset)));
23252 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
23253 frame_off = -info->spe_gp_save_offset + ool_adjust;
23256 if ((strategy & SAVE_INLINE_GPRS))
23258 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23259 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23260 emit_frame_save (spe_save_area_ptr, reg_mode,
23261 info->first_gp_reg_save + i,
23262 (info->spe_gp_save_offset + save_off
23263 + reg_size * i),
23264 sp_off - save_off);
23266 else
23268 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
23269 info->spe_gp_save_offset + save_off,
23270 0, reg_mode,
23271 SAVRES_SAVE | SAVRES_GPR);
23273 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
23274 NULL_RTX, NULL_RTX, NULL_RTX);
23277 /* Move the static chain pointer back. */
23278 if (!spe_regs_addressable)
23280 if (using_static_chain_p)
23282 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
23283 END_USE (0);
23285 else if (REGNO (frame_reg_rtx) != 11)
23286 END_USE (11);
23289 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
23291 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
23292 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
23293 unsigned ptr_regno = ptr_regno_for_savres (sel);
23294 rtx ptr_reg = frame_reg_rtx;
23295 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
23296 int end_save = info->gp_save_offset + info->gp_size;
23297 int ptr_off;
23299 if (!ptr_set_up)
23300 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23302 /* Need to adjust r11 (r12) if we saved any FPRs. */
23303 if (end_save + frame_off != 0)
23305 rtx offset = GEN_INT (end_save + frame_off);
23307 if (ptr_set_up)
23308 frame_off = -end_save;
23309 else
23310 NOT_INUSE (ptr_regno);
23311 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23313 else if (!ptr_set_up)
23315 NOT_INUSE (ptr_regno);
23316 emit_move_insn (ptr_reg, frame_reg_rtx);
23318 ptr_off = -end_save;
23319 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23320 info->gp_save_offset + ptr_off,
23321 info->lr_save_offset + ptr_off,
23322 reg_mode, sel);
23323 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
23324 NULL_RTX, NULL_RTX, NULL_RTX);
23325 if (lr)
23326 END_USE (0);
23328 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
23330 rtvec p;
23331 int i;
23332 p = rtvec_alloc (32 - info->first_gp_reg_save);
23333 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23334 RTVEC_ELT (p, i)
23335 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23336 frame_reg_rtx,
23337 info->gp_save_offset + frame_off + reg_size * i);
23338 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23339 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23340 NULL_RTX, NULL_RTX, NULL_RTX);
23342 else if (!WORLD_SAVE_P (info))
23344 int i;
23345 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23346 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23347 emit_frame_save (frame_reg_rtx, reg_mode,
23348 info->first_gp_reg_save + i,
23349 info->gp_save_offset + frame_off + reg_size * i,
23350 sp_off - frame_off);
23353 if (crtl->calls_eh_return)
23355 unsigned int i;
23356 rtvec p;
23358 for (i = 0; ; ++i)
23360 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23361 if (regno == INVALID_REGNUM)
23362 break;
23365 p = rtvec_alloc (i);
23367 for (i = 0; ; ++i)
23369 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23370 if (regno == INVALID_REGNUM)
23371 break;
23373 insn
23374 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
23375 sp_reg_rtx,
23376 info->ehrd_offset + sp_off + reg_size * (int) i);
23377 RTVEC_ELT (p, i) = insn;
23378 RTX_FRAME_RELATED_P (insn) = 1;
23381 insn = emit_insn (gen_blockage ());
23382 RTX_FRAME_RELATED_P (insn) = 1;
23383 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
23386 /* In AIX ABI we need to make sure r2 is really saved. */
23387 if (TARGET_AIX && crtl->calls_eh_return)
23389 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
23390 rtx save_insn, join_insn, note;
23391 long toc_restore_insn;
23393 tmp_reg = gen_rtx_REG (Pmode, 11);
23394 tmp_reg_si = gen_rtx_REG (SImode, 11);
23395 if (using_static_chain_p)
23397 START_USE (0);
23398 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
23400 else
23401 START_USE (11);
23402 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
23403 /* Peek at instruction to which this function returns. If it's
23404 restoring r2, then we know we've already saved r2. We can't
23405 unconditionally save r2 because the value we have will already
23406 be updated if we arrived at this function via a plt call or
23407 toc adjusting stub. */
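/* The magic constants below are the expected TOC restore instructions:
   0x80410000 + slot encodes "lwz r2,slot(r1)" and 0xE8410000 + slot
   encodes "ld r2,slot(r1)", with slot == RS6000_TOC_SAVE_SLOT.  */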
23408 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
23409 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
23410 + RS6000_TOC_SAVE_SLOT);
23411 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
23412 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
23413 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
23414 validate_condition_mode (EQ, CCUNSmode);
23415 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
23416 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
23417 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
23418 toc_save_done = gen_label_rtx ();
23419 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
23420 gen_rtx_EQ (VOIDmode, compare_result,
23421 const0_rtx),
23422 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
23423 pc_rtx);
23424 jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
23425 JUMP_LABEL (jump) = toc_save_done;
23426 LABEL_NUSES (toc_save_done) += 1;
23428 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
23429 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
23430 sp_off - frame_off);
23432 emit_label (toc_save_done);
23434 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
23435 have a CFG that has different saves along different paths.
23436 Move the note to a dummy blockage insn, which describes that
23437 R2 is unconditionally saved after the label. */
23438 /* ??? An alternate representation might be a special insn pattern
23439 containing both the branch and the store. That might give the
23440 code that minimizes the number of DW_CFA_advance opcodes more
23441 freedom in placing the annotations. */
23442 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
23443 if (note)
23444 remove_note (save_insn, note);
23445 else
23446 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
23447 copy_rtx (PATTERN (save_insn)), NULL_RTX);
23448 RTX_FRAME_RELATED_P (save_insn) = 0;
23450 join_insn = emit_insn (gen_blockage ());
23451 REG_NOTES (join_insn) = note;
23452 RTX_FRAME_RELATED_P (join_insn) = 1;
23454 if (using_static_chain_p)
23456 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
23457 END_USE (0);
23459 else
23460 END_USE (11);
23463 /* Save CR if we use any that must be preserved. */
23464 if (!WORLD_SAVE_P (info) && info->cr_save_p)
23466 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23467 GEN_INT (info->cr_save_offset + frame_off));
23468 rtx mem = gen_frame_mem (SImode, addr);
23470 /* If we didn't copy cr before, do so now using r0. */
23471 if (cr_save_rtx == NULL_RTX)
23473 START_USE (0);
23474 cr_save_rtx = gen_rtx_REG (SImode, 0);
23475 rs6000_emit_move_from_cr (cr_save_rtx);
23478 /* Saving CR requires a two-instruction sequence: one instruction
23479 to move the CR to a general-purpose register, and a second
23480 instruction that stores the GPR to memory.
23482 We do not emit any DWARF CFI records for the first of these,
23483 because we cannot properly represent the fact that CR is saved in
23484 a register. One reason is that we cannot express that multiple
23485 CR fields are saved; another reason is that on 64-bit, the size
23486 of the CR register in DWARF (4 bytes) differs from the size of
23487 a general-purpose register.
23489 This means if any intervening instruction were to clobber one of
23490 the call-saved CR fields, we'd have incorrect CFI. To prevent
23491 this from happening, we mark the store to memory as a use of
23492 those CR fields, which prevents any such instruction from being
23493 scheduled in between the two instructions. */
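      /* A sketch of the emitted sequence, assuming r0 is the scratch and
	 cr2..cr4 are the live call-saved fields (offsets vary by ABI):

	     mfcr  r0              ; no CFI emitted for this insn
	     stw   r0,8(r1)        ; store carries (use (reg:CC 70)) etc.

	 The attached USEs keep, e.g., a crxor clobbering cr2 from being
	 scheduled between the mfcr and the store.  */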
23494 rtx crsave_v[9];
23495 int n_crsave = 0;
23496 int i;
23498 crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
23499 for (i = 0; i < 8; i++)
23500 if (save_reg_p (CR0_REGNO + i))
23501 crsave_v[n_crsave++]
23502 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23504 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
23505 gen_rtvec_v (n_crsave, crsave_v)));
23506 END_USE (REGNO (cr_save_rtx));
23508 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
23509 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
23510 so we need to construct a frame expression manually. */
23511 RTX_FRAME_RELATED_P (insn) = 1;
23513 /* Update address to be stack-pointer relative, like
23514 rs6000_frame_related would do. */
23515 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
23516 GEN_INT (info->cr_save_offset + sp_off));
23517 mem = gen_frame_mem (SImode, addr);
23519 if (DEFAULT_ABI == ABI_ELFv2)
23521 /* In the ELFv2 ABI we generate separate CFI records for each
23522 CR field that was actually saved. They all point to the
23523 same 32-bit stack slot. */
23524 rtx crframe[8];
23525 int n_crframe = 0;
23527 for (i = 0; i < 8; i++)
23528 if (save_reg_p (CR0_REGNO + i))
23530 crframe[n_crframe]
23531 = gen_rtx_SET (VOIDmode, mem,
23532 gen_rtx_REG (SImode, CR0_REGNO + i));
23534 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
23535 n_crframe++;
23538 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23539 gen_rtx_PARALLEL (VOIDmode,
23540 gen_rtvec_v (n_crframe, crframe)));
23542 else
23544 /* In other ABIs, by convention, we use a single CR regnum to
23545 represent the fact that all call-saved CR fields are saved.
23546 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
23547 rtx set = gen_rtx_SET (VOIDmode, mem,
23548 gen_rtx_REG (SImode, CR2_REGNO));
23549 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
23553 /* In the ELFv2 ABI we need to save all call-saved CR fields into
23554 *separate* slots if the routine calls __builtin_eh_return, so
23555 that they can be independently restored by the unwinder. */
23556 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
23558 int i, cr_off = info->ehcr_offset;
23559 rtx crsave;
23561 /* ??? We might get better performance by using multiple mfocrf
23562 instructions. */
23563 crsave = gen_rtx_REG (SImode, 0);
23564 emit_insn (gen_movesi_from_cr (crsave));
23566 for (i = 0; i < 8; i++)
23567 if (!call_used_regs[CR0_REGNO + i])
23569 rtvec p = rtvec_alloc (2);
23570 RTVEC_ELT (p, 0)
23571 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
23572 RTVEC_ELT (p, 1)
23573 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23575 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23577 RTX_FRAME_RELATED_P (insn) = 1;
23578 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23579 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
23580 sp_reg_rtx, cr_off + sp_off));
23582 cr_off += reg_size;
23586 /* Update stack and set back pointer unless this is V.4,
23587 for which it was done previously. */
23588 if (!WORLD_SAVE_P (info) && info->push_p
23589 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
23591 rtx ptr_reg = NULL;
23592 int ptr_off = 0;
23594 /* If saving altivec regs we need to be able to address all save
23595 locations using a 16-bit offset. */
23596 if ((strategy & SAVE_INLINE_VRS) == 0
23597 || (info->altivec_size != 0
23598 && (info->altivec_save_offset + info->altivec_size - 16
23599 + info->total_size - frame_off) > 32767)
23600 || (info->vrsave_size != 0
23601 && (info->vrsave_save_offset
23602 + info->total_size - frame_off) > 32767))
23604 int sel = SAVRES_SAVE | SAVRES_VR;
23605 unsigned ptr_regno = ptr_regno_for_savres (sel);
23607 if (using_static_chain_p
23608 && ptr_regno == STATIC_CHAIN_REGNUM)
23609 ptr_regno = 12;
23610 if (REGNO (frame_reg_rtx) != ptr_regno)
23611 START_USE (ptr_regno);
23612 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23613 frame_reg_rtx = ptr_reg;
23614 ptr_off = info->altivec_save_offset + info->altivec_size;
23615 frame_off = -ptr_off;
23617 else if (REGNO (frame_reg_rtx) == 1)
23618 frame_off = info->total_size;
23619 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23620 sp_off = info->total_size;
23621 if (frame_reg_rtx != sp_reg_rtx)
23622 rs6000_emit_stack_tie (frame_reg_rtx, false);
23625 /* Set frame pointer, if needed. */
23626 if (frame_pointer_needed)
23628 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
23629 sp_reg_rtx);
23630 RTX_FRAME_RELATED_P (insn) = 1;
23633 /* Save AltiVec registers if needed. Save here because the red zone does
23634 not always include AltiVec registers. */
23635 if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
23636 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
23638 int end_save = info->altivec_save_offset + info->altivec_size;
23639 int ptr_off;
23640 /* Oddly, the vector save/restore functions point r0 at the end
23641 of the save area, then use r11 or r12 to load offsets for
23642 [reg+reg] addressing. */
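      /* Illustratively, the out-of-line path set up below looks like

	     addi  r0,rFRAME,<end_save>   ; r0 -> end of the VR save area
	     bl    <out-of-line VR save>  ; stores via stvx vN,rX,r0 with
					  ; negative offsets loaded into rX

	 so every slot is reached with [reg+reg] addressing off r0.  */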
23643 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
23644 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
23645 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
23647 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
23648 NOT_INUSE (0);
23649 if (end_save + frame_off != 0)
23651 rtx offset = GEN_INT (end_save + frame_off);
23653 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23655 else
23656 emit_move_insn (ptr_reg, frame_reg_rtx);
23658 ptr_off = -end_save;
23659 insn = rs6000_emit_savres_rtx (info, scratch_reg,
23660 info->altivec_save_offset + ptr_off,
23661 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
23662 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
23663 NULL_RTX, NULL_RTX, NULL_RTX);
23664 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
23666 /* The oddity mentioned above clobbered our frame reg. */
23667 emit_move_insn (frame_reg_rtx, ptr_reg);
23668 frame_off = ptr_off;
23671 else if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
23672 && info->altivec_size != 0)
23674 int i;
23676 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
23677 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
23679 rtx areg, savereg, mem, split_reg;
23680 int offset;
23682 offset = (info->altivec_save_offset + frame_off
23683 + 16 * (i - info->first_altivec_reg_save));
23685 savereg = gen_rtx_REG (V4SImode, i);
23687 NOT_INUSE (0);
23688 areg = gen_rtx_REG (Pmode, 0);
23689 emit_move_insn (areg, GEN_INT (offset));
23691 /* AltiVec addressing mode is [reg+reg]. */
23692 mem = gen_frame_mem (V4SImode,
23693 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
23695 insn = emit_move_insn (mem, savereg);
23697 /* When we split a VSX store into two insns, we need to make
23698 sure the DWARF info knows which register we are storing.
23699 Pass it in to be used on the appropriate note. */
23700 if (!BYTES_BIG_ENDIAN
23701 && GET_CODE (PATTERN (insn)) == SET
23702 && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
23703 split_reg = savereg;
23704 else
23705 split_reg = NULL_RTX;
23707 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23708 areg, GEN_INT (offset), split_reg);
23712 /* VRSAVE is a bit vector representing which AltiVec registers
23713 are used. The OS uses this to determine which vector
23714 registers to save on a context switch. We need to save
23715 VRSAVE on the stack frame, add whatever AltiVec registers we
23716 used in this function, and do the corresponding magic in the
23717 epilogue. */
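  /* A sketch of what is emitted below (scratch register varies):

	 mfspr r12,VRSAVE            ; or the Darwin get_vrsave variant
	 stw   r12,<vrsave_off>(rFRAME)
	 oris  r12,r12,<mask>        ; ori for low bits as needed
	 mtspr VRSAVE,r12  */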
23719 if (!WORLD_SAVE_P (info)
23720 && TARGET_ALTIVEC
23721 && TARGET_ALTIVEC_VRSAVE
23722 && info->vrsave_mask != 0)
23724 rtx reg, vrsave;
23725 int offset;
23726 int save_regno;
23728 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
23729 be using r12 as frame_reg_rtx and r11 as the static chain
23730 pointer for nested functions. */
23731 save_regno = 12;
23732 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23733 && !using_static_chain_p)
23734 save_regno = 11;
23735 else if (REGNO (frame_reg_rtx) == 12)
23737 save_regno = 11;
23738 if (using_static_chain_p)
23739 save_regno = 0;
23742 NOT_INUSE (save_regno);
23743 reg = gen_rtx_REG (SImode, save_regno);
23744 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
23745 if (TARGET_MACHO)
23746 emit_insn (gen_get_vrsave_internal (reg));
23747 else
23748 emit_insn (gen_rtx_SET (VOIDmode, reg, vrsave));
23750 /* Save VRSAVE. */
23751 offset = info->vrsave_save_offset + frame_off;
23752 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
23754 /* Include the registers in the mask. */
23755 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
23757 insn = emit_insn (generate_set_vrsave (reg, info, 0));
23760 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
23761 if (!TARGET_SINGLE_PIC_BASE
23762 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
23763 || (DEFAULT_ABI == ABI_V4
23764 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
23765 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
23767 /* If emit_load_toc_table will use the link register, we need to save
23768 it. We use R12 for this purpose because emit_load_toc_table
23769 can use register 0. This allows us to use a plain 'blr' to return
23770 from the procedure more often. */
23771 int save_LR_around_toc_setup = (TARGET_ELF
23772 && DEFAULT_ABI == ABI_V4
23773 && flag_pic
23774 && ! info->lr_save_p
23775 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
23776 if (save_LR_around_toc_setup)
23778 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
23779 rtx tmp = gen_rtx_REG (Pmode, 12);
23781 insn = emit_move_insn (tmp, lr);
23782 RTX_FRAME_RELATED_P (insn) = 1;
23784 rs6000_emit_load_toc_table (TRUE);
23786 insn = emit_move_insn (lr, tmp);
23787 add_reg_note (insn, REG_CFA_RESTORE, lr);
23788 RTX_FRAME_RELATED_P (insn) = 1;
23790 else
23791 rs6000_emit_load_toc_table (TRUE);
23794 #if TARGET_MACHO
23795 if (!TARGET_SINGLE_PIC_BASE
23796 && DEFAULT_ABI == ABI_DARWIN
23797 && flag_pic && crtl->uses_pic_offset_table)
23799 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
23800 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
23802 /* Save and restore LR locally around this call (in R0). */
23803 if (!info->lr_save_p)
23804 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
23806 emit_insn (gen_load_macho_picbase (src));
23808 emit_move_insn (gen_rtx_REG (Pmode,
23809 RS6000_PIC_OFFSET_TABLE_REGNUM),
23810 lr);
23812 if (!info->lr_save_p)
23813 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
23815 #endif
23817 /* If we need to, save the TOC register after doing the stack setup.
23818 Do not emit eh frame info for this save. The unwinder wants info,
23819 conceptually attached to instructions in this function, about
23820 register values in the caller of this function. This R2 may have
23821 already been changed from the value in the caller.
23822 We don't attempt to write accurate DWARF EH frame info for R2
23823 because code emitted by gcc for a (non-pointer) function call
23824 doesn't save and restore R2. Instead, R2 is managed out-of-line
23825 by a linker generated plt call stub when the function resides in
23826 a shared library. This behaviour is costly to describe in DWARF,
23827 both in terms of the size of DWARF info and the time taken in the
23828 unwinder to interpret it. R2 changes, apart from the
23829 calls_eh_return case earlier in this function, are handled by
23830 linux-unwind.h frob_update_context. */
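  /* For reference, a typical 64-bit ELFv1 cross-module call is

	 bl    foo                   ; resolved to a linker call stub
	 ld    r2,40(r1)             ; TOC restore at the return address

     and it is that restore insn which frob_update_context (and the
     calls_eh_return code earlier above) recognizes.  */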
23831 if (rs6000_save_toc_in_prologue_p ())
23833 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
23834 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
23838 /* Write function prologue. */
23840 static void
23841 rs6000_output_function_prologue (FILE *file,
23842 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
23844 rs6000_stack_t *info = rs6000_stack_info ();
23846 if (TARGET_DEBUG_STACK)
23847 debug_stack_info (info);
23849 /* Write .extern for any function we will call to save and restore
23850 fp values. */
23851 if (info->first_fp_reg_save < 64
23852 && !TARGET_MACHO
23853 && !TARGET_ELF)
23855 char *name;
23856 int regno = info->first_fp_reg_save - 32;
23858 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
23860 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
23861 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
23862 name = rs6000_savres_routine_name (info, regno, sel);
23863 fprintf (file, "\t.extern %s\n", name);
23865 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
23867 bool lr = (info->savres_strategy
23868 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
23869 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
23870 name = rs6000_savres_routine_name (info, regno, sel);
23871 fprintf (file, "\t.extern %s\n", name);
23875 /* ELFv2 ABI r2 setup code and local entry point. This must follow
23876 immediately after the global entry point label. */
23877 if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
23879 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
23881 fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
23882 fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
23884 fputs ("\t.localentry\t", file);
23885 assemble_name (file, name);
23886 fputs (",.-", file);
23887 assemble_name (file, name);
23888 fputs ("\n", file);
23891 /* Output -mprofile-kernel code. This needs to be done here instead of
23892    in output_function_profiler since it must go after the ELFv2 ABI
23893 local entry point. */
23894 if (TARGET_PROFILE_KERNEL && crtl->profile)
23896 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
23897 gcc_assert (!TARGET_32BIT);
23899 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
23900 asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
23902 /* In the ELFv2 ABI we have no compiler stack word. It must be
23903 	 the responsibility of _mcount to preserve the static chain
23904 register if required. */
23905 if (DEFAULT_ABI != ABI_ELFv2
23906 && cfun->static_chain_decl != NULL)
23908 asm_fprintf (file, "\tstd %s,24(%s)\n",
23909 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
23910 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
23911 asm_fprintf (file, "\tld %s,24(%s)\n",
23912 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
23914 else
23915 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
23918 rs6000_pic_labelno++;
23921 /* Non-zero if vmx regs are restored before the frame pop, zero if
23922 we restore after the pop when possible. */
23923 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
23925 /* Restoring cr is a two step process: loading a reg from the frame
23926 save, then moving the reg to cr. For ABI_V4 we must let the
23927 unwinder know that the stack location is no longer valid at or
23928 before the stack deallocation, but we can't emit a cfa_restore for
23929 cr at the stack deallocation like we do for other registers.
23930 The trouble is that it is possible for the move to cr to be
23931 scheduled after the stack deallocation. So say exactly where cr
23932 is located on each of the two insns. */
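/* I.e. for ABI_V4 the emitted pair is annotated along these lines:

	 lwz   rN,<cr_off>(rFRAME)   ; REG_CFA_REGISTER: cr2 now in rN
	 mtcrf 0x38,rN               ; REG_CFA_RESTORE: cr2

   so the unwinder knows where cr lives even if the mtcrf is scheduled
   past the stack deallocation.  */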
23934 static rtx
23935 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
23937 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
23938 rtx reg = gen_rtx_REG (SImode, regno);
23939 rtx insn = emit_move_insn (reg, mem);
23941 if (!exit_func && DEFAULT_ABI == ABI_V4)
23943 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
23944 rtx set = gen_rtx_SET (VOIDmode, reg, cr);
23946 add_reg_note (insn, REG_CFA_REGISTER, set);
23947 RTX_FRAME_RELATED_P (insn) = 1;
23949 return reg;
23952 /* Reload CR from REG. */
23954 static void
23955 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
23957 int count = 0;
23958 int i;
23960 if (using_mfcr_multiple)
23962 for (i = 0; i < 8; i++)
23963 if (save_reg_p (CR0_REGNO + i))
23964 count++;
23965 gcc_assert (count);
23968 if (using_mfcr_multiple && count > 1)
23970 rtx insn;
23971 rtvec p;
23972 int ndx;
23974 p = rtvec_alloc (count);
23976 ndx = 0;
23977 for (i = 0; i < 8; i++)
23978 if (save_reg_p (CR0_REGNO + i))
23980 rtvec r = rtvec_alloc (2);
23981 RTVEC_ELT (r, 0) = reg;
23982 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
23983 RTVEC_ELT (p, ndx) =
23984 gen_rtx_SET (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i),
23985 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
23986 ndx++;
23988 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23989 gcc_assert (ndx == count);
23991 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
23992 CR field separately. */
23993 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
23995 for (i = 0; i < 8; i++)
23996 if (save_reg_p (CR0_REGNO + i))
23997 add_reg_note (insn, REG_CFA_RESTORE,
23998 gen_rtx_REG (SImode, CR0_REGNO + i));
24000 RTX_FRAME_RELATED_P (insn) = 1;
24003 else
24004 for (i = 0; i < 8; i++)
24005 if (save_reg_p (CR0_REGNO + i))
24007 rtx insn = emit_insn (gen_movsi_to_cr_one
24008 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24010 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24011 CR field separately, attached to the insn that in fact
24012 restores this particular CR field. */
24013 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24015 add_reg_note (insn, REG_CFA_RESTORE,
24016 gen_rtx_REG (SImode, CR0_REGNO + i));
24018 RTX_FRAME_RELATED_P (insn) = 1;
24022 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
24023 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
24024 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24026 rtx insn = get_last_insn ();
24027 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24029 add_reg_note (insn, REG_CFA_RESTORE, cr);
24030 RTX_FRAME_RELATED_P (insn) = 1;
24034 /* Like cr, the move to lr instruction can be scheduled after the
24035 stack deallocation, but unlike cr, its stack frame save is still
24036 valid. So we only need to emit the cfa_restore on the correct
24037 instruction. */
24039 static void
24040 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
24042 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
24043 rtx reg = gen_rtx_REG (Pmode, regno);
24045 emit_move_insn (reg, mem);
24048 static void
24049 restore_saved_lr (int regno, bool exit_func)
24051 rtx reg = gen_rtx_REG (Pmode, regno);
24052 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24053 rtx insn = emit_move_insn (lr, reg);
24055 if (!exit_func && flag_shrink_wrap)
24057 add_reg_note (insn, REG_CFA_RESTORE, lr);
24058 RTX_FRAME_RELATED_P (insn) = 1;
24062 static rtx
24063 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
24065 if (DEFAULT_ABI == ABI_ELFv2)
24067 int i;
24068 for (i = 0; i < 8; i++)
24069 if (save_reg_p (CR0_REGNO + i))
24071 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
24072 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
24073 cfa_restores);
24076 else if (info->cr_save_p)
24077 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24078 gen_rtx_REG (SImode, CR2_REGNO),
24079 cfa_restores);
24081 if (info->lr_save_p)
24082 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24083 gen_rtx_REG (Pmode, LR_REGNO),
24084 cfa_restores);
24085 return cfa_restores;
24088 /* Return true if OFFSET from stack pointer can be clobbered by signals.
24089    V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288
24090    bytes below the stack pointer that are not clobbered by signals.  */
24092 static inline bool
24093 offset_below_red_zone_p (HOST_WIDE_INT offset)
24095   return offset < (DEFAULT_ABI == ABI_V4
24096 		   ? 0
24097 		   : TARGET_32BIT ? -220 : -288);
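/* For example: on 64-bit AIX/ELF an offset of -288 is still within the
   protected cushion, while -289 or anything lower may be clobbered; on
   ABI_V4 every negative offset is unprotected.  */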
24100 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
24102 static void
24103 emit_cfa_restores (rtx cfa_restores)
24105 rtx insn = get_last_insn ();
24106 rtx *loc = &REG_NOTES (insn);
24108 while (*loc)
24109 loc = &XEXP (*loc, 1);
24110 *loc = cfa_restores;
24111 RTX_FRAME_RELATED_P (insn) = 1;
24114 /* Emit function epilogue as insns. */
24116 void
24117 rs6000_emit_epilogue (int sibcall)
24119 rs6000_stack_t *info;
24120 int restoring_GPRs_inline;
24121 int restoring_FPRs_inline;
24122 int using_load_multiple;
24123 int using_mtcr_multiple;
24124 int use_backchain_to_restore_sp;
24125 int restore_lr;
24126 int strategy;
24127 HOST_WIDE_INT frame_off = 0;
24128 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
24129 rtx frame_reg_rtx = sp_reg_rtx;
24130 rtx cfa_restores = NULL_RTX;
24131 rtx insn;
24132 rtx cr_save_reg = NULL_RTX;
24133 enum machine_mode reg_mode = Pmode;
24134 int reg_size = TARGET_32BIT ? 4 : 8;
24135 int i;
24136 bool exit_func;
24137 unsigned ptr_regno;
24139 info = rs6000_stack_info ();
24141 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24143 reg_mode = V2SImode;
24144 reg_size = 8;
24147 strategy = info->savres_strategy;
24148 using_load_multiple = strategy & SAVRES_MULTIPLE;
24149 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
24150 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
24151 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
24152 || rs6000_cpu == PROCESSOR_PPC603
24153 || rs6000_cpu == PROCESSOR_PPC750
24154 || optimize_size);
24155 /* Restore via the backchain when we have a large frame, since this
24156 is more efficient than an addis, addi pair. The second condition
24157      here will not trigger at the moment; we don't actually need a
24158 frame pointer for alloca, but the generic parts of the compiler
24159 give us one anyway. */
24160 use_backchain_to_restore_sp = (info->total_size > 32767 - info->lr_save_offset
24161 || (cfun->calls_alloca
24162 && !frame_pointer_needed));
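  /* I.e. when the frame is large, a single "ld r11,0(r1)" of the
     back-chain word recovers the old stack pointer, where the offset
     form would need an addis/addi pair to materialize total_size.  */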
24163 restore_lr = (info->lr_save_p
24164 && (restoring_FPRs_inline
24165 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
24166 && (restoring_GPRs_inline
24167 || info->first_fp_reg_save < 64));
24169 if (WORLD_SAVE_P (info))
24171 int i, j;
24172 char rname[30];
24173 const char *alloc_rname;
24174 rtvec p;
24176 /* eh_rest_world_r10 will return to the location saved in the LR
24178 	 stack slot (which is not likely to be our caller).
24178 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
24179 rest_world is similar, except any R10 parameter is ignored.
24180 The exception-handling stuff that was here in 2.95 is no
24181 longer necessary. */
24183       p = rtvec_alloc (9
24184 		       + 1
24185 		       + 32 - info->first_gp_reg_save
24186 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
24187 + 63 + 1 - info->first_fp_reg_save);
24189 strcpy (rname, ((crtl->calls_eh_return) ?
24190 "*eh_rest_world_r10" : "*rest_world"));
24191 alloc_rname = ggc_strdup (rname);
24193 j = 0;
24194 RTVEC_ELT (p, j++) = ret_rtx;
24195 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
24196 gen_rtx_REG (Pmode,
24197 LR_REGNO));
24198 RTVEC_ELT (p, j++)
24199 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
24200 /* The instruction pattern requires a clobber here;
24201 it is shared with the restVEC helper. */
24202 RTVEC_ELT (p, j++)
24203 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
24206 /* CR register traditionally saved as CR2. */
24207 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
24208 RTVEC_ELT (p, j++)
24209 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
24210 if (flag_shrink_wrap)
24212 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24213 gen_rtx_REG (Pmode, LR_REGNO),
24214 cfa_restores);
24215 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24219 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24221 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
24222 RTVEC_ELT (p, j++)
24223 = gen_frame_load (reg,
24224 frame_reg_rtx, info->gp_save_offset + reg_size * i);
24225 if (flag_shrink_wrap)
24226 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24228 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
24230 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
24231 RTVEC_ELT (p, j++)
24232 = gen_frame_load (reg,
24233 frame_reg_rtx, info->altivec_save_offset + 16 * i);
24234 if (flag_shrink_wrap)
24235 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24237 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
24239 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24240 ? DFmode : SFmode),
24241 info->first_fp_reg_save + i);
24242 RTVEC_ELT (p, j++)
24243 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
24244 if (flag_shrink_wrap)
24245 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24247 RTVEC_ELT (p, j++)
24248 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
24249 RTVEC_ELT (p, j++)
24250 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
24251 RTVEC_ELT (p, j++)
24252 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
24253 RTVEC_ELT (p, j++)
24254 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
24255 RTVEC_ELT (p, j++)
24256 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
24257 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
24259 if (flag_shrink_wrap)
24261 REG_NOTES (insn) = cfa_restores;
24262 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24263 RTX_FRAME_RELATED_P (insn) = 1;
24265 return;
24268 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
24269 if (info->push_p)
24270 frame_off = info->total_size;
24272 /* Restore AltiVec registers if we must do so before adjusting the
24273 stack. */
24274 if (TARGET_ALTIVEC_ABI
24275 && info->altivec_size != 0
24276 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24277 || (DEFAULT_ABI != ABI_V4
24278 && offset_below_red_zone_p (info->altivec_save_offset))))
24280 int i;
24281 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24283 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24284 if (use_backchain_to_restore_sp)
24286 int frame_regno = 11;
24288 if ((strategy & REST_INLINE_VRS) == 0)
24290 /* Of r11 and r12, select the one not clobbered by an
24291 out-of-line restore function for the frame register. */
24292 frame_regno = 11 + 12 - scratch_regno;
24294 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
24295 emit_move_insn (frame_reg_rtx,
24296 gen_rtx_MEM (Pmode, sp_reg_rtx));
24297 frame_off = 0;
24299 else if (frame_pointer_needed)
24300 frame_reg_rtx = hard_frame_pointer_rtx;
24302 if ((strategy & REST_INLINE_VRS) == 0)
24304 int end_save = info->altivec_save_offset + info->altivec_size;
24305 int ptr_off;
24306 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24307 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24309 if (end_save + frame_off != 0)
24311 rtx offset = GEN_INT (end_save + frame_off);
24313 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24315 else
24316 emit_move_insn (ptr_reg, frame_reg_rtx);
24318 ptr_off = -end_save;
24319 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24320 info->altivec_save_offset + ptr_off,
24321 0, V4SImode, SAVRES_VR);
24323 else
24325 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24326 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24328 rtx addr, areg, mem, reg;
24330 areg = gen_rtx_REG (Pmode, 0);
24331 emit_move_insn
24332 (areg, GEN_INT (info->altivec_save_offset
24333 + frame_off
24334 + 16 * (i - info->first_altivec_reg_save)));
24336 /* AltiVec addressing mode is [reg+reg]. */
24337 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24338 mem = gen_frame_mem (V4SImode, addr);
24340 reg = gen_rtx_REG (V4SImode, i);
24341 emit_move_insn (reg, mem);
24345 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24346 if (((strategy & REST_INLINE_VRS) == 0
24347 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24348 && (flag_shrink_wrap
24349 || (offset_below_red_zone_p
24350 (info->altivec_save_offset
24351 + 16 * (i - info->first_altivec_reg_save)))))
24353 rtx reg = gen_rtx_REG (V4SImode, i);
24354 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24358 /* Restore VRSAVE if we must do so before adjusting the stack. */
24359 if (TARGET_ALTIVEC
24360 && TARGET_ALTIVEC_VRSAVE
24361 && info->vrsave_mask != 0
24362 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24363 || (DEFAULT_ABI != ABI_V4
24364 && offset_below_red_zone_p (info->vrsave_save_offset))))
24366 rtx reg;
24368 if (frame_reg_rtx == sp_reg_rtx)
24370 if (use_backchain_to_restore_sp)
24372 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24373 emit_move_insn (frame_reg_rtx,
24374 gen_rtx_MEM (Pmode, sp_reg_rtx));
24375 frame_off = 0;
24377 else if (frame_pointer_needed)
24378 frame_reg_rtx = hard_frame_pointer_rtx;
24381 reg = gen_rtx_REG (SImode, 12);
24382 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24383 info->vrsave_save_offset + frame_off));
24385 emit_insn (generate_set_vrsave (reg, info, 1));
24388 insn = NULL_RTX;
24389 /* If we have a large stack frame, restore the old stack pointer
24390 using the backchain. */
24391 if (use_backchain_to_restore_sp)
24393 if (frame_reg_rtx == sp_reg_rtx)
24395 /* Under V.4, don't reset the stack pointer until after we're done
24396 loading the saved registers. */
24397 if (DEFAULT_ABI == ABI_V4)
24398 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24400 insn = emit_move_insn (frame_reg_rtx,
24401 gen_rtx_MEM (Pmode, sp_reg_rtx));
24402 frame_off = 0;
24404 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24405 && DEFAULT_ABI == ABI_V4)
24406 /* frame_reg_rtx has been set up by the altivec restore. */
24408 else
24410 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
24411 frame_reg_rtx = sp_reg_rtx;
24414 /* If we have a frame pointer, we can restore the old stack pointer
24415 from it. */
24416 else if (frame_pointer_needed)
24418 frame_reg_rtx = sp_reg_rtx;
24419 if (DEFAULT_ABI == ABI_V4)
24420 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24421 /* Prevent reordering memory accesses against stack pointer restore. */
24422 else if (cfun->calls_alloca
24423 || offset_below_red_zone_p (-info->total_size))
24424 rs6000_emit_stack_tie (frame_reg_rtx, true);
24426 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
24427 GEN_INT (info->total_size)));
24428 frame_off = 0;
24430 else if (info->push_p
24431 && DEFAULT_ABI != ABI_V4
24432 && !crtl->calls_eh_return)
24434 /* Prevent reordering memory accesses against stack pointer restore. */
24435 if (cfun->calls_alloca
24436 || offset_below_red_zone_p (-info->total_size))
24437 rs6000_emit_stack_tie (frame_reg_rtx, false);
24438 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
24439 GEN_INT (info->total_size)));
24440 frame_off = 0;
24442 if (insn && frame_reg_rtx == sp_reg_rtx)
24444 if (cfa_restores)
24446 REG_NOTES (insn) = cfa_restores;
24447 cfa_restores = NULL_RTX;
24449 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24450 RTX_FRAME_RELATED_P (insn) = 1;
24453 /* Restore AltiVec registers if we have not done so already. */
24454 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24455 && TARGET_ALTIVEC_ABI
24456 && info->altivec_size != 0
24457 && (DEFAULT_ABI == ABI_V4
24458 || !offset_below_red_zone_p (info->altivec_save_offset)))
24460 int i;
24462 if ((strategy & REST_INLINE_VRS) == 0)
24464 int end_save = info->altivec_save_offset + info->altivec_size;
24465 int ptr_off;
24466 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24467 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24468 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24470 if (end_save + frame_off != 0)
24472 rtx offset = GEN_INT (end_save + frame_off);
24474 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24476 else
24477 emit_move_insn (ptr_reg, frame_reg_rtx);
24479 ptr_off = -end_save;
24480 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24481 info->altivec_save_offset + ptr_off,
24482 0, V4SImode, SAVRES_VR);
24483 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24485 /* Frame reg was clobbered by out-of-line save. Restore it
24486 from ptr_reg, and if we are calling out-of-line gpr or
24487 fpr restore set up the correct pointer and offset. */
24488 unsigned newptr_regno = 1;
24489 if (!restoring_GPRs_inline)
24491 bool lr = info->gp_save_offset + info->gp_size == 0;
24492 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24493 newptr_regno = ptr_regno_for_savres (sel);
24494 end_save = info->gp_save_offset + info->gp_size;
24496 else if (!restoring_FPRs_inline)
24498 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
24499 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24500 newptr_regno = ptr_regno_for_savres (sel);
24501 	      end_save = info->fp_save_offset + info->fp_size;
24504 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
24505 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
24507 if (end_save + ptr_off != 0)
24509 rtx offset = GEN_INT (end_save + ptr_off);
24511 frame_off = -end_save;
24512 emit_insn (gen_add3_insn (frame_reg_rtx, ptr_reg, offset));
24514 else
24516 frame_off = ptr_off;
24517 emit_move_insn (frame_reg_rtx, ptr_reg);
24521 else
24523 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24524 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24526 rtx addr, areg, mem, reg;
24528 areg = gen_rtx_REG (Pmode, 0);
24529 emit_move_insn
24530 (areg, GEN_INT (info->altivec_save_offset
24531 + frame_off
24532 + 16 * (i - info->first_altivec_reg_save)));
24534 /* AltiVec addressing mode is [reg+reg]. */
24535 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24536 mem = gen_frame_mem (V4SImode, addr);
24538 reg = gen_rtx_REG (V4SImode, i);
24539 emit_move_insn (reg, mem);
24543 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24544 if (((strategy & REST_INLINE_VRS) == 0
24545 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24546 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24548 rtx reg = gen_rtx_REG (V4SImode, i);
24549 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24553 /* Restore VRSAVE if we have not done so already. */
24554 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24555 && TARGET_ALTIVEC
24556 && TARGET_ALTIVEC_VRSAVE
24557 && info->vrsave_mask != 0
24558 && (DEFAULT_ABI == ABI_V4
24559 || !offset_below_red_zone_p (info->vrsave_save_offset)))
24561 rtx reg;
24563 reg = gen_rtx_REG (SImode, 12);
24564 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24565 info->vrsave_save_offset + frame_off));
24567 emit_insn (generate_set_vrsave (reg, info, 1));
24570 /* If we exit by an out-of-line restore function on ABI_V4 then that
24571 function will deallocate the stack, so we don't need to worry
24572 about the unwinder restoring cr from an invalid stack frame
24573 location. */
24574 exit_func = (!restoring_FPRs_inline
24575 || (!restoring_GPRs_inline
24576 && info->first_fp_reg_save == 64));
24578 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
24579 *separate* slots if the routine calls __builtin_eh_return, so
24580 that they can be independently restored by the unwinder. */
24581 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24583 int i, cr_off = info->ehcr_offset;
24585 for (i = 0; i < 8; i++)
24586 if (!call_used_regs[CR0_REGNO + i])
24588 rtx reg = gen_rtx_REG (SImode, 0);
24589 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24590 cr_off + frame_off));
24592 insn = emit_insn (gen_movsi_to_cr_one
24593 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24595 if (!exit_func && flag_shrink_wrap)
24597 add_reg_note (insn, REG_CFA_RESTORE,
24598 gen_rtx_REG (SImode, CR0_REGNO + i));
24600 RTX_FRAME_RELATED_P (insn) = 1;
24603 cr_off += reg_size;
24607 /* Get the old lr if we saved it. If we are restoring registers
24608 out-of-line, then the out-of-line routines can do this for us. */
24609 if (restore_lr && restoring_GPRs_inline)
24610 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
24612 /* Get the old cr if we saved it. */
24613 if (info->cr_save_p)
24615 unsigned cr_save_regno = 12;
24617 if (!restoring_GPRs_inline)
24619 /* Ensure we don't use the register used by the out-of-line
24620 	     gpr restore below.  */
24621 bool lr = info->gp_save_offset + info->gp_size == 0;
24622 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24623 int gpr_ptr_regno = ptr_regno_for_savres (sel);
24625 if (gpr_ptr_regno == 12)
24626 cr_save_regno = 11;
24627 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
24629 else if (REGNO (frame_reg_rtx) == 12)
24630 cr_save_regno = 11;
24632 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
24633 info->cr_save_offset + frame_off,
24634 exit_func);
24637 /* Set LR here to try to overlap restores below. */
24638 if (restore_lr && restoring_GPRs_inline)
24639 restore_saved_lr (0, exit_func);
24641 /* Load exception handler data registers, if needed. */
24642 if (crtl->calls_eh_return)
24644 unsigned int i, regno;
24646 if (TARGET_AIX)
24648 rtx reg = gen_rtx_REG (reg_mode, 2);
24649 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24650 frame_off + RS6000_TOC_SAVE_SLOT));
24653 for (i = 0; ; ++i)
24655 rtx mem;
24657 regno = EH_RETURN_DATA_REGNO (i);
24658 if (regno == INVALID_REGNUM)
24659 break;
24661 /* Note: possible use of r0 here to address SPE regs. */
24662 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
24663 info->ehrd_offset + frame_off
24664 + reg_size * (int) i);
24666 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
24670 /* Restore GPRs. This is done as a PARALLEL if we are using
24671 the load-multiple instructions. */
24672 if (TARGET_SPE_ABI
24673 && info->spe_64bit_regs_used
24674 && info->first_gp_reg_save != 32)
24676 /* Determine whether we can address all of the registers that need
24677 to be saved with an offset from frame_reg_rtx that fits in
24678 the small const field for SPE memory instructions. */
24679 int spe_regs_addressable
24680 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
24681 + reg_size * (32 - info->first_gp_reg_save - 1))
24682 && restoring_GPRs_inline);
24684 if (!spe_regs_addressable)
24686 int ool_adjust = 0;
24687 rtx old_frame_reg_rtx = frame_reg_rtx;
24688 /* Make r11 point to the start of the SPE save area. We worried about
24689 not clobbering it when we were saving registers in the prologue.
24690 There's no need to worry here because the static chain is passed
24691 anew to every function. */
24693 if (!restoring_GPRs_inline)
24694 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
24695 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24696 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
24697 GEN_INT (info->spe_gp_save_offset
24698 + frame_off
24699 - ool_adjust)));
24700 /* Keep the invariant that frame_reg_rtx + frame_off points
24701 at the top of the stack frame. */
24702 frame_off = -info->spe_gp_save_offset + ool_adjust;
24705 if (restoring_GPRs_inline)
24707 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
24709 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24710 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
24712 rtx offset, addr, mem, reg;
24714 /* We're doing all this to ensure that the immediate offset
24715 fits into the immediate field of 'evldd'. */
24716 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
24718 offset = GEN_INT (spe_offset + reg_size * i);
24719 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
24720 mem = gen_rtx_MEM (V2SImode, addr);
24721 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
24723 emit_move_insn (reg, mem);
24726 else
24727 rs6000_emit_savres_rtx (info, frame_reg_rtx,
24728 info->spe_gp_save_offset + frame_off,
24729 info->lr_save_offset + frame_off,
24730 reg_mode,
24731 SAVRES_GPR | SAVRES_LR);
24733 else if (!restoring_GPRs_inline)
24735 /* We are jumping to an out-of-line function. */
24736 rtx ptr_reg;
24737 int end_save = info->gp_save_offset + info->gp_size;
24738 bool can_use_exit = end_save == 0;
24739 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
24740 int ptr_off;
24742 /* Emit stack reset code if we need it. */
24743 ptr_regno = ptr_regno_for_savres (sel);
24744 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
24745 if (can_use_exit)
24746 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
24747 else if (end_save + frame_off != 0)
24748 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
24749 GEN_INT (end_save + frame_off)));
24750 else if (REGNO (frame_reg_rtx) != ptr_regno)
24751 emit_move_insn (ptr_reg, frame_reg_rtx);
24752 if (REGNO (frame_reg_rtx) == ptr_regno)
24753 frame_off = -end_save;
24755 if (can_use_exit && info->cr_save_p)
24756 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
24758 ptr_off = -end_save;
24759 rs6000_emit_savres_rtx (info, ptr_reg,
24760 info->gp_save_offset + ptr_off,
24761 info->lr_save_offset + ptr_off,
24762 reg_mode, sel);
24764 else if (using_load_multiple)
24766 rtvec p;
24767 p = rtvec_alloc (32 - info->first_gp_reg_save);
24768 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24769 RTVEC_ELT (p, i)
24770 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
24771 frame_reg_rtx,
24772 info->gp_save_offset + frame_off + reg_size * i);
24773 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24775 else
24777 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24778 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
24779 emit_insn (gen_frame_load
24780 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
24781 frame_reg_rtx,
24782 info->gp_save_offset + frame_off + reg_size * i));
24785 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
24787 /* If the frame pointer was used then we can't delay emitting
24788 a REG_CFA_DEF_CFA note. This must happen on the insn that
24789 restores the frame pointer, r31. We may have already emitted
24790 	 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
24791 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
24792 be harmless if emitted. */
24793 if (frame_pointer_needed)
24795 insn = get_last_insn ();
24796 add_reg_note (insn, REG_CFA_DEF_CFA,
24797 plus_constant (Pmode, frame_reg_rtx, frame_off));
24798 RTX_FRAME_RELATED_P (insn) = 1;
24801 /* Set up cfa_restores. We always need these when
24802 shrink-wrapping. If not shrink-wrapping then we only need
24803 the cfa_restore when the stack location is no longer valid.
24804 The cfa_restores must be emitted on or before the insn that
24805 invalidates the stack, and of course must not be emitted
24806 before the insn that actually does the restore. The latter
24807 is why it is a bad idea to emit the cfa_restores as a group
24808 on the last instruction here that actually does a restore:
24809 That insn may be reordered with respect to others doing
24810 restores. */
24811 if (flag_shrink_wrap
24812 && !restoring_GPRs_inline
24813 && info->first_fp_reg_save == 64)
24814 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
24816 for (i = info->first_gp_reg_save; i < 32; i++)
24817 if (!restoring_GPRs_inline
24818 || using_load_multiple
24819 || rs6000_reg_live_or_pic_offset_p (i))
24821 rtx reg = gen_rtx_REG (reg_mode, i);
24823 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24827 if (!restoring_GPRs_inline
24828 && info->first_fp_reg_save == 64)
24830 /* We are jumping to an out-of-line function. */
24831 if (cfa_restores)
24832 emit_cfa_restores (cfa_restores);
24833 return;
24836 if (restore_lr && !restoring_GPRs_inline)
24838 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
24839 restore_saved_lr (0, exit_func);
24842 /* Restore fpr's if we need to do it without calling a function. */
24843 if (restoring_FPRs_inline)
24844 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
24845 if (save_reg_p (info->first_fp_reg_save + i))
24847 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24848 ? DFmode : SFmode),
24849 info->first_fp_reg_save + i);
24850 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24851 info->fp_save_offset + frame_off + 8 * i));
24852 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
24853 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24856 /* If we saved cr, restore it here. Just those that were used. */
24857 if (info->cr_save_p)
24858 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
24860 /* If this is V.4, unwind the stack pointer after all of the loads
24861 have been done, or set up r11 if we are restoring fp out of line. */
24862 ptr_regno = 1;
24863 if (!restoring_FPRs_inline)
24865 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
24866 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24867 ptr_regno = ptr_regno_for_savres (sel);
24870 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
24871 if (REGNO (frame_reg_rtx) == ptr_regno)
24872 frame_off = 0;
24874 if (insn && restoring_FPRs_inline)
24876 if (cfa_restores)
24878 REG_NOTES (insn) = cfa_restores;
24879 cfa_restores = NULL_RTX;
24881 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24882 RTX_FRAME_RELATED_P (insn) = 1;
24885 if (crtl->calls_eh_return)
24887 rtx sa = EH_RETURN_STACKADJ_RTX;
24888 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
24891 if (!sibcall)
24893 rtvec p;
24894 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
24895 if (! restoring_FPRs_inline)
24897 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
24898 RTVEC_ELT (p, 0) = ret_rtx;
24900 else
24902 if (cfa_restores)
24904 /* We can't hang the cfa_restores off a simple return,
24905 since the shrink-wrap code sometimes uses an existing
24906 return. This means there might be a path from
24907 pre-prologue code to this return, and dwarf2cfi code
24908 wants the eh_frame unwinder state to be the same on
24909 all paths to any point. So we need to emit the
24910 cfa_restores before the return. For -m64 we really
24911 don't need epilogue cfa_restores at all, except for
24912 	     this irritating dwarf2cfi-with-shrink-wrap
24913 	     requirement; the stack red-zone means eh_frame info
24914 from the prologue telling the unwinder to restore
24915 from the stack is perfectly good right to the end of
24916 the function. */
24917 emit_insn (gen_blockage ());
24918 emit_cfa_restores (cfa_restores);
24919 cfa_restores = NULL_RTX;
24921 p = rtvec_alloc (2);
24922 RTVEC_ELT (p, 0) = simple_return_rtx;
24925 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
24926 ? gen_rtx_USE (VOIDmode,
24927 gen_rtx_REG (Pmode, LR_REGNO))
24928 : gen_rtx_CLOBBER (VOIDmode,
24929 gen_rtx_REG (Pmode, LR_REGNO)));
24931 /* If we have to restore more than two FP registers, branch to the
24932 restore function. It will return to our caller. */
24933 if (! restoring_FPRs_inline)
24935 int i;
24936 int reg;
24937 rtx sym;
24939 if (flag_shrink_wrap)
24940 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
24942 sym = rs6000_savres_routine_sym (info,
24943 SAVRES_FPR | (lr ? SAVRES_LR : 0));
24944 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
24945 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
24946 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
24948 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
24950 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
24952 RTVEC_ELT (p, i + 4)
24953 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
24954 if (flag_shrink_wrap)
24955 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
24956 cfa_restores);
24960 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
24963 if (cfa_restores)
24965 if (sibcall)
24966 /* Ensure the cfa_restores are hung off an insn that won't
24967 be reordered above other restores. */
24968 emit_insn (gen_blockage ());
24970 emit_cfa_restores (cfa_restores);
24974 /* Write function epilogue. */
24976 static void
24977 rs6000_output_function_epilogue (FILE *file,
24978 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
24980 #if TARGET_MACHO
24981 macho_branch_islands ();
24982 /* Mach-O doesn't support labels at the end of objects, so if
24983 it looks like we might want one, insert a NOP. */
24985 rtx insn = get_last_insn ();
24986 rtx deleted_debug_label = NULL_RTX;
24987 while (insn
24988 && NOTE_P (insn)
24989 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
24991       /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
24992 	 notes; instead set their CODE_LABEL_NUMBER to -1.
24993 	 Otherwise there would be code generation differences
24994 	 between -g and -g0.  */
24995 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
24996 deleted_debug_label = insn;
24997 insn = PREV_INSN (insn);
24999 if (insn
25000 && (LABEL_P (insn)
25001 || (NOTE_P (insn)
25002 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
25003 fputs ("\tnop\n", file);
25004 else if (deleted_debug_label)
25005 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
25006 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25007 CODE_LABEL_NUMBER (insn) = -1;
25009 #endif
25011 /* Output a traceback table here. See /usr/include/sys/debug.h for info
25012 on its format.
25014 We don't output a traceback table if -finhibit-size-directive was
25015 used. The documentation for -finhibit-size-directive reads
25016 ``don't output a @code{.size} assembler directive, or anything
25017 else that would cause trouble if the function is split in the
25018 middle, and the two halves are placed at locations far apart in
25019 memory.'' The traceback table has this property, since it
25020 includes the offset from the start of the function to the
25021 traceback table itself.
25023      System V.4 PowerPC (and the embedded ABI derived from it) uses a
25024 different traceback table. */
25025 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25026 && ! flag_inhibit_size_directive
25027 && rs6000_traceback != traceback_none && !cfun->is_thunk)
25029 const char *fname = NULL;
25030 const char *language_string = lang_hooks.name;
25031 int fixed_parms = 0, float_parms = 0, parm_info = 0;
25032 int i;
25033 int optional_tbtab;
25034 rs6000_stack_t *info = rs6000_stack_info ();
25036 if (rs6000_traceback == traceback_full)
25037 optional_tbtab = 1;
25038 else if (rs6000_traceback == traceback_part)
25039 optional_tbtab = 0;
25040 else
25041 optional_tbtab = !optimize_size && !TARGET_ELF;
25043 if (optional_tbtab)
25045 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25046 while (*fname == '.') /* V.4 encodes . in the name */
25047 fname++;
25049 /* Need label immediately before tbtab, so we can compute
25050 its offset from the function start. */
25051 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25052 ASM_OUTPUT_LABEL (file, fname);
25055 /* The .tbtab pseudo-op can only be used for the first eight
25056 expressions, since it can't handle the possibly variable
25057 length fields that follow. However, if you omit the optional
25058 fields, the assembler outputs zeros for all optional fields
25059 	 anyway, giving each variable-length field its minimum length
25060 	 (as defined in sys/debug.h).  Thus we cannot use the .tbtab
25061 pseudo-op at all. */
25063 /* An all-zero word flags the start of the tbtab, for debuggers
25064 that have to find it by searching forward from the entry
25065 point or from the current pc. */
25066 fputs ("\t.long 0\n", file);
25068 /* Tbtab format type. Use format type 0. */
25069 fputs ("\t.byte 0,", file);
25071 /* Language type. Unfortunately, there does not seem to be any
25072 official way to discover the language being compiled, so we
25073 use language_string.
25074 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
25075 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
25076 a number, so for now use 9. LTO and Go aren't assigned numbers
25077 either, so for now use 0. */
25078 if (! strcmp (language_string, "GNU C")
25079 || ! strcmp (language_string, "GNU GIMPLE")
25080 || ! strcmp (language_string, "GNU Go"))
25081 i = 0;
25082 else if (! strcmp (language_string, "GNU F77")
25083 || ! strcmp (language_string, "GNU Fortran"))
25084 i = 1;
25085 else if (! strcmp (language_string, "GNU Pascal"))
25086 i = 2;
25087 else if (! strcmp (language_string, "GNU Ada"))
25088 i = 3;
25089 else if (! strcmp (language_string, "GNU C++")
25090 || ! strcmp (language_string, "GNU Objective-C++"))
25091 i = 9;
25092 else if (! strcmp (language_string, "GNU Java"))
25093 i = 13;
25094 else if (! strcmp (language_string, "GNU Objective-C"))
25095 i = 14;
25096 else
25097 gcc_unreachable ();
25098 fprintf (file, "%d,", i);
25100 /* 8 single bit fields: global linkage (not set for C extern linkage,
25101 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
25102 from start of procedure stored in tbtab, internal function, function
25103 has controlled storage, function has no toc, function uses fp,
25104 function logs/aborts fp operations. */
25105 /* Assume that fp operations are used if any fp reg must be saved. */
25106 fprintf (file, "%d,",
25107 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
25109 /* 6 bitfields: function is interrupt handler, name present in
25110 proc table, function calls alloca, on condition directives
25111 (controls stack walks, 3 bits), saves condition reg, saves
25112 link reg. */
25113 /* The `function calls alloca' bit seems to be set whenever reg 31 is
25114 set up as a frame pointer, even when there is no alloca call. */
25115 fprintf (file, "%d,",
25116 ((optional_tbtab << 6)
25117 | ((optional_tbtab & frame_pointer_needed) << 5)
25118 | (info->cr_save_p << 1)
25119 | (info->lr_save_p)));
25121 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
25122 (6 bits). */
25123 fprintf (file, "%d,",
25124 (info->push_p << 7) | (64 - info->first_fp_reg_save));
25126 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
25127 fprintf (file, "%d,", (32 - first_reg_to_save ()));
25129 if (optional_tbtab)
25131 /* Compute the parameter info from the function decl argument
25132 list. */
25133 tree decl;
25134 int next_parm_info_bit = 31;
25136 for (decl = DECL_ARGUMENTS (current_function_decl);
25137 decl; decl = DECL_CHAIN (decl))
25139 rtx parameter = DECL_INCOMING_RTL (decl);
25140 enum machine_mode mode = GET_MODE (parameter);
25142 if (GET_CODE (parameter) == REG)
25144 if (SCALAR_FLOAT_MODE_P (mode))
25146 int bits;
25148 float_parms++;
25150 switch (mode)
25152 case SFmode:
25153 case SDmode:
25154 bits = 0x2;
25155 break;
25157 case DFmode:
25158 case DDmode:
25159 case TFmode:
25160 case TDmode:
25161 bits = 0x3;
25162 break;
25164 default:
25165 gcc_unreachable ();
25168 /* If only one bit will fit, don't or in this entry. */
25169 if (next_parm_info_bit > 0)
25170 parm_info |= (bits << (next_parm_info_bit - 1));
25171 next_parm_info_bit -= 2;
25173 else
25175 fixed_parms += ((GET_MODE_SIZE (mode)
25176 + (UNITS_PER_WORD - 1))
25177 / UNITS_PER_WORD);
25178 next_parm_info_bit -= 1;
25184 /* Number of fixed point parameters. */
25185 /* This is actually the number of words of fixed point parameters; thus
25186 	 an 8-byte struct counts as 2, and thus the maximum value is 8.  */
25187 fprintf (file, "%d,", fixed_parms);
25189 /* 2 bitfields: number of floating point parameters (7 bits), parameters
25190 all on stack. */
25191 /* This is actually the number of fp registers that hold parameters;
25192 and thus the maximum value is 13. */
25193 /* Set parameters on stack bit if parameters are not in their original
25194 registers, regardless of whether they are on the stack? Xlc
25195 seems to set the bit when not optimizing. */
25196 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
25198 if (! optional_tbtab)
25199 return;
25201 /* Optional fields follow. Some are variable length. */
25203 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
25204 11 double float. */
25205 /* There is an entry for each parameter in a register, in the order that
25206 they occur in the parameter list. Any intervening arguments on the
25207 stack are ignored. If the list overflows a long (max possible length
25208 34 bits) then completely leave off all elements that don't fit. */
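      /* Worked example: for f (int, double, float) the bits are packed
	 from bit 31 down as 0 (fixed), 11 (double), 10 (single float),
	 giving parm_info == 0x70000000, emitted as ".long 1879048192".  */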
25209 /* Only emit this long if there was at least one parameter. */
25210 if (fixed_parms || float_parms)
25211 fprintf (file, "\t.long %d\n", parm_info);
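/* Hypothetical example: for a function taking (double, int) with both
   arguments in registers, the double contributes the pair `11' at bits
   31:30 of parm_info and the int leaves bit 29 as `0' (fixed), so the
   word emitted above would be 0xc0000000.  */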
25213 /* Offset from start of code to tb table. */
25214 fputs ("\t.long ", file);
25215 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25216 RS6000_OUTPUT_BASENAME (file, fname);
25217 putc ('-', file);
25218 rs6000_output_function_entry (file, fname);
25219 putc ('\n', file);
25221 /* Interrupt handler mask. */
25222 /* Omit this long, since we never set the interrupt handler bit
25223 above. */
25225 /* Number of CTL (controlled storage) anchors. */
25226 /* Omit this long, since the has_ctl bit is never set above. */
25228 /* Displacement into stack of each CTL anchor. */
25229 /* Omit this list of longs, because there are no CTL anchors. */
25231 /* Length of function name. */
25232 if (*fname == '*')
25233 ++fname;
25234 fprintf (file, "\t.short %d\n", (int) strlen (fname));
25236 /* Function name. */
25237 assemble_string (fname, strlen (fname));
25239 /* Register for alloca automatic storage; this is always reg 31.
25240 Only emit this if the alloca bit was set above. */
25241 if (frame_pointer_needed)
25242 fputs ("\t.byte 31\n", file);
25244 fputs ("\t.align 2\n", file);
25248 /* A C compound statement that outputs the assembler code for a thunk
25249 function, used to implement C++ virtual function calls with
25250 multiple inheritance. The thunk acts as a wrapper around a virtual
25251 function, adjusting the implicit object parameter before handing
25252 control off to the real function.
25254 First, emit code to add the integer DELTA to the location that
25255 contains the incoming first argument. Assume that this argument
25256 contains a pointer, and is the one used to pass the `this' pointer
25257 in C++. This is the incoming argument *before* the function
25258 prologue, e.g. `%o0' on a sparc. The addition must preserve the
25259 values of all other incoming arguments.
25261 After the addition, emit code to jump to FUNCTION, which is a
25262 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
25263 not touch the return address. Hence returning from FUNCTION will
25264 return to whoever called the current `thunk'.
25266 The effect must be as if FUNCTION had been called directly with the
25267 adjusted first argument. This macro is responsible for emitting
25268 all of the code for a thunk function; output_function_prologue()
25269 and output_function_epilogue() are not invoked.
25271 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
25272 been extracted from it.) It might possibly be useful on some
25273 targets, but probably not.
25275 If you do not define this macro, the target-independent code in the
25276 C++ frontend will generate a less efficient heavyweight thunk that
25277 calls FUNCTION instead of jumping to it. The generic approach does
25278 not support varargs. */
25280 static void
25281 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
25282 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
25283 tree function)
25285 rtx this_rtx, insn, funexp;
25287 reload_completed = 1;
25288 epilogue_completed = 1;
25290 /* Mark the end of the (empty) prologue. */
25291 emit_note (NOTE_INSN_PROLOGUE_END);
25293 /* Find the "this" pointer. If the function returns a structure,
25294 the structure return pointer is in r3. */
25295 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
25296 this_rtx = gen_rtx_REG (Pmode, 4);
25297 else
25298 this_rtx = gen_rtx_REG (Pmode, 3);
25300 /* Apply the constant offset, if required. */
25301 if (delta)
25302 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
25304 /* Apply the offset from the vtable, if required. */
25305 if (vcall_offset)
25307 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
25308 rtx tmp = gen_rtx_REG (Pmode, 12);
25310 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
25311 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
25313 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
25314 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
25316 else
25318 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
25320 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
25322 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
25325 /* Generate a tail call to the target function. */
25326 if (!TREE_USED (function))
25328 assemble_external (function);
25329 TREE_USED (function) = 1;
25331 funexp = XEXP (DECL_RTL (function), 0);
25332 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
25334 #if TARGET_MACHO
25335 if (MACHOPIC_INDIRECT)
25336 funexp = machopic_indirect_call_target (funexp);
25337 #endif
25339 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
25340 generate sibcall RTL explicitly. */
25341 insn = emit_call_insn (
25342 gen_rtx_PARALLEL (VOIDmode,
25343 gen_rtvec (4,
25344 gen_rtx_CALL (VOIDmode,
25345 funexp, const0_rtx),
25346 gen_rtx_USE (VOIDmode, const0_rtx),
25347 gen_rtx_USE (VOIDmode,
25348 gen_rtx_REG (SImode,
25349 LR_REGNO)),
25350 simple_return_rtx)));
25351 SIBLING_CALL_P (insn) = 1;
25352 emit_barrier ();
25354 /* Ensure we have a global entry point for the thunk. ??? We could
25355 avoid that if the target routine doesn't need a global entry point,
25356 but we do not know whether this is the case at this point. */
25357 if (DEFAULT_ABI == ABI_ELFv2)
25358 cfun->machine->r2_setup_needed = true;
25360 /* Run just enough of rest_of_compilation to get the insns emitted.
25361 There's not really enough bulk here to make other passes such as
25362 instruction scheduling worth while. Note that use_thunk calls
25363 assemble_start_function and assemble_end_function. */
25364 insn = get_insns ();
25365 shorten_branches (insn);
25366 final_start_function (insn, file, 1);
25367 final (insn, file, 1);
25368 final_end_function ();
25370 reload_completed = 0;
25371 epilogue_completed = 0;
25374 /* A quick summary of the various types of 'constant-pool tables'
25375 under PowerPC:
25377    Target          Flags           Name             One table per
25378    AIX             (none)          AIX TOC          object file
25379    AIX             -mfull-toc      AIX TOC          object file
25380    AIX             -mminimal-toc   AIX minimal TOC  translation unit
25381    SVR4/EABI       (none)          SVR4 SDATA       object file
25382    SVR4/EABI       -fpic           SVR4 pic         object file
25383    SVR4/EABI       -fPIC           SVR4 PIC         translation unit
25384    SVR4/EABI       -mrelocatable   EABI TOC         function
25385    SVR4/EABI       -maix           AIX TOC          object file
25386    SVR4/EABI       -maix -mminimal-toc
25387                                    AIX minimal TOC  translation unit
25389    Name              Reg.  Set by  entries  contains:
25390                                    made by  addrs?   fp?      sum?
25392    AIX TOC           2     crt0    as       Y        option   option
25393    AIX minimal TOC   30    prolog  gcc      Y        Y        option
25394    SVR4 SDATA        13    crt0    gcc      N        Y        N
25395    SVR4 pic          30    prolog  ld       Y        not yet  N
25396    SVR4 PIC          30    prolog  gcc      Y        option   option
25397    EABI TOC          30    prolog  gcc      Y        option   option
25401 /* Hash functions for the hash table. */
25403 static unsigned
25404 rs6000_hash_constant (rtx k)
25406 enum rtx_code code = GET_CODE (k);
25407 enum machine_mode mode = GET_MODE (k);
25408 unsigned result = (code << 3) ^ mode;
25409 const char *format;
25410 int flen, fidx;
25412 format = GET_RTX_FORMAT (code);
25413 flen = strlen (format);
25414 fidx = 0;
25416 switch (code)
25418 case LABEL_REF:
25419 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
25421 case CONST_DOUBLE:
25422 if (mode != VOIDmode)
25423 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
25424 flen = 2;
25425 break;
25427 case CODE_LABEL:
25428 fidx = 3;
25429 break;
25431 default:
25432 break;
25435 for (; fidx < flen; fidx++)
25436 switch (format[fidx])
25438 case 's':
25440 unsigned i, len;
25441 const char *str = XSTR (k, fidx);
25442 len = strlen (str);
25443 result = result * 613 + len;
25444 for (i = 0; i < len; i++)
25445 result = result * 613 + (unsigned) str[i];
25446 break;
25448 case 'u':
25449 case 'e':
25450 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
25451 break;
25452 case 'i':
25453 case 'n':
25454 result = result * 613 + (unsigned) XINT (k, fidx);
25455 break;
25456 case 'w':
25457 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
25458 result = result * 613 + (unsigned) XWINT (k, fidx);
25459 else
25461 size_t i;
25462 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
25463 result = result * 613 + (unsigned) (XWINT (k, fidx)
25464 >> CHAR_BIT * i);
25466 break;
25467 case '0':
25468 break;
25469 default:
25470 gcc_unreachable ();
25473 return result;
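/* The multipliers 613 and 1231 are primes, evidently chosen to mix
   bits; the result is stable for rtx_equal_p-equal keys because the
   walk over the RTX format string is deterministic.  */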
25476 static unsigned
25477 toc_hash_function (const void *hash_entry)
25479 const struct toc_hash_struct *thc =
25480 (const struct toc_hash_struct *) hash_entry;
25481 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
25484 /* Compare H1 and H2 for equivalence. */
25486 static int
25487 toc_hash_eq (const void *h1, const void *h2)
25489 rtx r1 = ((const struct toc_hash_struct *) h1)->key;
25490 rtx r2 = ((const struct toc_hash_struct *) h2)->key;
25492 if (((const struct toc_hash_struct *) h1)->key_mode
25493 != ((const struct toc_hash_struct *) h2)->key_mode)
25494 return 0;
25496 return rtx_equal_p (r1, r2);
25499 /* These are the names given by the C++ front-end to vtables, and
25500 vtable-like objects. Ideally, this logic should not be here;
25501 instead, there should be some programmatic way of inquiring as
25502 to whether or not an object is a vtable. */
25504 #define VTABLE_NAME_P(NAME) \
25505 (strncmp ("_vt.", NAME, strlen ("_vt.")) == 0 \
25506 || strncmp ("_ZTV", NAME, strlen ("_ZTV")) == 0 \
25507 || strncmp ("_ZTT", NAME, strlen ("_ZTT")) == 0 \
25508 || strncmp ("_ZTI", NAME, strlen ("_ZTI")) == 0 \
25509 || strncmp ("_ZTC", NAME, strlen ("_ZTC")) == 0)
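/* For instance, `_ZTV4Node' (vtable), `_ZTI4Node' (typeinfo),
   `_ZTT4Node' (VTT) and the old-ABI `_vt.Node' all satisfy
   VTABLE_NAME_P (the class name `Node' is hypothetical), while an
   ordinary function symbol such as `_Z3foov' does not.  */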
25511 #ifdef NO_DOLLAR_IN_LABEL
25512 /* Return a GGC-allocated character string translating dollar signs in
25513 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
25515 const char *
25516 rs6000_xcoff_strip_dollar (const char *name)
25518 char *strip, *p;
25519 const char *q;
25520 size_t len;
25522 q = (const char *) strchr (name, '$');
25524 if (q == 0 || q == name)
25525 return name;
25527 len = strlen (name);
25528 strip = XALLOCAVEC (char, len + 1);
25529 strcpy (strip, name);
25530 p = strip + (q - name);
25531 while (p)
25533 *p = '_';
25534 p = strchr (p + 1, '$');
25537 return ggc_alloc_string (strip, len);
25539 #endif
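/* Example: "a$b$c" yields "a_b_c", since every `$' after the first
   character is rewritten; a name that starts with `$', or contains
   none, is returned unchanged.  */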
25541 void
25542 rs6000_output_symbol_ref (FILE *file, rtx x)
25544 /* Currently C++ toc references to vtables can be emitted before it
25545 is decided whether the vtable is public or private. If this is
25546 the case, then the linker will eventually complain that there is
25547 a reference to an unknown section. Thus, for vtables only,
25548 we emit the TOC reference to reference the symbol and not the
25549 section. */
25550 const char *name = XSTR (x, 0);
25552 if (VTABLE_NAME_P (name))
25554 RS6000_OUTPUT_BASENAME (file, name);
25556 else
25557 assemble_name (file, name);
25560 /* Output a TOC entry. We derive the entry name from what is being
25561 written. */
25563 void
25564 output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode)
25566 char buf[256];
25567 const char *name = buf;
25568 rtx base = x;
25569 HOST_WIDE_INT offset = 0;
25571 gcc_assert (!TARGET_NO_TOC);
25573 /* When the linker won't eliminate them, don't output duplicate
25574 TOC entries (this happens on AIX if there is any kind of TOC,
25575 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
25576 CODE_LABELs. */
25577 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
25579 struct toc_hash_struct *h;
25580 void **found;
25582 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
25583 time because GGC is not initialized at that point. */
25584 if (toc_hash_table == NULL)
25585 toc_hash_table = htab_create_ggc (1021, toc_hash_function,
25586 toc_hash_eq, NULL);
25588 h = ggc_alloc_toc_hash_struct ();
25589 h->key = x;
25590 h->key_mode = mode;
25591 h->labelno = labelno;
25593 found = htab_find_slot (toc_hash_table, h, INSERT);
25594 if (*found == NULL)
25595 *found = h;
25596 else /* This is indeed a duplicate.
25597 Set this label equal to that label. */
25599 fputs ("\t.set ", file);
25600 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
25601 fprintf (file, "%d,", labelno);
25602 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
25603 fprintf (file, "%d\n", ((*(const struct toc_hash_struct **)
25604 found)->labelno));
25606 #ifdef HAVE_AS_TLS
25607 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
25608 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
25609 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
25611 fputs ("\t.set ", file);
25612 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
25613 fprintf (file, "%d,", labelno);
25614 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
25615 fprintf (file, "%d\n", ((*(const struct toc_hash_struct **)
25616 found)->labelno));
25618 #endif
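/* A duplicate constant thus collapses to a single assembler alias,
   e.g. (with hypothetical label numbers)

   .set LC..7,LC..2

   so only the first occurrence actually occupies a TOC slot.  */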
25619 return;
25623 /* If we're going to put a double constant in the TOC, make sure it's
25624 aligned properly when strict alignment is on. */
25625 if (GET_CODE (x) == CONST_DOUBLE
25626 && STRICT_ALIGNMENT
25627 && GET_MODE_BITSIZE (mode) >= 64
25628 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC))
25629 ASM_OUTPUT_ALIGN (file, 3);
25632 (*targetm.asm_out.internal_label) (file, "LC", labelno);
25634 /* Handle FP constants specially. Note that if we have a minimal
25635 TOC, things we put here aren't actually in the TOC, so we can allow
25636 FP constants. */
25637 if (GET_CODE (x) == CONST_DOUBLE
25638 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode))
25640 REAL_VALUE_TYPE rv;
25641 long k[4];
25643 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
25644 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
25645 REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
25646 else
25647 REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
25649 if (TARGET_64BIT)
25651 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25652 fputs (DOUBLE_INT_ASM_OP, file);
25653 else
25654 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
25655 k[0] & 0xffffffff, k[1] & 0xffffffff,
25656 k[2] & 0xffffffff, k[3] & 0xffffffff);
25657 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
25658 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
25659 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
25660 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
25661 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
25662 return;
25664 else
25666 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25667 fputs ("\t.long ", file);
25668 else
25669 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
25670 k[0] & 0xffffffff, k[1] & 0xffffffff,
25671 k[2] & 0xffffffff, k[3] & 0xffffffff);
25672 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
25673 k[0] & 0xffffffff, k[1] & 0xffffffff,
25674 k[2] & 0xffffffff, k[3] & 0xffffffff);
25675 return;
25678 else if (GET_CODE (x) == CONST_DOUBLE
25679 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
25681 REAL_VALUE_TYPE rv;
25682 long k[2];
25684 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
25686 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
25687 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
25688 else
25689 REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
25691 if (TARGET_64BIT)
25693 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25694 fputs (DOUBLE_INT_ASM_OP, file);
25695 else
25696 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
25697 k[0] & 0xffffffff, k[1] & 0xffffffff);
25698 fprintf (file, "0x%lx%08lx\n",
25699 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
25700 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
25701 return;
25703 else
25705 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25706 fputs ("\t.long ", file);
25707 else
25708 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
25709 k[0] & 0xffffffff, k[1] & 0xffffffff);
25710 fprintf (file, "0x%lx,0x%lx\n",
25711 k[0] & 0xffffffff, k[1] & 0xffffffff);
25712 return;
25715 else if (GET_CODE (x) == CONST_DOUBLE
25716 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
25718 REAL_VALUE_TYPE rv;
25719 long l;
25721 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
25722 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
25723 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
25724 else
25725 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
25727 if (TARGET_64BIT)
25729 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25730 fputs (DOUBLE_INT_ASM_OP, file);
25731 else
25732 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
25733 if (WORDS_BIG_ENDIAN)
25734 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
25735 else
25736 fprintf (file, "0x%lx\n", l & 0xffffffff);
25737 return;
25739 else
25741 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25742 fputs ("\t.long ", file);
25743 else
25744 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
25745 fprintf (file, "0x%lx\n", l & 0xffffffff);
25746 return;
25749 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
25751 unsigned HOST_WIDE_INT low;
25752 HOST_WIDE_INT high;
25754 low = INTVAL (x) & 0xffffffff;
25755 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
25757 /* TOC entries are always Pmode-sized, so on big-endian targets
25758 smaller integer constants in the TOC need to be padded.
25759 (This is still a win over putting the constants in
25760 a separate constant pool, because then we'd have
25761 to have both a TOC entry _and_ the actual constant.)
25763 For a 32-bit target, CONST_INT values are loaded and shifted
25764 entirely within `low' and can be stored in one TOC entry. */
25766 /* It would be easy to make this work, but it doesn't now. */
25767 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
25769 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
25771 low |= high << 32;
25772 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
25773 high = (HOST_WIDE_INT) low >> 32;
25774 low &= 0xffffffff;
25777 if (TARGET_64BIT)
25779 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25780 fputs (DOUBLE_INT_ASM_OP, file);
25781 else
25782 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
25783 (long) high & 0xffffffff, (long) low & 0xffffffff);
25784 fprintf (file, "0x%lx%08lx\n",
25785 (long) high & 0xffffffff, (long) low & 0xffffffff);
25786 return;
25788 else
25790 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
25792 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25793 fputs ("\t.long ", file);
25794 else
25795 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
25796 (long) high & 0xffffffff, (long) low & 0xffffffff);
25797 fprintf (file, "0x%lx,0x%lx\n",
25798 (long) high & 0xffffffff, (long) low & 0xffffffff);
25800 else
25802 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25803 fputs ("\t.long ", file);
25804 else
25805 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
25806 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
25808 return;
25812 if (GET_CODE (x) == CONST)
25814 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
25815 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
25817 base = XEXP (XEXP (x, 0), 0);
25818 offset = INTVAL (XEXP (XEXP (x, 0), 1));
25821 switch (GET_CODE (base))
25823 case SYMBOL_REF:
25824 name = XSTR (base, 0);
25825 break;
25827 case LABEL_REF:
25828 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
25829 CODE_LABEL_NUMBER (XEXP (base, 0)));
25830 break;
25832 case CODE_LABEL:
25833 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
25834 break;
25836 default:
25837 gcc_unreachable ();
25840 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25841 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
25842 else
25844 fputs ("\t.tc ", file);
25845 RS6000_OUTPUT_BASENAME (file, name);
25847 if (offset < 0)
25848 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
25849 else if (offset)
25850 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
25852 /* Mark large TOC symbols on AIX with [TE] so they are mapped
25853 after other TOC symbols, reducing overflow of small TOC access
25854 to [TC] symbols. */
25855 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
25856 ? "[TE]," : "[TC],", file);
25859 /* Currently C++ toc references to vtables can be emitted before it
25860 is decided whether the vtable is public or private. If this is
25861 the case, then the linker will eventually complain that there is
25862 a TOC reference to an unknown section. Thus, for vtables only,
25863 we emit the TOC reference to reference the symbol and not the
25864 section. */
25865 if (VTABLE_NAME_P (name))
25867 RS6000_OUTPUT_BASENAME (file, name);
25868 if (offset < 0)
25869 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
25870 else if (offset > 0)
25871 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
25873 else
25874 output_addr_const (file, x);
25876 #if HAVE_AS_TLS
25877 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF
25878 && SYMBOL_REF_TLS_MODEL (base) != 0)
25880 if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
25881 fputs ("@le", file);
25882 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_INITIAL_EXEC)
25883 fputs ("@ie", file);
25884 /* Use global-dynamic for local-dynamic. */
25885 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_GLOBAL_DYNAMIC
25886 || SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_DYNAMIC)
25888 putc ('\n', file);
25889 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
25890 fputs ("\t.tc .", file);
25891 RS6000_OUTPUT_BASENAME (file, name);
25892 fputs ("[TC],", file);
25893 output_addr_const (file, x);
25894 fputs ("@m", file);
25897 #endif
25899 putc ('\n', file);
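/* Illustrative output (hypothetical): for the DFmode constant 1.0 the
   64-bit AIX path above emits

   .tc FD_3ff00000_0[TC],0x3ff0000000000000

   while the 32-bit ELF path emits

   .long 0x3ff00000,0x0

   after the LC..<labelno> internal label.  */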
25902 /* Output an assembler pseudo-op to write an ASCII string of N characters
25903 starting at P to FILE.
25905 On the RS/6000, we have to do this using the .byte operation and
25906 write out special characters outside the quoted string.
25907 Also, the assembler is broken; very long strings are truncated,
25908 so we must artificially break them up early. */
25910 void
25911 output_ascii (FILE *file, const char *p, int n)
25913 char c;
25914 int i, count_string;
25915 const char *for_string = "\t.byte \"";
25916 const char *for_decimal = "\t.byte ";
25917 const char *to_close = NULL;
25919 count_string = 0;
25920 for (i = 0; i < n; i++)
25922 c = *p++;
25923 if (c >= ' ' && c < 0177)
25925 if (for_string)
25926 fputs (for_string, file);
25927 putc (c, file);
25929 /* Write two quotes to get one. */
25930 if (c == '"')
25932 putc (c, file);
25933 ++count_string;
25936 for_string = NULL;
25937 for_decimal = "\"\n\t.byte ";
25938 to_close = "\"\n";
25939 ++count_string;
25941 if (count_string >= 512)
25943 fputs (to_close, file);
25945 for_string = "\t.byte \"";
25946 for_decimal = "\t.byte ";
25947 to_close = NULL;
25948 count_string = 0;
25951 else
25953 if (for_decimal)
25954 fputs (for_decimal, file);
25955 fprintf (file, "%d", c);
25957 for_string = "\n\t.byte \"";
25958 for_decimal = ", ";
25959 to_close = "\n";
25960 count_string = 0;
25964 /* Now close the string if we have written one. Then end the line. */
25965 if (to_close)
25966 fputs (to_close, file);
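/* Example: output_ascii (file, "Hi\n", 3) emits

   .byte "Hi"
   .byte 10

   i.e. printable runs stay quoted while other bytes are written in
   decimal.  */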
25969 /* Generate a unique section name for FILENAME for a section type
25970 represented by SECTION_DESC. Output goes into BUF.
25972 SECTION_DESC can be any string, as long as it is different for each
25973 possible section type.
25975 We name the section in the same manner as xlc. The name begins with an
25976 underscore followed by the filename (after stripping any leading directory
25977 names) with the last period replaced by the string SECTION_DESC. If
25978 FILENAME does not contain a period, SECTION_DESC is appended to the end of
25979 the name. */
25981 void
25982 rs6000_gen_section_name (char **buf, const char *filename,
25983 const char *section_desc)
25985 const char *q, *after_last_slash, *last_period = 0;
25986 char *p;
25987 int len;
25989 after_last_slash = filename;
25990 for (q = filename; *q; q++)
25992 if (*q == '/')
25993 after_last_slash = q + 1;
25994 else if (*q == '.')
25995 last_period = q;
25998 len = strlen (after_last_slash) + strlen (section_desc) + 2;
25999 *buf = (char *) xmalloc (len);
26001 p = *buf;
26002 *p++ = '_';
26004 for (q = after_last_slash; *q; q++)
26006 if (q == last_period)
26008 strcpy (p, section_desc);
26009 p += strlen (section_desc);
26010 break;
26013 else if (ISALNUM (*q))
26014 *p++ = *q;
26017 if (last_period == 0)
26018 strcpy (p, section_desc);
26019 else
26020 *p = '\0';
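/* Example: for FILENAME "../src/main.c" and SECTION_DESC "bss_" (a
   hypothetical descriptor), the buffer receives "_mainbss_": the
   directory part is dropped, the final period is replaced by
   SECTION_DESC, and non-alphanumeric characters are skipped.  */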
26023 /* Emit profile function. */
26025 void
26026 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
26028 /* Non-standard profiling for kernels, which just saves LR then calls
26029 _mcount without worrying about arg saves. The idea is to change
26030 the function prologue as little as possible as it isn't easy to
26031 account for arg save/restore code added just for _mcount. */
26032 if (TARGET_PROFILE_KERNEL)
26033 return;
26035 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26037 #ifndef NO_PROFILE_COUNTERS
26038 # define NO_PROFILE_COUNTERS 0
26039 #endif
26040 if (NO_PROFILE_COUNTERS)
26041 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26042 LCT_NORMAL, VOIDmode, 0);
26043 else
26045 char buf[30];
26046 const char *label_name;
26047 rtx fun;
26049 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26050 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
26051 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
26053 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26054 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
26057 else if (DEFAULT_ABI == ABI_DARWIN)
26059 const char *mcount_name = RS6000_MCOUNT;
26060 int caller_addr_regno = LR_REGNO;
26062 /* Be conservative and always set this, at least for now. */
26063 crtl->uses_pic_offset_table = 1;
26065 #if TARGET_MACHO
26066 /* For PIC code, set up a stub and collect the caller's address
26067 from r0, which is where the prologue puts it. */
26068 if (MACHOPIC_INDIRECT
26069 && crtl->uses_pic_offset_table)
26070 caller_addr_regno = 0;
26071 #endif
26072 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
26073 LCT_NORMAL, VOIDmode, 1,
26074 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
26078 /* Write function profiler code. */
26080 void
26081 output_function_profiler (FILE *file, int labelno)
26083 char buf[100];
26085 switch (DEFAULT_ABI)
26087 default:
26088 gcc_unreachable ();
26090 case ABI_V4:
26091 if (!TARGET_32BIT)
26093 warning (0, "no profiling of 64-bit code for this ABI");
26094 return;
26096 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26097 fprintf (file, "\tmflr %s\n", reg_names[0]);
26098 if (NO_PROFILE_COUNTERS)
26100 asm_fprintf (file, "\tstw %s,4(%s)\n",
26101 reg_names[0], reg_names[1]);
26103 else if (TARGET_SECURE_PLT && flag_pic)
26105 if (TARGET_LINK_STACK)
26107 char name[32];
26108 get_ppc476_thunk_name (name);
26109 asm_fprintf (file, "\tbl %s\n", name);
26111 else
26112 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
26113 asm_fprintf (file, "\tstw %s,4(%s)\n",
26114 reg_names[0], reg_names[1]);
26115 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26116 asm_fprintf (file, "\taddis %s,%s,",
26117 reg_names[12], reg_names[12]);
26118 assemble_name (file, buf);
26119 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
26120 assemble_name (file, buf);
26121 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
26123 else if (flag_pic == 1)
26125 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
26126 asm_fprintf (file, "\tstw %s,4(%s)\n",
26127 reg_names[0], reg_names[1]);
26128 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26129 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
26130 assemble_name (file, buf);
26131 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
26133 else if (flag_pic > 1)
26135 asm_fprintf (file, "\tstw %s,4(%s)\n",
26136 reg_names[0], reg_names[1]);
26137 /* Now, we need to get the address of the label. */
26138 if (TARGET_LINK_STACK)
26140 char name[32];
26141 get_ppc476_thunk_name (name);
26142 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
26143 assemble_name (file, buf);
26144 fputs ("-.\n1:", file);
26145 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26146 asm_fprintf (file, "\taddi %s,%s,4\n",
26147 reg_names[11], reg_names[11]);
26149 else
26151 fputs ("\tbcl 20,31,1f\n\t.long ", file);
26152 assemble_name (file, buf);
26153 fputs ("-.\n1:", file);
26154 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26156 asm_fprintf (file, "\tlwz %s,0(%s)\n",
26157 reg_names[0], reg_names[11]);
26158 asm_fprintf (file, "\tadd %s,%s,%s\n",
26159 reg_names[0], reg_names[0], reg_names[11]);
26161 else
26163 asm_fprintf (file, "\tlis %s,", reg_names[12]);
26164 assemble_name (file, buf);
26165 fputs ("@ha\n", file);
26166 asm_fprintf (file, "\tstw %s,4(%s)\n",
26167 reg_names[0], reg_names[1]);
26168 asm_fprintf (file, "\tla %s,", reg_names[0]);
26169 assemble_name (file, buf);
26170 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
26173 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
26174 fprintf (file, "\tbl %s%s\n",
26175 RS6000_MCOUNT, flag_pic ? "@plt" : "");
26176 break;
26178 case ABI_AIX:
26179 case ABI_ELFv2:
26180 case ABI_DARWIN:
26181 /* Don't do anything, done in output_profile_hook (). */
26182 break;
26188 /* The following variable value is the last issued insn. */
26190 static rtx last_scheduled_insn;
26192 /* The following variable helps to balance issuing of load and
26193 store instructions. */
26195 static int load_store_pendulum;
26197 /* Power4 load update and store update instructions are cracked into a
26198 load or store and an integer insn which are executed in the same cycle.
26199 Branches have their own dispatch slot which does not count against the
26200 GCC issue rate, but it changes the program flow so there are no other
26201 instructions to issue in this cycle. */
26203 static int
26204 rs6000_variable_issue_1 (rtx insn, int more)
26206 last_scheduled_insn = insn;
26207 if (GET_CODE (PATTERN (insn)) == USE
26208 || GET_CODE (PATTERN (insn)) == CLOBBER)
26210 cached_can_issue_more = more;
26211 return cached_can_issue_more;
26214 if (insn_terminates_group_p (insn, current_group))
26216 cached_can_issue_more = 0;
26217 return cached_can_issue_more;
26220 /* If the insn has no reservation but we got here anyway, leave the issue count unchanged. */
26221 if (recog_memoized (insn) < 0)
26222 return more;
26224 if (rs6000_sched_groups)
26226 if (is_microcoded_insn (insn))
26227 cached_can_issue_more = 0;
26228 else if (is_cracked_insn (insn))
26229 cached_can_issue_more = more > 2 ? more - 2 : 0;
26230 else
26231 cached_can_issue_more = more - 1;
26233 return cached_can_issue_more;
26236 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
26237 return 0;
26239 cached_can_issue_more = more - 1;
26240 return cached_can_issue_more;
26243 static int
26244 rs6000_variable_issue (FILE *stream, int verbose, rtx insn, int more)
26246 int r = rs6000_variable_issue_1 (insn, more);
26247 if (verbose)
26248 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
26249 return r;
26252 /* Adjust the cost of a scheduling dependency. Return the new cost of
26253 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
26255 static int
26256 rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
26258 enum attr_type attr_type;
26260 if (! recog_memoized (insn))
26261 return 0;
26263 switch (REG_NOTE_KIND (link))
26265 case REG_DEP_TRUE:
26267 /* Data dependency; DEP_INSN writes a register that INSN reads
26268 some cycles later. */
26270 /* Separate a load from a narrower, dependent store. */
26271 if (rs6000_sched_groups
26272 && GET_CODE (PATTERN (insn)) == SET
26273 && GET_CODE (PATTERN (dep_insn)) == SET
26274 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
26275 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
26276 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
26277 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
26278 return cost + 14;
26280 attr_type = get_attr_type (insn);
26282 switch (attr_type)
26284 case TYPE_JMPREG:
26285 /* Tell the first scheduling pass about the latency between
26286 a mtctr and bctr (and mtlr and br/blr). The first
26287 scheduling pass will not know about this latency since
26288 the mtctr instruction, which has the latency associated
26289 to it, will be generated by reload. */
26290 return 4;
26291 case TYPE_BRANCH:
26292 /* Leave some extra cycles between a compare and its
26293 dependent branch, to inhibit expensive mispredicts. */
26294 if ((rs6000_cpu_attr == CPU_PPC603
26295 || rs6000_cpu_attr == CPU_PPC604
26296 || rs6000_cpu_attr == CPU_PPC604E
26297 || rs6000_cpu_attr == CPU_PPC620
26298 || rs6000_cpu_attr == CPU_PPC630
26299 || rs6000_cpu_attr == CPU_PPC750
26300 || rs6000_cpu_attr == CPU_PPC7400
26301 || rs6000_cpu_attr == CPU_PPC7450
26302 || rs6000_cpu_attr == CPU_PPCE5500
26303 || rs6000_cpu_attr == CPU_PPCE6500
26304 || rs6000_cpu_attr == CPU_POWER4
26305 || rs6000_cpu_attr == CPU_POWER5
26306 || rs6000_cpu_attr == CPU_POWER7
26307 || rs6000_cpu_attr == CPU_POWER8
26308 || rs6000_cpu_attr == CPU_CELL)
26309 && recog_memoized (dep_insn)
26310 && (INSN_CODE (dep_insn) >= 0))
26312 switch (get_attr_type (dep_insn))
26314 case TYPE_CMP:
26315 case TYPE_COMPARE:
26316 case TYPE_DELAYED_COMPARE:
26317 case TYPE_IMUL_COMPARE:
26318 case TYPE_LMUL_COMPARE:
26319 case TYPE_FPCOMPARE:
26320 case TYPE_CR_LOGICAL:
26321 case TYPE_DELAYED_CR:
26322 return cost + 2;
26323 default:
26324 break;
26326 break;
26328 case TYPE_STORE:
26329 case TYPE_STORE_U:
26330 case TYPE_STORE_UX:
26331 case TYPE_FPSTORE:
26332 case TYPE_FPSTORE_U:
26333 case TYPE_FPSTORE_UX:
26334 if ((rs6000_cpu == PROCESSOR_POWER6)
26335 && recog_memoized (dep_insn)
26336 && (INSN_CODE (dep_insn) >= 0))
26339 if (GET_CODE (PATTERN (insn)) != SET)
26340 /* If this happens, we have to extend this to schedule
26341 optimally. Return default for now. */
26342 return cost;
26344 /* Adjust the cost for the case where the value written
26345 by a fixed point operation is used as the address
26346 gen value on a store. */
26347 switch (get_attr_type (dep_insn))
26349 case TYPE_LOAD:
26350 case TYPE_LOAD_U:
26351 case TYPE_LOAD_UX:
26352 case TYPE_CNTLZ:
26354 if (! store_data_bypass_p (dep_insn, insn))
26355 return 4;
26356 break;
26358 case TYPE_LOAD_EXT:
26359 case TYPE_LOAD_EXT_U:
26360 case TYPE_LOAD_EXT_UX:
26361 case TYPE_VAR_SHIFT_ROTATE:
26362 case TYPE_VAR_DELAYED_COMPARE:
26364 if (! store_data_bypass_p (dep_insn, insn))
26365 return 6;
26366 break;
26368 case TYPE_INTEGER:
26369 case TYPE_COMPARE:
26370 case TYPE_FAST_COMPARE:
26371 case TYPE_EXTS:
26372 case TYPE_SHIFT:
26373 case TYPE_INSERT_WORD:
26374 case TYPE_INSERT_DWORD:
26375 case TYPE_FPLOAD_U:
26376 case TYPE_FPLOAD_UX:
26377 case TYPE_STORE_U:
26378 case TYPE_STORE_UX:
26379 case TYPE_FPSTORE_U:
26380 case TYPE_FPSTORE_UX:
26382 if (! store_data_bypass_p (dep_insn, insn))
26383 return 3;
26384 break;
26386 case TYPE_IMUL:
26387 case TYPE_IMUL2:
26388 case TYPE_IMUL3:
26389 case TYPE_LMUL:
26390 case TYPE_IMUL_COMPARE:
26391 case TYPE_LMUL_COMPARE:
26393 if (! store_data_bypass_p (dep_insn, insn))
26394 return 17;
26395 break;
26397 case TYPE_IDIV:
26399 if (! store_data_bypass_p (dep_insn, insn))
26400 return 45;
26401 break;
26403 case TYPE_LDIV:
26405 if (! store_data_bypass_p (dep_insn, insn))
26406 return 57;
26407 break;
26409 default:
26410 break;
26413 break;
26415 case TYPE_LOAD:
26416 case TYPE_LOAD_U:
26417 case TYPE_LOAD_UX:
26418 case TYPE_LOAD_EXT:
26419 case TYPE_LOAD_EXT_U:
26420 case TYPE_LOAD_EXT_UX:
26421 if ((rs6000_cpu == PROCESSOR_POWER6)
26422 && recog_memoized (dep_insn)
26423 && (INSN_CODE (dep_insn) >= 0))
26426 /* Adjust the cost for the case where the value written
26427 by a fixed point instruction is used within the address
26428 gen portion of a subsequent load(u)(x) */
26429 switch (get_attr_type (dep_insn))
26431 case TYPE_LOAD:
26432 case TYPE_LOAD_U:
26433 case TYPE_LOAD_UX:
26434 case TYPE_CNTLZ:
26436 if (set_to_load_agen (dep_insn, insn))
26437 return 4;
26438 break;
26440 case TYPE_LOAD_EXT:
26441 case TYPE_LOAD_EXT_U:
26442 case TYPE_LOAD_EXT_UX:
26443 case TYPE_VAR_SHIFT_ROTATE:
26444 case TYPE_VAR_DELAYED_COMPARE:
26446 if (set_to_load_agen (dep_insn, insn))
26447 return 6;
26448 break;
26450 case TYPE_INTEGER:
26451 case TYPE_COMPARE:
26452 case TYPE_FAST_COMPARE:
26453 case TYPE_EXTS:
26454 case TYPE_SHIFT:
26455 case TYPE_INSERT_WORD:
26456 case TYPE_INSERT_DWORD:
26457 case TYPE_FPLOAD_U:
26458 case TYPE_FPLOAD_UX:
26459 case TYPE_STORE_U:
26460 case TYPE_STORE_UX:
26461 case TYPE_FPSTORE_U:
26462 case TYPE_FPSTORE_UX:
26464 if (set_to_load_agen (dep_insn, insn))
26465 return 3;
26466 break;
26468 case TYPE_IMUL:
26469 case TYPE_IMUL2:
26470 case TYPE_IMUL3:
26471 case TYPE_LMUL:
26472 case TYPE_IMUL_COMPARE:
26473 case TYPE_LMUL_COMPARE:
26475 if (set_to_load_agen (dep_insn, insn))
26476 return 17;
26477 break;
26479 case TYPE_IDIV:
26481 if (set_to_load_agen (dep_insn, insn))
26482 return 45;
26483 break;
26485 case TYPE_LDIV:
26487 if (set_to_load_agen (dep_insn, insn))
26488 return 57;
26489 break;
26491 default:
26492 break;
26495 break;
26497 case TYPE_FPLOAD:
26498 if ((rs6000_cpu == PROCESSOR_POWER6)
26499 && recog_memoized (dep_insn)
26500 && (INSN_CODE (dep_insn) >= 0)
26501 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
26502 return 2;
26504 default:
26505 break;
26508 /* Fall out to return default cost. */
26510 break;
26512 case REG_DEP_OUTPUT:
26513 /* Output dependency; DEP_INSN writes a register that INSN writes some
26514 cycles later. */
26515 if ((rs6000_cpu == PROCESSOR_POWER6)
26516 && recog_memoized (dep_insn)
26517 && (INSN_CODE (dep_insn) >= 0))
26519 attr_type = get_attr_type (insn);
26521 switch (attr_type)
26523 case TYPE_FP:
26524 if (get_attr_type (dep_insn) == TYPE_FP)
26525 return 1;
26526 break;
26527 case TYPE_FPLOAD:
26528 if (get_attr_type (dep_insn) == TYPE_MFFGPR)
26529 return 2;
26530 break;
26531 default:
26532 break;
26535 case REG_DEP_ANTI:
26536 /* Anti dependency; DEP_INSN reads a register that INSN writes some
26537 cycles later. */
26538 return 0;
26540 default:
26541 gcc_unreachable ();
26544 return cost;
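/* Worked example (hypothetical insns): on POWER6, a store whose address
   is produced by a dependent integer add with no store-data bypass hits
   the TYPE_STORE/TYPE_INTEGER pairing above and costs 3 cycles.  */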
26547 /* Debug version of rs6000_adjust_cost. */
26549 static int
26550 rs6000_debug_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
26552 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
26554 if (ret != cost)
26556 const char *dep;
26558 switch (REG_NOTE_KIND (link))
26560 default: dep = "unknown dependency"; break;
26561 case REG_DEP_TRUE: dep = "data dependency"; break;
26562 case REG_DEP_OUTPUT: dep = "output dependency"; break;
26563 case REG_DEP_ANTI: dep = "anti dependency"; break;
26566 fprintf (stderr,
26567 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
26568 "%s, insn:\n", ret, cost, dep);
26570 debug_rtx (insn);
26573 return ret;
26576 /* Return true if INSN is microcoded.
26577 Return false otherwise. */
26579 static bool
26580 is_microcoded_insn (rtx insn)
26582 if (!insn || !NONDEBUG_INSN_P (insn)
26583 || GET_CODE (PATTERN (insn)) == USE
26584 || GET_CODE (PATTERN (insn)) == CLOBBER)
26585 return false;
26587 if (rs6000_cpu_attr == CPU_CELL)
26588 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
26590 if (rs6000_sched_groups
26591 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
26593 enum attr_type type = get_attr_type (insn);
26594 if (type == TYPE_LOAD_EXT_U
26595 || type == TYPE_LOAD_EXT_UX
26596 || type == TYPE_LOAD_UX
26597 || type == TYPE_STORE_UX
26598 || type == TYPE_MFCR)
26599 return true;
26602 return false;
26605 /* The function returns true if INSN is cracked into 2 instructions
26606 by the processor (and therefore occupies 2 issue slots). */
26608 static bool
26609 is_cracked_insn (rtx insn)
26611 if (!insn || !NONDEBUG_INSN_P (insn)
26612 || GET_CODE (PATTERN (insn)) == USE
26613 || GET_CODE (PATTERN (insn)) == CLOBBER)
26614 return false;
26616 if (rs6000_sched_groups
26617 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
26619 enum attr_type type = get_attr_type (insn);
26620 if (type == TYPE_LOAD_U || type == TYPE_STORE_U
26621 || type == TYPE_FPLOAD_U || type == TYPE_FPSTORE_U
26622 || type == TYPE_FPLOAD_UX || type == TYPE_FPSTORE_UX
26623 || type == TYPE_LOAD_EXT || type == TYPE_DELAYED_CR
26624 || type == TYPE_COMPARE || type == TYPE_DELAYED_COMPARE
26625 || type == TYPE_IMUL_COMPARE || type == TYPE_LMUL_COMPARE
26626 || type == TYPE_IDIV || type == TYPE_LDIV
26627 || type == TYPE_INSERT_WORD)
26628 return true;
26631 return false;
26634 /* The function returns true if INSN can be issued only from
26635 the branch slot. */
26637 static bool
26638 is_branch_slot_insn (rtx insn)
26640 if (!insn || !NONDEBUG_INSN_P (insn)
26641 || GET_CODE (PATTERN (insn)) == USE
26642 || GET_CODE (PATTERN (insn)) == CLOBBER)
26643 return false;
26645 if (rs6000_sched_groups)
26647 enum attr_type type = get_attr_type (insn);
26648 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
26649 return true;
26650 return false;
26653 return false;
26656 /* Return true if OUT_INSN sets a value that is
26657 used in the address generation computation of IN_INSN. */
26658 static bool
26659 set_to_load_agen (rtx out_insn, rtx in_insn)
26661 rtx out_set, in_set;
26663 /* For performance reasons, only handle the simple case where
26664 both loads are a single_set. */
26665 out_set = single_set (out_insn);
26666 if (out_set)
26668 in_set = single_set (in_insn);
26669 if (in_set)
26670 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
26673 return false;
26676 /* Try to determine base/offset/size parts of the given MEM.
26677 Return true if successful, false if all the values couldn't
26678 be determined.
26680 This function only looks for REG or REG+CONST address forms.
26681 REG+REG address form will return false. */
26683 static bool
26684 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
26685 HOST_WIDE_INT *size)
26687 rtx addr_rtx;
26688 if (MEM_SIZE_KNOWN_P (mem))
26689 *size = MEM_SIZE (mem);
26690 else
26691 return false;
26693 if (GET_CODE (XEXP (mem, 0)) == PRE_MODIFY)
26694 addr_rtx = XEXP (XEXP (mem, 0), 1);
26695 else
26696 addr_rtx = (XEXP (mem, 0));
26698 if (GET_CODE (addr_rtx) == REG)
26700 *base = addr_rtx;
26701 *offset = 0;
26703 else if (GET_CODE (addr_rtx) == PLUS
26704 && CONST_INT_P (XEXP (addr_rtx, 1)))
26706 *base = XEXP (addr_rtx, 0);
26707 *offset = INTVAL (XEXP (addr_rtx, 1));
26709 else
26710 return false;
26712 return true;
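/* Examples: an address of (reg 9) yields base r9/offset 0; an address
   of (plus (reg 9) (const_int 16)) yields base r9/offset 16; a REG+REG
   address such as (plus (reg 9) (reg 10)) makes this return false.  */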
26715 /* Return true if the target storage location of MEM1 is
26716 adjacent to the target storage location of MEM2. */
26719 static bool
26720 adjacent_mem_locations (rtx mem1, rtx mem2)
26722 rtx reg1, reg2;
26723 HOST_WIDE_INT off1, size1, off2, size2;
26725 if (get_memref_parts (mem1, &reg1, &off1, &size1)
26726 && get_memref_parts (mem2, &reg2, &off2, &size2))
26727 return ((REGNO (reg1) == REGNO (reg2))
26728 && ((off1 + size1 == off2)
26729 || (off2 + size2 == off1)));
26731 return false;
26734 /* This function returns true if it can be determined that the two MEM
26735 locations overlap by at least 1 byte based on base reg/offset/size. */
26737 static bool
26738 mem_locations_overlap (rtx mem1, rtx mem2)
26740 rtx reg1, reg2;
26741 HOST_WIDE_INT off1, size1, off2, size2;
26743 if (get_memref_parts (mem1, &reg1, &off1, &size1)
26744 && get_memref_parts (mem2, &reg2, &off2, &size2))
26745 return ((REGNO (reg1) == REGNO (reg2))
26746 && (((off1 <= off2) && (off1 + size1 > off2))
26747 || ((off2 <= off1) && (off2 + size2 > off1))));
26749 return false;
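/* Example: 4-byte accesses at r9+0 and r9+4 are adjacent but do not
   overlap; an 8-byte access at r9+0 and a 4-byte access at r9+4
   overlap, since off1 <= off2 and off1 + size1 > off2.  */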
26752 /* A C statement (sans semicolon) to update the integer scheduling
26753 priority INSN_PRIORITY (INSN). Increase the priority to execute the
26754 INSN earlier, reduce the priority to execute INSN later. Do not
26755 define this macro if you do not need to adjust the scheduling
26756 priorities of insns. */
26758 static int
26759 rs6000_adjust_priority (rtx insn ATTRIBUTE_UNUSED, int priority)
26761 rtx load_mem, str_mem;
26762 /* On machines (like the 750) which have asymmetric integer units,
26763 where one integer unit can do multiply and divides and the other
26764 can't, reduce the priority of multiply/divide so it is scheduled
26765 before other integer operations. */
26767 #if 0
26768 if (! INSN_P (insn))
26769 return priority;
26771 if (GET_CODE (PATTERN (insn)) == USE)
26772 return priority;
26774 switch (rs6000_cpu_attr) {
26775 case CPU_PPC750:
26776 switch (get_attr_type (insn))
26778 default:
26779 break;
26781 case TYPE_IMUL:
26782 case TYPE_IDIV:
26783 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
26784 priority, priority);
26785 if (priority >= 0 && priority < 0x01000000)
26786 priority >>= 3;
26787 break;
26790 #endif
26792 if (insn_must_be_first_in_group (insn)
26793 && reload_completed
26794 && current_sched_info->sched_max_insns_priority
26795 && rs6000_sched_restricted_insns_priority)
26798 /* Prioritize insns that can be dispatched only in the first
26799 dispatch slot. */
26800 if (rs6000_sched_restricted_insns_priority == 1)
26801 /* Attach highest priority to insn. This means that in
26802 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
26803 precede 'priority' (critical path) considerations. */
26804 return current_sched_info->sched_max_insns_priority;
26805 else if (rs6000_sched_restricted_insns_priority == 2)
26806 /* Increase priority of insn by a minimal amount. This means that in
26807 haifa-sched.c:ready_sort(), only 'priority' (critical path)
26808 considerations precede dispatch-slot restriction considerations. */
26809 return (priority + 1);
26812 if (rs6000_cpu == PROCESSOR_POWER6
26813 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
26814 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
26815 /* Attach highest priority to insn if the scheduler has just issued two
26816 stores and this instruction is a load, or two loads and this instruction
26817 is a store. Power6 wants loads and stores scheduled alternately
26818 when possible. */
26819 return current_sched_info->sched_max_insns_priority;
26821 return priority;
26824 /* Return true if the instruction is nonpipelined on the Cell. */
26825 static bool
26826 is_nonpipeline_insn (rtx insn)
26828 enum attr_type type;
26829 if (!insn || !NONDEBUG_INSN_P (insn)
26830 || GET_CODE (PATTERN (insn)) == USE
26831 || GET_CODE (PATTERN (insn)) == CLOBBER)
26832 return false;
26834 type = get_attr_type (insn);
26835 if (type == TYPE_IMUL
26836 || type == TYPE_IMUL2
26837 || type == TYPE_IMUL3
26838 || type == TYPE_LMUL
26839 || type == TYPE_IDIV
26840 || type == TYPE_LDIV
26841 || type == TYPE_SDIV
26842 || type == TYPE_DDIV
26843 || type == TYPE_SSQRT
26844 || type == TYPE_DSQRT
26845 || type == TYPE_MFCR
26846 || type == TYPE_MFCRF
26847 || type == TYPE_MFJMPR)
26849 return true;
26851 return false;
26855 /* Return how many instructions the machine can issue per cycle. */
26857 static int
26858 rs6000_issue_rate (void)
26860 /* Unless scheduling for register pressure, use an issue rate of 1 for
26861 the first scheduling pass to decrease degradation. */
26862 if (!reload_completed && !flag_sched_pressure)
26863 return 1;
26865 switch (rs6000_cpu_attr) {
26866 case CPU_RS64A:
26867 case CPU_PPC601: /* ? */
26868 case CPU_PPC7450:
26869 return 3;
26870 case CPU_PPC440:
26871 case CPU_PPC603:
26872 case CPU_PPC750:
26873 case CPU_PPC7400:
26874 case CPU_PPC8540:
26875 case CPU_PPC8548:
26876 case CPU_CELL:
26877 case CPU_PPCE300C2:
26878 case CPU_PPCE300C3:
26879 case CPU_PPCE500MC:
26880 case CPU_PPCE500MC64:
26881 case CPU_PPCE5500:
26882 case CPU_PPCE6500:
26883 case CPU_TITAN:
26884 return 2;
26885 case CPU_PPC476:
26886 case CPU_PPC604:
26887 case CPU_PPC604E:
26888 case CPU_PPC620:
26889 case CPU_PPC630:
26890 return 4;
26891 case CPU_POWER4:
26892 case CPU_POWER5:
26893 case CPU_POWER6:
26894 case CPU_POWER7:
26895 return 5;
26896 case CPU_POWER8:
26897 return 7;
26898 default:
26899 return 1;
26903 /* Return how many instructions to look ahead for better insn
26904 scheduling. */
26906 static int
26907 rs6000_use_sched_lookahead (void)
26909 switch (rs6000_cpu_attr)
26911 case CPU_PPC8540:
26912 case CPU_PPC8548:
26913 return 4;
26915 case CPU_CELL:
26916 return (reload_completed ? 8 : 0);
26918 default:
26919 return 0;
26923 /* We are choosing an insn from the ready queue. Return nonzero if INSN can be chosen. */
26924 static int
26925 rs6000_use_sched_lookahead_guard (rtx insn)
26927 if (rs6000_cpu_attr != CPU_CELL)
26928 return 1;
26930 if (insn == NULL_RTX || !INSN_P (insn))
26931 abort ();
26933 if (!reload_completed
26934 || is_nonpipeline_insn (insn)
26935 || is_microcoded_insn (insn))
26936 return 0;
26938 return 1;
26941 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
26942 and return true. */
26944 static bool
26945 find_mem_ref (rtx pat, rtx *mem_ref)
26947 const char * fmt;
26948 int i, j;
26950 /* stack_tie does not produce any real memory traffic. */
26951 if (tie_operand (pat, VOIDmode))
26952 return false;
26954 if (GET_CODE (pat) == MEM)
26956 *mem_ref = pat;
26957 return true;
26960 /* Recursively process the pattern. */
26961 fmt = GET_RTX_FORMAT (GET_CODE (pat));
26963 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
26965 if (fmt[i] == 'e')
26967 if (find_mem_ref (XEXP (pat, i), mem_ref))
26968 return true;
26970 else if (fmt[i] == 'E')
26971 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
26973 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
26974 return true;
26978 return false;
26981 /* Determine if PAT is a PATTERN of a load insn. */
26983 static bool
26984 is_load_insn1 (rtx pat, rtx *load_mem)
26986 if (pat == NULL_RTX)
26987 return false;
26989 if (GET_CODE (pat) == SET)
26990 return find_mem_ref (SET_SRC (pat), load_mem);
26992 if (GET_CODE (pat) == PARALLEL)
26994 int i;
26996 for (i = 0; i < XVECLEN (pat, 0); i++)
26997 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
26998 return true;
27001 return false;
27004 /* Determine if INSN loads from memory. */
27006 static bool
27007 is_load_insn (rtx insn, rtx *load_mem)
27009 if (!insn || !INSN_P (insn))
27010 return false;
27012 if (CALL_P (insn))
27013 return false;
27015 return is_load_insn1 (PATTERN (insn), load_mem);
27018 /* Determine if PAT is a PATTERN of a store insn. */
27020 static bool
27021 is_store_insn1 (rtx pat, rtx *str_mem)
27023 if (pat == NULL_RTX)
27024 return false;
27026 if (GET_CODE (pat) == SET)
27027 return find_mem_ref (SET_DEST (pat), str_mem);
27029 if (GET_CODE (pat) == PARALLEL)
27031 int i;
27033 for (i = 0; i < XVECLEN (pat, 0); i++)
27034 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
27035 return true;
27038 return false;
27041 /* Determine if INSN stores to memory. */
27043 static bool
27044 is_store_insn (rtx insn, rtx *str_mem)
27046 if (!insn || !INSN_P (insn))
27047 return false;
27049 return is_store_insn1 (PATTERN (insn), str_mem);
27052 /* Returns whether the dependence between INSN and NEXT is considered
27053 costly by the given target. */
27055 static bool
27056 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
27058 rtx insn;
27059 rtx next;
27060 rtx load_mem, str_mem;
27062 /* If the flag is not enabled, no dependence is considered costly;
27063 allow all dependent insns in the same group.
27064 This is the most aggressive option. */
27065 if (rs6000_sched_costly_dep == no_dep_costly)
27066 return false;
27068 /* If the flag is set to 1, a dependence is always considered costly;
27069 do not allow dependent instructions in the same group.
27070 This is the most conservative option. */
27071 if (rs6000_sched_costly_dep == all_deps_costly)
27072 return true;
27074 insn = DEP_PRO (dep);
27075 next = DEP_CON (dep);
27077 if (rs6000_sched_costly_dep == store_to_load_dep_costly
27078 && is_load_insn (next, &load_mem)
27079 && is_store_insn (insn, &str_mem))
27080 /* Prevent load after store in the same group. */
27081 return true;
27083 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
27084 && is_load_insn (next, &load_mem)
27085 && is_store_insn (insn, &str_mem)
27086 && DEP_TYPE (dep) == REG_DEP_TRUE
27087 && mem_locations_overlap (str_mem, load_mem))
27088 /* Prevent load after store in the same group if it is a true
27089 dependence. */
27090 return true;
27092 /* The flag is set to X; dependences with latency >= X are considered costly,
27093 and will not be scheduled in the same group. */
27094 if (rs6000_sched_costly_dep <= max_dep_latency
27095 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
27096 return true;
27098 return false;
27101 /* Return the next insn after INSN that is found before TAIL is reached,
27102 skipping any "non-active" insns, i.e. insns that will not actually occupy
27103 an issue slot. Return NULL_RTX if such an insn is not found. */
27105 static rtx
27106 get_next_active_insn (rtx insn, rtx tail)
27108 if (insn == NULL_RTX || insn == tail)
27109 return NULL_RTX;
27111 while (1)
27113 insn = NEXT_INSN (insn);
27114 if (insn == NULL_RTX || insn == tail)
27115 return NULL_RTX;
27117 if (CALL_P (insn)
27118 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
27119 || (NONJUMP_INSN_P (insn)
27120 && GET_CODE (PATTERN (insn)) != USE
27121 && GET_CODE (PATTERN (insn)) != CLOBBER
27122 && INSN_CODE (insn) != CODE_FOR_stack_tie))
27123 break;
27125 return insn;
27128 /* We are about to begin issuing insns for this clock cycle. */
27130 static int
27131 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
27132 rtx *ready ATTRIBUTE_UNUSED,
27133 int *pn_ready ATTRIBUTE_UNUSED,
27134 int clock_var ATTRIBUTE_UNUSED)
27136 int n_ready = *pn_ready;
27138 if (sched_verbose)
27139 fprintf (dump, "// rs6000_sched_reorder :\n");
27141 /* Reorder the ready list, if the second to last ready insn
27142 is a non-pipelined insn. */
27143 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
27145 if (is_nonpipeline_insn (ready[n_ready - 1])
27146 && (recog_memoized (ready[n_ready - 2]) > 0))
27147 /* Simply swap first two insns. */
27149 rtx tmp = ready[n_ready - 1];
27150 ready[n_ready - 1] = ready[n_ready - 2];
27151 ready[n_ready - 2] = tmp;
27155 if (rs6000_cpu == PROCESSOR_POWER6)
27156 load_store_pendulum = 0;
27158 return rs6000_issue_rate ();
27161 /* Like rs6000_sched_reorder, but called after issuing each insn. */
27163 static int
27164 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx *ready,
27165 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
27167 if (sched_verbose)
27168 fprintf (dump, "// rs6000_sched_reorder2 :\n");
27170 /* For Power6, we need to handle some special cases to try and keep the
27171 store queue from overflowing and triggering expensive flushes.
27173 This code monitors how load and store instructions are being issued
27174 and skews the ready list one way or the other to increase the likelihood
27175 that a desired instruction is issued at the proper time.
27177 A couple of things are done. First, we maintain a "load_store_pendulum"
27178 to track the current state of load/store issue.
27180 - If the pendulum is at zero, then no loads or stores have been
27181 issued in the current cycle so we do nothing.
27183 - If the pendulum is 1, then a single load has been issued in this
27184 cycle and we attempt to locate another load in the ready list to
27185 issue with it.
27187 - If the pendulum is -2, then two stores have already been
27188 issued in this cycle, so we increase the priority of the first load
27189 in the ready list to increase its likelihood of being chosen first
27190 in the next cycle.
27192 - If the pendulum is -1, then a single store has been issued in this
27193 cycle and we attempt to locate another store in the ready list to
27194 issue with it, preferring a store to an adjacent memory location to
27195 facilitate store pairing in the store queue.
27197 - If the pendulum is 2, then two loads have already been
27198 issued in this cycle, so we increase the priority of the first store
27199 in the ready list to increase its likelihood of being chosen first
27200 in the next cycle.
27202 - If the pendulum < -2 or > 2, then do nothing.
27204 Note: This code covers the most common scenarios. There exist non
27205 load/store instructions which make use of the LSU and which
27206 would need to be accounted for to strictly model the behavior
27207 of the machine. Those instructions are currently unaccounted
27208 for, to help minimize compile time overhead of this code. */
27210 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
27212 int pos;
27213 int i;
27214 rtx tmp, load_mem, str_mem;
27216 if (is_store_insn (last_scheduled_insn, &str_mem))
27217 /* Issuing a store, swing the load_store_pendulum to the left */
27218 load_store_pendulum--;
27219 else if (is_load_insn (last_scheduled_insn, &load_mem))
27220 /* Issuing a load, swing the load_store_pendulum to the right */
27221 load_store_pendulum++;
27222 else
27223 return cached_can_issue_more;
27225 /* If the pendulum is balanced, or there is only one instruction on
27226 the ready list, then all is well, so return. */
27227 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
27228 return cached_can_issue_more;
27230 if (load_store_pendulum == 1)
27232 /* A load has been issued in this cycle. Scan the ready list
27233 for another load to issue with it */
27234 pos = *pn_ready-1;
27236 while (pos >= 0)
27238 if (is_load_insn (ready[pos], &load_mem))
27240 /* Found a load. Move it to the head of the ready list,
27241 and adjust its priority so that it is more likely to
27242 stay there */
27243 tmp = ready[pos];
27244 for (i=pos; i<*pn_ready-1; i++)
27245 ready[i] = ready[i + 1];
27246 ready[*pn_ready-1] = tmp;
27248 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27249 INSN_PRIORITY (tmp)++;
27250 break;
27252 pos--;
27255 else if (load_store_pendulum == -2)
27257 /* Two stores have been issued in this cycle. Increase the
27258 priority of the first load in the ready list to favor it for
27259 issuing in the next cycle. */
27260 pos = *pn_ready-1;
27262 while (pos >= 0)
27264 if (is_load_insn (ready[pos], &load_mem)
27265 && !sel_sched_p ()
27266 && INSN_PRIORITY_KNOWN (ready[pos]))
27268 INSN_PRIORITY (ready[pos])++;
27270 /* Adjust the pendulum to account for the fact that a load
27271 was found and increased in priority. This is to prevent
27272 increasing the priority of multiple loads */
27273 load_store_pendulum--;
27275 break;
27277 pos--;
27280 else if (load_store_pendulum == -1)
27282 /* A store has been issued in this cycle. Scan the ready list for
27283 another store to issue with it, preferring a store to an adjacent
27284 memory location */
27285 int first_store_pos = -1;
27287 pos = *pn_ready-1;
27289 while (pos >= 0)
27291 if (is_store_insn (ready[pos], &str_mem))
27293 rtx str_mem2;
27294 /* Maintain the index of the first store found on the
27295 list */
27296 if (first_store_pos == -1)
27297 first_store_pos = pos;
27299 if (is_store_insn (last_scheduled_insn, &str_mem2)
27300 && adjacent_mem_locations (str_mem, str_mem2))
27302 /* Found an adjacent store. Move it to the head of the
27303 ready list, and adjust its priority so that it is
27304 more likely to stay there */
27305 tmp = ready[pos];
27306 for (i=pos; i<*pn_ready-1; i++)
27307 ready[i] = ready[i + 1];
27308 ready[*pn_ready-1] = tmp;
27310 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27311 INSN_PRIORITY (tmp)++;
27313 first_store_pos = -1;
27315 break;
27318 pos--;
27321 if (first_store_pos >= 0)
27323 /* An adjacent store wasn't found, but a non-adjacent store was,
27324 so move the non-adjacent store to the front of the ready
27325 list, and adjust its priority so that it is more likely to
27326 stay there. */
27327 tmp = ready[first_store_pos];
27328 for (i=first_store_pos; i<*pn_ready-1; i++)
27329 ready[i] = ready[i + 1];
27330 ready[*pn_ready-1] = tmp;
27331 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27332 INSN_PRIORITY (tmp)++;
27335 else if (load_store_pendulum == 2)
27337 /* Two loads have been issued in this cycle. Increase the priority
27338 of the first store in the ready list to favor it for issuing in
27339 the next cycle. */
27340 pos = *pn_ready-1;
27342 while (pos >= 0)
27344 if (is_store_insn (ready[pos], &str_mem)
27345 && !sel_sched_p ()
27346 && INSN_PRIORITY_KNOWN (ready[pos]))
27348 INSN_PRIORITY (ready[pos])++;
27350 /* Adjust the pendulum to account for the fact that a store
27351 was found and increased in priority. This is to prevent
27352 increasing the priority of multiple stores */
27353 load_store_pendulum++;
27355 break;
27357 pos--;
27362 return cached_can_issue_more;
27365 /* Return whether the presence of INSN causes a dispatch group termination
27366 of group WHICH_GROUP.
27368 If WHICH_GROUP == current_group, this function will return true if INSN
27369 causes the termination of the current group (i.e, the dispatch group to
27370 which INSN belongs). This means that INSN will be the last insn in the
27371 group it belongs to.
27373 If WHICH_GROUP == previous_group, this function will return true if INSN
27374 causes the termination of the previous group (i.e, the dispatch group that
27375 precedes the group to which INSN belongs). This means that INSN will be
27376 the first insn in the group it belongs to. */
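/* Annotation: restating the two views in terms of the checks below, an
   insn that must be first in its group terminates the *previous* group,
   one that must be last terminates the *current* group, and one that
   must be both first and last occupies a group by itself, which is why
   the FIRST && LAST case returns true for either WHICH_GROUP.  */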
27378 static bool
27379 insn_terminates_group_p (rtx insn, enum group_termination which_group)
27381 bool first, last;
27383 if (! insn)
27384 return false;
27386 first = insn_must_be_first_in_group (insn);
27387 last = insn_must_be_last_in_group (insn);
27389 if (first && last)
27390 return true;
27392 if (which_group == current_group)
27393 return last;
27394 else if (which_group == previous_group)
27395 return first;
27397 return false;
27401 static bool
27402 insn_must_be_first_in_group (rtx insn)
27404 enum attr_type type;
27406 if (!insn
27407 || NOTE_P (insn)
27408 || DEBUG_INSN_P (insn)
27409 || GET_CODE (PATTERN (insn)) == USE
27410 || GET_CODE (PATTERN (insn)) == CLOBBER)
27411 return false;
27413 switch (rs6000_cpu)
27415 case PROCESSOR_POWER5:
27416 if (is_cracked_insn (insn))
27417 return true;
27418 case PROCESSOR_POWER4:
27419 if (is_microcoded_insn (insn))
27420 return true;
27422 if (!rs6000_sched_groups)
27423 return false;
27425 type = get_attr_type (insn);
27427 switch (type)
27429 case TYPE_MFCR:
27430 case TYPE_MFCRF:
27431 case TYPE_MTCR:
27432 case TYPE_DELAYED_CR:
27433 case TYPE_CR_LOGICAL:
27434 case TYPE_MTJMPR:
27435 case TYPE_MFJMPR:
27436 case TYPE_IDIV:
27437 case TYPE_LDIV:
27438 case TYPE_LOAD_L:
27439 case TYPE_STORE_C:
27440 case TYPE_ISYNC:
27441 case TYPE_SYNC:
27442 return true;
27443 default:
27444 break;
27446 break;
27447 case PROCESSOR_POWER6:
27448 type = get_attr_type (insn);
27450 switch (type)
27452 case TYPE_INSERT_DWORD:
27453 case TYPE_EXTS:
27454 case TYPE_CNTLZ:
27455 case TYPE_SHIFT:
27456 case TYPE_VAR_SHIFT_ROTATE:
27457 case TYPE_TRAP:
27458 case TYPE_IMUL:
27459 case TYPE_IMUL2:
27460 case TYPE_IMUL3:
27461 case TYPE_LMUL:
27462 case TYPE_IDIV:
27463 case TYPE_INSERT_WORD:
27464 case TYPE_DELAYED_COMPARE:
27465 case TYPE_IMUL_COMPARE:
27466 case TYPE_LMUL_COMPARE:
27467 case TYPE_FPCOMPARE:
27468 case TYPE_MFCR:
27469 case TYPE_MTCR:
27470 case TYPE_MFJMPR:
27471 case TYPE_MTJMPR:
27472 case TYPE_ISYNC:
27473 case TYPE_SYNC:
27474 case TYPE_LOAD_L:
27475 case TYPE_STORE_C:
27476 case TYPE_LOAD_U:
27477 case TYPE_LOAD_UX:
27478 case TYPE_LOAD_EXT_UX:
27479 case TYPE_STORE_U:
27480 case TYPE_STORE_UX:
27481 case TYPE_FPLOAD_U:
27482 case TYPE_FPLOAD_UX:
27483 case TYPE_FPSTORE_U:
27484 case TYPE_FPSTORE_UX:
27485 return true;
27486 default:
27487 break;
27489 break;
27490 case PROCESSOR_POWER7:
27491 type = get_attr_type (insn);
27493 switch (type)
27495 case TYPE_CR_LOGICAL:
27496 case TYPE_MFCR:
27497 case TYPE_MFCRF:
27498 case TYPE_MTCR:
27499 case TYPE_IDIV:
27500 case TYPE_LDIV:
27501 case TYPE_COMPARE:
27502 case TYPE_DELAYED_COMPARE:
27503 case TYPE_VAR_DELAYED_COMPARE:
27504 case TYPE_ISYNC:
27505 case TYPE_LOAD_L:
27506 case TYPE_STORE_C:
27507 case TYPE_LOAD_U:
27508 case TYPE_LOAD_UX:
27509 case TYPE_LOAD_EXT:
27510 case TYPE_LOAD_EXT_U:
27511 case TYPE_LOAD_EXT_UX:
27512 case TYPE_STORE_U:
27513 case TYPE_STORE_UX:
27514 case TYPE_FPLOAD_U:
27515 case TYPE_FPLOAD_UX:
27516 case TYPE_FPSTORE_U:
27517 case TYPE_FPSTORE_UX:
27518 case TYPE_MFJMPR:
27519 case TYPE_MTJMPR:
27520 return true;
27521 default:
27522 break;
27524 break;
27525 case PROCESSOR_POWER8:
27526 type = get_attr_type (insn);
27528 switch (type)
27530 case TYPE_CR_LOGICAL:
27531 case TYPE_DELAYED_CR:
27532 case TYPE_MFCR:
27533 case TYPE_MFCRF:
27534 case TYPE_MTCR:
27535 case TYPE_COMPARE:
27536 case TYPE_DELAYED_COMPARE:
27537 case TYPE_VAR_DELAYED_COMPARE:
27538 case TYPE_IMUL_COMPARE:
27539 case TYPE_LMUL_COMPARE:
27540 case TYPE_SYNC:
27541 case TYPE_ISYNC:
27542 case TYPE_LOAD_L:
27543 case TYPE_STORE_C:
27544 case TYPE_LOAD_U:
27545 case TYPE_LOAD_UX:
27546 case TYPE_LOAD_EXT:
27547 case TYPE_LOAD_EXT_U:
27548 case TYPE_LOAD_EXT_UX:
27549 case TYPE_STORE_UX:
27550 case TYPE_VECSTORE:
27551 case TYPE_MFJMPR:
27552 case TYPE_MTJMPR:
27553 return true;
27554 default:
27555 break;
27557 break;
27558 default:
27559 break;
27562 return false;
27565 static bool
27566 insn_must_be_last_in_group (rtx insn)
27568 enum attr_type type;
27570 if (!insn
27571 || NOTE_P (insn)
27572 || DEBUG_INSN_P (insn)
27573 || GET_CODE (PATTERN (insn)) == USE
27574 || GET_CODE (PATTERN (insn)) == CLOBBER)
27575 return false;
27577 switch (rs6000_cpu) {
27578 case PROCESSOR_POWER4:
27579 case PROCESSOR_POWER5:
27580 if (is_microcoded_insn (insn))
27581 return true;
27583 if (is_branch_slot_insn (insn))
27584 return true;
27586 break;
27587 case PROCESSOR_POWER6:
27588 type = get_attr_type (insn);
27590 switch (type)
27592 case TYPE_EXTS:
27593 case TYPE_CNTLZ:
27594 case TYPE_SHIFT:
27595 case TYPE_VAR_SHIFT_ROTATE:
27596 case TYPE_TRAP:
27597 case TYPE_IMUL:
27598 case TYPE_IMUL2:
27599 case TYPE_IMUL3:
27600 case TYPE_LMUL:
27601 case TYPE_IDIV:
27602 case TYPE_DELAYED_COMPARE:
27603 case TYPE_IMUL_COMPARE:
27604 case TYPE_LMUL_COMPARE:
27605 case TYPE_FPCOMPARE:
27606 case TYPE_MFCR:
27607 case TYPE_MTCR:
27608 case TYPE_MFJMPR:
27609 case TYPE_MTJMPR:
27610 case TYPE_ISYNC:
27611 case TYPE_SYNC:
27612 case TYPE_LOAD_L:
27613 case TYPE_STORE_C:
27614 return true;
27615 default:
27616 break;
27618 break;
27619 case PROCESSOR_POWER7:
27620 type = get_attr_type (insn);
27622 switch (type)
27624 case TYPE_ISYNC:
27625 case TYPE_SYNC:
27626 case TYPE_LOAD_L:
27627 case TYPE_STORE_C:
27628 case TYPE_LOAD_EXT_U:
27629 case TYPE_LOAD_EXT_UX:
27630 case TYPE_STORE_UX:
27631 return true;
27632 default:
27633 break;
27635 break;
27636 case PROCESSOR_POWER8:
27637 type = get_attr_type (insn);
27639 switch (type)
27641 case TYPE_MFCR:
27642 case TYPE_MTCR:
27643 case TYPE_ISYNC:
27644 case TYPE_SYNC:
27645 case TYPE_LOAD_L:
27646 case TYPE_STORE_C:
27647 case TYPE_LOAD_EXT_U:
27648 case TYPE_LOAD_EXT_UX:
27649 case TYPE_STORE_UX:
27650 return true;
27651 default:
27652 break;
27654 break;
27655 default:
27656 break;
27659 return false;
27662 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
27663 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
27665 static bool
27666 is_costly_group (rtx *group_insns, rtx next_insn)
27668 int i;
27669 int issue_rate = rs6000_issue_rate ();
27671 for (i = 0; i < issue_rate; i++)
27673 sd_iterator_def sd_it;
27674 dep_t dep;
27675 rtx insn = group_insns[i];
27677 if (!insn)
27678 continue;
27680 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
27682 rtx next = DEP_CON (dep);
27684 if (next == next_insn
27685 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
27686 return true;
27690 return false;
27693 /* Helper for the function redefine_groups.
27694 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
27695 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
27696 to keep it "far" (in a separate group) from GROUP_INSNS, following
27697 one of the following schemes, depending on the value of the flag
27698 -minsert-sched-nops = X:
27699 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
27700 in order to force NEXT_INSN into a separate group.
27701 (2) X < sched_finish_regroup_exact: insert exactly X nops.
27702 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
27703 insertion (has a group just ended, how many vacant issue slots remain in the
27704 last group, and how many dispatch groups were encountered so far). */
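/* Annotation: a worked example under scheme (1), assuming
   issue_rate == 4. With three vacant slots (can_issue_more == 3), a
   costly non-branch NEXT_INSN gets 'can_issue_more - 1' == 2 nops,
   since the branch slot need not be filled; a costly branch gets all
   three, so that the last nop starts a new group and the branch is
   forced into it. On POWER6 and later a single group-ending nop
   replaces the padding.  */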
27706 static int
27707 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
27708 rtx next_insn, bool *group_end, int can_issue_more,
27709 int *group_count)
27711 rtx nop;
27712 bool force;
27713 int issue_rate = rs6000_issue_rate ();
27714 bool end = *group_end;
27715 int i;
27717 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
27718 return can_issue_more;
27720 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
27721 return can_issue_more;
27723 force = is_costly_group (group_insns, next_insn);
27724 if (!force)
27725 return can_issue_more;
27727 if (sched_verbose > 6)
27728 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
27729 *group_count ,can_issue_more);
27731 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
27733 if (*group_end)
27734 can_issue_more = 0;
27736 /* Since only a branch can be issued in the last issue_slot, it is
27737 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
27738 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
27739 in this case the last nop will start a new group and the branch
27740 will be forced to the new group. */
27741 if (can_issue_more && !is_branch_slot_insn (next_insn))
27742 can_issue_more--;
27744 /* Do we have a special group ending nop? */
27745 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
27746 || rs6000_cpu_attr == CPU_POWER8)
27748 nop = gen_group_ending_nop ();
27749 emit_insn_before (nop, next_insn);
27750 can_issue_more = 0;
27752 else
27753 while (can_issue_more > 0)
27755 nop = gen_nop ();
27756 emit_insn_before (nop, next_insn);
27757 can_issue_more--;
27760 *group_end = true;
27761 return 0;
27764 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
27766 int n_nops = rs6000_sched_insert_nops;
27768 /* Nops can't be issued from the branch slot, so the effective
27769 issue_rate for nops is 'issue_rate - 1'. */
27770 if (can_issue_more == 0)
27771 can_issue_more = issue_rate;
27772 can_issue_more--;
27773 if (can_issue_more == 0)
27775 can_issue_more = issue_rate - 1;
27776 (*group_count)++;
27777 end = true;
27778 for (i = 0; i < issue_rate; i++)
27780 group_insns[i] = 0;
27784 while (n_nops > 0)
27786 nop = gen_nop ();
27787 emit_insn_before (nop, next_insn);
27788 if (can_issue_more == issue_rate - 1) /* new group begins */
27789 end = false;
27790 can_issue_more--;
27791 if (can_issue_more == 0)
27793 can_issue_more = issue_rate - 1;
27794 (*group_count)++;
27795 end = true;
27796 for (i = 0; i < issue_rate; i++)
27798 group_insns[i] = 0;
27801 n_nops--;
27804 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
27805 can_issue_more++;
27807 /* Is next_insn going to start a new group? */
27808 *group_end
27809 = (end
27810 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
27811 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
27812 || (can_issue_more < issue_rate &&
27813 insn_terminates_group_p (next_insn, previous_group)));
27814 if (*group_end && end)
27815 (*group_count)--;
27817 if (sched_verbose > 6)
27818 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
27819 *group_count, can_issue_more);
27820 return can_issue_more;
27823 return can_issue_more;
27826 /* This function tries to synchronize the dispatch groups that the compiler "sees"
27827 with the dispatch groups that the processor dispatcher is expected to
27828 form in practice. It tries to achieve this synchronization by forcing the
27829 estimated processor grouping on the compiler (as opposed to the function
27830 'pad_groups' which tries to force the scheduler's grouping on the processor).
27832 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
27833 examines the (estimated) dispatch groups that will be formed by the processor
27834 dispatcher. It marks these group boundaries to reflect the estimated
27835 processor grouping, overriding the grouping that the scheduler had marked.
27836 Depending on the value of the flag '-minsert-sched-nops' this function can
27837 force certain insns into separate groups or force a certain distance between
27838 them by inserting nops, for example, if there exists a "costly dependence"
27839 between the insns.
27841 The function estimates the group boundaries that the processor will form as
27842 follows: It keeps track of how many vacant issue slots are available after
27843 each insn. A subsequent insn will start a new group if one of the following
27844 4 cases applies:
27845 - no more vacant issue slots remain in the current dispatch group.
27846 - only the last issue slot, which is the branch slot, is vacant, but the next
27847 insn is not a branch.
27848 - only the last two or fewer issue slots, including the branch slot, are vacant,
27849 which means that a cracked insn (which occupies two issue slots) can't be
27850 issued in this group.
27851 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
27852 start a new group. */
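/* Annotation: the boundaries computed here are recorded in the insn
   stream itself; the first insn of each dispatch group is marked with
   TImode, and the mark is cleared (VOIDmode) when this pass decides
   the insn does not start a group after all, as done by the PUT_MODE
   calls in the loop below.  */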
27854 static int
27855 redefine_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail)
27857 rtx insn, next_insn;
27858 int issue_rate;
27859 int can_issue_more;
27860 int slot, i;
27861 bool group_end;
27862 int group_count = 0;
27863 rtx *group_insns;
27865 /* Initialize. */
27866 issue_rate = rs6000_issue_rate ();
27867 group_insns = XALLOCAVEC (rtx, issue_rate);
27868 for (i = 0; i < issue_rate; i++)
27870 group_insns[i] = 0;
27872 can_issue_more = issue_rate;
27873 slot = 0;
27874 insn = get_next_active_insn (prev_head_insn, tail);
27875 group_end = false;
27877 while (insn != NULL_RTX)
27879 slot = (issue_rate - can_issue_more);
27880 group_insns[slot] = insn;
27881 can_issue_more =
27882 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
27883 if (insn_terminates_group_p (insn, current_group))
27884 can_issue_more = 0;
27886 next_insn = get_next_active_insn (insn, tail);
27887 if (next_insn == NULL_RTX)
27888 return group_count + 1;
27890 /* Is next_insn going to start a new group? */
27891 group_end
27892 = (can_issue_more == 0
27893 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
27894 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
27895 || (can_issue_more < issue_rate &&
27896 insn_terminates_group_p (next_insn, previous_group)));
27898 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
27899 next_insn, &group_end, can_issue_more,
27900 &group_count);
27902 if (group_end)
27904 group_count++;
27905 can_issue_more = 0;
27906 for (i = 0; i < issue_rate; i++)
27908 group_insns[i] = 0;
27912 if (GET_MODE (next_insn) == TImode && can_issue_more)
27913 PUT_MODE (next_insn, VOIDmode);
27914 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
27915 PUT_MODE (next_insn, TImode);
27917 insn = next_insn;
27918 if (can_issue_more == 0)
27919 can_issue_more = issue_rate;
27920 } /* while */
27922 return group_count;
27925 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
27926 dispatch group boundaries that the scheduler had marked. Pad with nops
27927 any dispatch groups which have vacant issue slots, in order to force the
27928 scheduler's grouping on the processor dispatcher. The function
27929 returns the number of dispatch groups found. */
27931 static int
27932 pad_groups (FILE *dump, int sched_verbose, rtx prev_head_insn, rtx tail)
27934 rtx insn, next_insn;
27935 rtx nop;
27936 int issue_rate;
27937 int can_issue_more;
27938 int group_end;
27939 int group_count = 0;
27941 /* Initialize issue_rate. */
27942 issue_rate = rs6000_issue_rate ();
27943 can_issue_more = issue_rate;
27945 insn = get_next_active_insn (prev_head_insn, tail);
27946 next_insn = get_next_active_insn (insn, tail);
27948 while (insn != NULL_RTX)
27950 can_issue_more =
27951 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
27953 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
27955 if (next_insn == NULL_RTX)
27956 break;
27958 if (group_end)
27960 /* If the scheduler had marked group termination at this location
27961 (between insn and next_insn), and neither insn nor next_insn will
27962 force group termination, pad the group with nops to force group
27963 termination. */
27964 if (can_issue_more
27965 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
27966 && !insn_terminates_group_p (insn, current_group)
27967 && !insn_terminates_group_p (next_insn, previous_group))
27969 if (!is_branch_slot_insn (next_insn))
27970 can_issue_more--;
27972 while (can_issue_more)
27974 nop = gen_nop ();
27975 emit_insn_before (nop, next_insn);
27976 can_issue_more--;
27980 can_issue_more = issue_rate;
27981 group_count++;
27984 insn = next_insn;
27985 next_insn = get_next_active_insn (insn, tail);
27988 return group_count;
27991 /* We're beginning a new block. Initialize data structures as necessary. */
27993 static void
27994 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
27995 int sched_verbose ATTRIBUTE_UNUSED,
27996 int max_ready ATTRIBUTE_UNUSED)
27998 last_scheduled_insn = NULL_RTX;
27999 load_store_pendulum = 0;
28002 /* The following function is called at the end of scheduling BB.
28003 After reload, it inserts nops to enforce insn group bundling.
28005 static void
28006 rs6000_sched_finish (FILE *dump, int sched_verbose)
28008 int n_groups;
28010 if (sched_verbose)
28011 fprintf (dump, "=== Finishing schedule.\n");
28013 if (reload_completed && rs6000_sched_groups)
28015 /* Do not run the sched_finish hook when selective scheduling is enabled. */
28016 if (sel_sched_p ())
28017 return;
28019 if (rs6000_sched_insert_nops == sched_finish_none)
28020 return;
28022 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
28023 n_groups = pad_groups (dump, sched_verbose,
28024 current_sched_info->prev_head,
28025 current_sched_info->next_tail);
28026 else
28027 n_groups = redefine_groups (dump, sched_verbose,
28028 current_sched_info->prev_head,
28029 current_sched_info->next_tail);
28031 if (sched_verbose >= 6)
28033 fprintf (dump, "ngroups = %d\n", n_groups);
28034 print_rtl (dump, current_sched_info->prev_head);
28035 fprintf (dump, "Done finish_sched\n");
28040 struct _rs6000_sched_context
28042 short cached_can_issue_more;
28043 rtx last_scheduled_insn;
28044 int load_store_pendulum;
28047 typedef struct _rs6000_sched_context rs6000_sched_context_def;
28048 typedef rs6000_sched_context_def *rs6000_sched_context_t;
28050 /* Allocate storage for a new scheduling context. */
28051 static void *
28052 rs6000_alloc_sched_context (void)
28054 return xmalloc (sizeof (rs6000_sched_context_def));
28057 /* If CLEAN_P is true, initialize _SC with clean data;
28058 otherwise initialize it from the global context. */
28059 static void
28060 rs6000_init_sched_context (void *_sc, bool clean_p)
28062 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28064 if (clean_p)
28066 sc->cached_can_issue_more = 0;
28067 sc->last_scheduled_insn = NULL_RTX;
28068 sc->load_store_pendulum = 0;
28070 else
28072 sc->cached_can_issue_more = cached_can_issue_more;
28073 sc->last_scheduled_insn = last_scheduled_insn;
28074 sc->load_store_pendulum = load_store_pendulum;
28078 /* Sets the global scheduling context to the one pointed to by _SC. */
28079 static void
28080 rs6000_set_sched_context (void *_sc)
28082 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28084 gcc_assert (sc != NULL);
28086 cached_can_issue_more = sc->cached_can_issue_more;
28087 last_scheduled_insn = sc->last_scheduled_insn;
28088 load_store_pendulum = sc->load_store_pendulum;
28091 /* Free _SC. */
28092 static void
28093 rs6000_free_sched_context (void *_sc)
28095 gcc_assert (_sc != NULL);
28097 free (_sc);
28101 /* Length in units of the trampoline for entering a nested function. */
28103 int
28104 rs6000_trampoline_size (void)
28106 int ret = 0;
28108 switch (DEFAULT_ABI)
28110 default:
28111 gcc_unreachable ();
28113 case ABI_AIX:
28114 ret = (TARGET_32BIT) ? 12 : 24;
28115 break;
28117 case ABI_ELFv2:
28118 gcc_assert (!TARGET_32BIT);
28119 ret = 32;
28120 break;
28122 case ABI_DARWIN:
28123 case ABI_V4:
28124 ret = (TARGET_32BIT) ? 40 : 48;
28125 break;
28128 return ret;
28131 /* Emit RTL insns to initialize the variable parts of a trampoline.
28132 FNADDR is an RTX for the address of the function's pure code.
28133 CXT is an RTX for the static chain value for the function. */
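/* Annotation: under ABI_AIX the trampoline is itself a three-word
   function descriptor, { entry address, TOC pointer, static chain } at
   offsets 0, regsize and 2*regsize; the code below copies the first
   two words from FNADDR's descriptor and substitutes CXT for the
   third.  */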
28135 static void
28136 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
28138 int regsize = (TARGET_32BIT) ? 4 : 8;
28139 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
28140 rtx ctx_reg = force_reg (Pmode, cxt);
28141 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
28143 switch (DEFAULT_ABI)
28145 default:
28146 gcc_unreachable ();
28148 /* Under AIX, just build the three-word function descriptor. */
28149 case ABI_AIX:
28151 rtx fnmem, fn_reg, toc_reg;
28153 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
28154 error ("You cannot take the address of a nested function if you use "
28155 "the -mno-pointers-to-nested-functions option.");
28157 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
28158 fn_reg = gen_reg_rtx (Pmode);
28159 toc_reg = gen_reg_rtx (Pmode);
28161 /* Macro to shorten the code expansions below. */
28162 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
28164 m_tramp = replace_equiv_address (m_tramp, addr);
28166 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
28167 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
28168 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
28169 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
28170 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
28172 # undef MEM_PLUS
28174 break;
28176 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
28177 case ABI_ELFv2:
28178 case ABI_DARWIN:
28179 case ABI_V4:
28180 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
28181 LCT_NORMAL, VOIDmode, 4,
28182 addr, Pmode,
28183 GEN_INT (rs6000_trampoline_size ()), SImode,
28184 fnaddr, Pmode,
28185 ctx_reg, Pmode);
28186 break;
28191 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
28192 identifier as an argument, so the front end shouldn't look it up. */
28194 static bool
28195 rs6000_attribute_takes_identifier_p (const_tree attr_id)
28197 return is_attribute_p ("altivec", attr_id);
28200 /* Handle the "altivec" attribute. The attribute may have
28201 arguments as follows:
28203 __attribute__((altivec(vector__)))
28204 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
28205 __attribute__((altivec(bool__))) (always followed by 'unsigned')
28207 and may appear more than once (e.g., 'vector bool char') in a
28208 given declaration. */
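/* Annotation: the AltiVec keywords reach this handler as the attribute
   forms above, so that, for example, 'vector unsigned int' arrives as
   '__attribute__((altivec(vector__))) unsigned int' and 'vector pixel'
   as '__attribute__((altivec(pixel__))) unsigned short'.  */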
28210 static tree
28211 rs6000_handle_altivec_attribute (tree *node,
28212 tree name ATTRIBUTE_UNUSED,
28213 tree args,
28214 int flags ATTRIBUTE_UNUSED,
28215 bool *no_add_attrs)
28217 tree type = *node, result = NULL_TREE;
28218 enum machine_mode mode;
28219 int unsigned_p;
28220 char altivec_type
28221 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
28222 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
28223 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
28224 : '?');
28226 while (POINTER_TYPE_P (type)
28227 || TREE_CODE (type) == FUNCTION_TYPE
28228 || TREE_CODE (type) == METHOD_TYPE
28229 || TREE_CODE (type) == ARRAY_TYPE)
28230 type = TREE_TYPE (type);
28232 mode = TYPE_MODE (type);
28234 /* Check for invalid AltiVec type qualifiers. */
28235 if (type == long_double_type_node)
28236 error ("use of %<long double%> in AltiVec types is invalid");
28237 else if (type == boolean_type_node)
28238 error ("use of boolean types in AltiVec types is invalid");
28239 else if (TREE_CODE (type) == COMPLEX_TYPE)
28240 error ("use of %<complex%> in AltiVec types is invalid");
28241 else if (DECIMAL_FLOAT_MODE_P (mode))
28242 error ("use of decimal floating point types in AltiVec types is invalid");
28243 else if (!TARGET_VSX)
28245 if (type == long_unsigned_type_node || type == long_integer_type_node)
28247 if (TARGET_64BIT)
28248 error ("use of %<long%> in AltiVec types is invalid for "
28249 "64-bit code without -mvsx");
28250 else if (rs6000_warn_altivec_long)
28251 warning (0, "use of %<long%> in AltiVec types is deprecated; "
28252 "use %<int%>");
28254 else if (type == long_long_unsigned_type_node
28255 || type == long_long_integer_type_node)
28256 error ("use of %<long long%> in AltiVec types is invalid without "
28257 "-mvsx");
28258 else if (type == double_type_node)
28259 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
28262 switch (altivec_type)
28264 case 'v':
28265 unsigned_p = TYPE_UNSIGNED (type);
28266 switch (mode)
28268 case TImode:
28269 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
28270 break;
28271 case DImode:
28272 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
28273 break;
28274 case SImode:
28275 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
28276 break;
28277 case HImode:
28278 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
28279 break;
28280 case QImode:
28281 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
28282 break;
28283 case SFmode: result = V4SF_type_node; break;
28284 case DFmode: result = V2DF_type_node; break;
28285 /* If the user says 'vector int bool', we may be handed the 'bool'
28286 attribute _before_ the 'vector' attribute, and so select the
28287 proper type in the 'b' case below. */
28288 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
28289 case V2DImode: case V2DFmode:
28290 result = type;
28291 default: break;
28293 break;
28294 case 'b':
28295 switch (mode)
28297 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
28298 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
28299 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
28300 case QImode: case V16QImode: result = bool_V16QI_type_node;
28301 default: break;
28303 break;
28304 case 'p':
28305 switch (mode)
28307 case V8HImode: result = pixel_V8HI_type_node;
28308 default: break;
28310 default: break;
28313 /* Propagate qualifiers attached to the element type
28314 onto the vector type. */
28315 if (result && result != type && TYPE_QUALS (type))
28316 result = build_qualified_type (result, TYPE_QUALS (type));
28318 *no_add_attrs = true; /* No need to hang on to the attribute. */
28320 if (result)
28321 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
28323 return NULL_TREE;
28326 /* AltiVec defines four built-in scalar types that serve as vector
28327 elements; we must teach the compiler how to mangle them. */
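/* Annotation: the strings below follow the Itanium C++ ABI.
   "U6__booli" is the vendor-extended qualifier "__bool" applied to 'i'
   (int), while the lowercase "u7__pixel" denotes the vendor-extended
   builtin type "__pixel".  */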
28329 static const char *
28330 rs6000_mangle_type (const_tree type)
28332 type = TYPE_MAIN_VARIANT (type);
28334 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28335 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28336 return NULL;
28338 if (type == bool_char_type_node) return "U6__boolc";
28339 if (type == bool_short_type_node) return "U6__bools";
28340 if (type == pixel_type_node) return "u7__pixel";
28341 if (type == bool_int_type_node) return "U6__booli";
28342 if (type == bool_long_type_node) return "U6__booll";
28344 /* Mangle IBM extended float long double as `g' (__float128) on
28345 powerpc*-linux where long-double-64 previously was the default. */
28346 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
28347 && TARGET_ELF
28348 && TARGET_LONG_DOUBLE_128
28349 && !TARGET_IEEEQUAD)
28350 return "g";
28352 /* For all other types, use normal C++ mangling. */
28353 return NULL;
28356 /* Handle a "longcall" or "shortcall" attribute; arguments as in
28357 struct attribute_spec.handler. */
28359 static tree
28360 rs6000_handle_longcall_attribute (tree *node, tree name,
28361 tree args ATTRIBUTE_UNUSED,
28362 int flags ATTRIBUTE_UNUSED,
28363 bool *no_add_attrs)
28365 if (TREE_CODE (*node) != FUNCTION_TYPE
28366 && TREE_CODE (*node) != FIELD_DECL
28367 && TREE_CODE (*node) != TYPE_DECL)
28369 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28370 name);
28371 *no_add_attrs = true;
28374 return NULL_TREE;
28377 /* Set longcall attributes on all functions declared when
28378 rs6000_default_long_calls is true. */
28379 static void
28380 rs6000_set_default_type_attributes (tree type)
28382 if (rs6000_default_long_calls
28383 && (TREE_CODE (type) == FUNCTION_TYPE
28384 || TREE_CODE (type) == METHOD_TYPE))
28385 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
28386 NULL_TREE,
28387 TYPE_ATTRIBUTES (type));
28389 #if TARGET_MACHO
28390 darwin_set_default_type_attributes (type);
28391 #endif
28394 /* Return a reference suitable for calling a function with the
28395 longcall attribute. */
28397 rtx
28398 rs6000_longcall_ref (rtx call_ref)
28400 const char *call_name;
28401 tree node;
28403 if (GET_CODE (call_ref) != SYMBOL_REF)
28404 return call_ref;
28406 /* System V adds '.' to the internal name, so skip any leading dots. */
28407 call_name = XSTR (call_ref, 0);
28408 if (*call_name == '.')
28410 while (*call_name == '.')
28411 call_name++;
28413 node = get_identifier (call_name);
28414 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
28417 return force_reg (Pmode, call_ref);
28420 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
28421 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
28422 #endif
28424 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
28425 struct attribute_spec.handler. */
28426 static tree
28427 rs6000_handle_struct_attribute (tree *node, tree name,
28428 tree args ATTRIBUTE_UNUSED,
28429 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28431 tree *type = NULL;
28432 if (DECL_P (*node))
28434 if (TREE_CODE (*node) == TYPE_DECL)
28435 type = &TREE_TYPE (*node);
28437 else
28438 type = node;
28440 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
28441 || TREE_CODE (*type) == UNION_TYPE)))
28443 warning (OPT_Wattributes, "%qE attribute ignored", name);
28444 *no_add_attrs = true;
28447 else if ((is_attribute_p ("ms_struct", name)
28448 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
28449 || ((is_attribute_p ("gcc_struct", name)
28450 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
28452 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
28453 name);
28454 *no_add_attrs = true;
28457 return NULL_TREE;
28460 static bool
28461 rs6000_ms_bitfield_layout_p (const_tree record_type)
28463 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
28464 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
28465 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
28468 #ifdef USING_ELFOS_H
28470 /* A get_unnamed_section callback, used for switching to toc_section. */
28472 static void
28473 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
28475 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28476 && TARGET_MINIMAL_TOC
28477 && !TARGET_RELOCATABLE)
28479 if (!toc_initialized)
28481 toc_initialized = 1;
28482 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28483 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
28484 fprintf (asm_out_file, "\t.tc ");
28485 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
28486 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28487 fprintf (asm_out_file, "\n");
28489 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28490 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28491 fprintf (asm_out_file, " = .+32768\n");
28493 else
28494 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28496 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28497 && !TARGET_RELOCATABLE)
28498 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28499 else
28501 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28502 if (!toc_initialized)
28504 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28505 fprintf (asm_out_file, " = .+32768\n");
28506 toc_initialized = 1;
28511 /* Implement TARGET_ASM_INIT_SECTIONS. */
28513 static void
28514 rs6000_elf_asm_init_sections (void)
28516 toc_section
28517 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
28519 sdata2_section
28520 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
28521 SDATA2_SECTION_ASM_OP);
28524 /* Implement TARGET_SELECT_RTX_SECTION. */
28526 static section *
28527 rs6000_elf_select_rtx_section (enum machine_mode mode, rtx x,
28528 unsigned HOST_WIDE_INT align)
28530 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
28531 return toc_section;
28532 else
28533 return default_elf_select_rtx_section (mode, x, align);
28536 /* For a SYMBOL_REF, set generic flags and then perform some
28537 target-specific processing.
28539 When the AIX ABI is requested on a non-AIX system, replace the
28540 function name with the real name (with a leading .) rather than the
28541 function descriptor name. This saves a lot of overriding code to
28542 read the prefixes. */
28544 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
28545 static void
28546 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
28548 default_encode_section_info (decl, rtl, first);
28550 if (first
28551 && TREE_CODE (decl) == FUNCTION_DECL
28552 && !TARGET_AIX
28553 && DEFAULT_ABI == ABI_AIX)
28555 rtx sym_ref = XEXP (rtl, 0);
28556 size_t len = strlen (XSTR (sym_ref, 0));
28557 char *str = XALLOCAVEC (char, len + 2);
28558 str[0] = '.';
28559 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
28560 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
28564 static inline bool
28565 compare_section_name (const char *section, const char *templ)
28567 int len;
28569 len = strlen (templ);
28570 return (strncmp (section, templ, len) == 0
28571 && (section[len] == 0 || section[len] == '.'));
28574 bool
28575 rs6000_elf_in_small_data_p (const_tree decl)
28577 if (rs6000_sdata == SDATA_NONE)
28578 return false;
28580 /* We want to merge strings, so we never consider them small data. */
28581 if (TREE_CODE (decl) == STRING_CST)
28582 return false;
28584 /* Functions are never in the small data area. */
28585 if (TREE_CODE (decl) == FUNCTION_DECL)
28586 return false;
28588 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
28590 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (decl));
28591 if (compare_section_name (section, ".sdata")
28592 || compare_section_name (section, ".sdata2")
28593 || compare_section_name (section, ".gnu.linkonce.s")
28594 || compare_section_name (section, ".sbss")
28595 || compare_section_name (section, ".sbss2")
28596 || compare_section_name (section, ".gnu.linkonce.sb")
28597 || strcmp (section, ".PPC.EMB.sdata0") == 0
28598 || strcmp (section, ".PPC.EMB.sbss0") == 0)
28599 return true;
28601 else
28603 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
28605 if (size > 0
28606 && size <= g_switch_value
28607 /* If it's not public, and we're not going to reference it there,
28608 there's no need to put it in the small data section. */
28609 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
28610 return true;
28613 return false;
28616 #endif /* USING_ELFOS_H */
28618 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
28620 static bool
28621 rs6000_use_blocks_for_constant_p (enum machine_mode mode, const_rtx x)
28623 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
28626 /* Do not place thread-local symbols refs in the object blocks. */
28628 static bool
28629 rs6000_use_blocks_for_decl_p (const_tree decl)
28631 return !DECL_THREAD_LOCAL_P (decl);
28634 /* Return a REG that occurs in ADDR with coefficient 1.
28635 ADDR can be effectively incremented by incrementing REG.
28637 r0 is special and we must not select it as an address
28638 register by this routine since our caller will try to
28639 increment the returned register via an "la" instruction. */
28641 rtx
28642 find_addr_reg (rtx addr)
28644 while (GET_CODE (addr) == PLUS)
28646 if (GET_CODE (XEXP (addr, 0)) == REG
28647 && REGNO (XEXP (addr, 0)) != 0)
28648 addr = XEXP (addr, 0);
28649 else if (GET_CODE (XEXP (addr, 1)) == REG
28650 && REGNO (XEXP (addr, 1)) != 0)
28651 addr = XEXP (addr, 1);
28652 else if (CONSTANT_P (XEXP (addr, 0)))
28653 addr = XEXP (addr, 1);
28654 else if (CONSTANT_P (XEXP (addr, 1)))
28655 addr = XEXP (addr, 0);
28656 else
28657 gcc_unreachable ();
28659 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
28660 return addr;
28663 void
28664 rs6000_fatal_bad_address (rtx op)
28666 fatal_insn ("bad address", op);
28669 #if TARGET_MACHO
28671 typedef struct branch_island_d {
28672 tree function_name;
28673 tree label_name;
28674 int line_number;
28675 } branch_island;
28678 static vec<branch_island, va_gc> *branch_islands;
28680 /* Remember to generate a branch island for far calls to the given
28681 function. */
28683 static void
28684 add_compiler_branch_island (tree label_name, tree function_name,
28685 int line_number)
28687 branch_island bi = {function_name, label_name, line_number};
28688 vec_safe_push (branch_islands, bi);
28691 /* Generate far-jump branch islands for everything recorded in
28692 branch_islands. Invoked immediately after the last instruction of
28693 the epilogue has been emitted; the branch islands must be appended
28694 to, and contiguous with, the function body. Mach-O stubs are
28695 generated in machopic_output_stub(). */
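/* Annotation: islands are needed because the PowerPC 'bl' instruction
   encodes a 24-bit signed word displacement, reaching only +/-32MB; a
   call flagged CALL_LONG is therefore directed at a nearby island
   that loads the real target into CTR and branches via 'bctr', as
   emitted below.  */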
28697 static void
28698 macho_branch_islands (void)
28700 char tmp_buf[512];
28702 while (!vec_safe_is_empty (branch_islands))
28704 branch_island *bi = &branch_islands->last ();
28705 const char *label = IDENTIFIER_POINTER (bi->label_name);
28706 const char *name = IDENTIFIER_POINTER (bi->function_name);
28707 char name_buf[512];
28708 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
28709 if (name[0] == '*' || name[0] == '&')
28710 strcpy (name_buf, name+1);
28711 else
28713 name_buf[0] = '_';
28714 strcpy (name_buf+1, name);
28716 strcpy (tmp_buf, "\n");
28717 strcat (tmp_buf, label);
28718 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
28719 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
28720 dbxout_stabd (N_SLINE, bi->line_number);
28721 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
28722 if (flag_pic)
28724 if (TARGET_LINK_STACK)
28726 char name[32];
28727 get_ppc476_thunk_name (name);
28728 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
28729 strcat (tmp_buf, name);
28730 strcat (tmp_buf, "\n");
28731 strcat (tmp_buf, label);
28732 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
28734 else
28736 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
28737 strcat (tmp_buf, label);
28738 strcat (tmp_buf, "_pic\n");
28739 strcat (tmp_buf, label);
28740 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
28743 strcat (tmp_buf, "\taddis r11,r11,ha16(");
28744 strcat (tmp_buf, name_buf);
28745 strcat (tmp_buf, " - ");
28746 strcat (tmp_buf, label);
28747 strcat (tmp_buf, "_pic)\n");
28749 strcat (tmp_buf, "\tmtlr r0\n");
28751 strcat (tmp_buf, "\taddi r12,r11,lo16(");
28752 strcat (tmp_buf, name_buf);
28753 strcat (tmp_buf, " - ");
28754 strcat (tmp_buf, label);
28755 strcat (tmp_buf, "_pic)\n");
28757 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
28759 else
28761 strcat (tmp_buf, ":\nlis r12,hi16(");
28762 strcat (tmp_buf, name_buf);
28763 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
28764 strcat (tmp_buf, name_buf);
28765 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
28767 output_asm_insn (tmp_buf, 0);
28768 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
28769 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
28770 dbxout_stabd (N_SLINE, bi->line_number);
28771 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
28772 branch_islands->pop ();
28776 /* NO_PREVIOUS_DEF checks whether the function name is already
28777 present in the branch-island list. */
28779 static int
28780 no_previous_def (tree function_name)
28782 branch_island *bi;
28783 unsigned ix;
28785 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
28786 if (function_name == bi->function_name)
28787 return 0;
28788 return 1;
28791 /* GET_PREV_LABEL gets the label name from the previous definition of
28792 the function. */
28794 static tree
28795 get_prev_label (tree function_name)
28797 branch_island *bi;
28798 unsigned ix;
28800 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
28801 if (function_name == bi->function_name)
28802 return bi->label_name;
28803 return NULL_TREE;
28806 /* INSN is either a function call or a millicode call. It may have an
28807 unconditional jump in its delay slot.
28809 CALL_DEST is the routine we are calling. */
28811 char *
28812 output_call (rtx insn, rtx *operands, int dest_operand_number,
28813 int cookie_operand_number)
28815 static char buf[256];
28816 if (darwin_emit_branch_islands
28817 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
28818 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
28820 tree labelname;
28821 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
28823 if (no_previous_def (funname))
28825 rtx label_rtx = gen_label_rtx ();
28826 char *label_buf, temp_buf[256];
28827 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
28828 CODE_LABEL_NUMBER (label_rtx));
28829 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
28830 labelname = get_identifier (label_buf);
28831 add_compiler_branch_island (labelname, funname, insn_line (insn));
28833 else
28834 labelname = get_prev_label (funname);
28836 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
28837 instruction will reach 'foo', otherwise link as 'bl L42'".
28838 "L42" should be a 'branch island', that will do a far jump to
28839 'foo'. Branch islands are generated in
28840 macho_branch_islands(). */
28841 sprintf (buf, "jbsr %%z%d,%.246s",
28842 dest_operand_number, IDENTIFIER_POINTER (labelname));
28844 else
28845 sprintf (buf, "bl %%z%d", dest_operand_number);
28846 return buf;
28849 /* Generate PIC and indirect symbol stubs. */
28851 void
28852 machopic_output_stub (FILE *file, const char *symb, const char *stub)
28854 unsigned int length;
28855 char *symbol_name, *lazy_ptr_name;
28856 char *local_label_0;
28857 static int label = 0;
28859 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
28860 symb = (*targetm.strip_name_encoding) (symb);
28863 length = strlen (symb);
28864 symbol_name = XALLOCAVEC (char, length + 32);
28865 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
28867 lazy_ptr_name = XALLOCAVEC (char, length + 32);
28868 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
28870 if (flag_pic == 2)
28871 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
28872 else
28873 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
28875 if (flag_pic == 2)
28877 fprintf (file, "\t.align 5\n");
28879 fprintf (file, "%s:\n", stub);
28880 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
28882 label++;
28883 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
28884 sprintf (local_label_0, "\"L%011d$spb\"", label);
28886 fprintf (file, "\tmflr r0\n");
28887 if (TARGET_LINK_STACK)
28889 char name[32];
28890 get_ppc476_thunk_name (name);
28891 fprintf (file, "\tbl %s\n", name);
28892 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
28894 else
28896 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
28897 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
28899 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
28900 lazy_ptr_name, local_label_0);
28901 fprintf (file, "\tmtlr r0\n");
28902 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
28903 (TARGET_64BIT ? "ldu" : "lwzu"),
28904 lazy_ptr_name, local_label_0);
28905 fprintf (file, "\tmtctr r12\n");
28906 fprintf (file, "\tbctr\n");
28908 else
28910 fprintf (file, "\t.align 4\n");
28912 fprintf (file, "%s:\n", stub);
28913 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
28915 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
28916 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
28917 (TARGET_64BIT ? "ldu" : "lwzu"),
28918 lazy_ptr_name);
28919 fprintf (file, "\tmtctr r12\n");
28920 fprintf (file, "\tbctr\n");
28923 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
28924 fprintf (file, "%s:\n", lazy_ptr_name);
28925 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
28926 fprintf (file, "%sdyld_stub_binding_helper\n",
28927 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
28930 /* Legitimize PIC addresses. If the address is already
28931 position-independent, we return ORIG. Newly generated
28932 position-independent addresses go into a reg. This is REG if
28933 nonzero, otherwise we allocate register(s) as necessary. */
28935 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
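/* Annotation: SMALL_INT accepts exactly the signed 16-bit range
   -32768..32767, i.e. an offset that fits the displacement field of a
   single addi/lwz; larger offsets are forced into a register or the
   constant pool below.  */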
28937 rtx
28938 rs6000_machopic_legitimize_pic_address (rtx orig, enum machine_mode mode,
28939 rtx reg)
28941 rtx base, offset;
28943 if (reg == NULL && ! reload_in_progress && ! reload_completed)
28944 reg = gen_reg_rtx (Pmode);
28946 if (GET_CODE (orig) == CONST)
28948 rtx reg_temp;
28950 if (GET_CODE (XEXP (orig, 0)) == PLUS
28951 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
28952 return orig;
28954 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
28956 /* Use a different reg for the intermediate value, as
28957 it will be marked UNCHANGING. */
28958 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
28959 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
28960 Pmode, reg_temp);
28961 offset =
28962 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
28963 Pmode, reg);
28965 if (GET_CODE (offset) == CONST_INT)
28967 if (SMALL_INT (offset))
28968 return plus_constant (Pmode, base, INTVAL (offset));
28969 else if (! reload_in_progress && ! reload_completed)
28970 offset = force_reg (Pmode, offset);
28971 else
28973 rtx mem = force_const_mem (Pmode, orig);
28974 return machopic_legitimize_pic_address (mem, Pmode, reg);
28977 return gen_rtx_PLUS (Pmode, base, offset);
28980 /* Fall back on generic machopic code. */
28981 return machopic_legitimize_pic_address (orig, mode, reg);
28984 /* Output a .machine directive for the Darwin assembler, and call
28985 the generic start_file routine. */
28987 static void
28988 rs6000_darwin_file_start (void)
28990 static const struct
28992 const char *arg;
28993 const char *name;
28994 HOST_WIDE_INT if_set;
28995 } mapping[] = {
28996 { "ppc64", "ppc64", MASK_64BIT },
28997 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
28998 { "power4", "ppc970", 0 },
28999 { "G5", "ppc970", 0 },
29000 { "7450", "ppc7450", 0 },
29001 { "7400", "ppc7400", MASK_ALTIVEC },
29002 { "G4", "ppc7400", 0 },
29003 { "750", "ppc750", 0 },
29004 { "740", "ppc750", 0 },
29005 { "G3", "ppc750", 0 },
29006 { "604e", "ppc604e", 0 },
29007 { "604", "ppc604", 0 },
29008 { "603e", "ppc603", 0 },
29009 { "603", "ppc603", 0 },
29010 { "601", "ppc601", 0 },
29011 { NULL, "ppc", 0 } };
29012 const char *cpu_id = "";
29013 size_t i;
29015 rs6000_file_start ();
29016 darwin_file_start ();
29018 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
29020 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
29021 cpu_id = rs6000_default_cpu;
29023 if (global_options_set.x_rs6000_cpu_index)
29024 cpu_id = processor_target_table[rs6000_cpu_index].name;
29026 /* Look through the mapping array. Pick the first name that either
29027 matches the argument, has a bit set in IF_SET that is also set
29028 in the target flags, or has a NULL name. */
29030 i = 0;
29031 while (mapping[i].arg != NULL
29032 && strcmp (mapping[i].arg, cpu_id) != 0
29033 && (mapping[i].if_set & rs6000_isa_flags) == 0)
29034 i++;
29036 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
29039 #endif /* TARGET_MACHO */
29041 #if TARGET_ELF
29042 static int
29043 rs6000_elf_reloc_rw_mask (void)
29045 if (flag_pic)
29046 return 3;
29047 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29048 return 2;
29049 else
29050 return 0;
29053 /* Record an element in the table of global constructors. SYMBOL is
29054 a SYMBOL_REF of the function to be called; PRIORITY is a number
29055 between 0 and MAX_INIT_PRIORITY.
29057 This differs from default_named_section_asm_out_constructor in
29058 that we have special handling for -mrelocatable. */
29060 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
29061 static void
29062 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
29064 const char *section = ".ctors";
29065 char buf[16];
29067 if (priority != DEFAULT_INIT_PRIORITY)
29069 sprintf (buf, ".ctors.%.5u",
29070 /* Invert the numbering so the linker puts us in the proper
29071 order; constructors are run from right to left, and the
29072 linker sorts in increasing order. */
29073 MAX_INIT_PRIORITY - priority);
29074 section = buf;
29077 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29078 assemble_align (POINTER_SIZE);
29080 if (TARGET_RELOCATABLE)
29082 fputs ("\t.long (", asm_out_file);
29083 output_addr_const (asm_out_file, symbol);
29084 fputs (")@fixup\n", asm_out_file);
29086 else
29087 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29090 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
29091 static void
29092 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
29094 const char *section = ".dtors";
29095 char buf[16];
29097 if (priority != DEFAULT_INIT_PRIORITY)
29099 sprintf (buf, ".dtors.%.5u",
29100 /* Invert the numbering so the linker puts us in the proper
29101 order; constructors are run from right to left, and the
29102 linker sorts in increasing order. */
29103 MAX_INIT_PRIORITY - priority);
29104 section = buf;
29107 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29108 assemble_align (POINTER_SIZE);
29110 if (TARGET_RELOCATABLE)
29112 fputs ("\t.long (", asm_out_file);
29113 output_addr_const (asm_out_file, symbol);
29114 fputs (")@fixup\n", asm_out_file);
29116 else
29117 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29120 void
29121 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
29123 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
29125 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
29126 ASM_OUTPUT_LABEL (file, name);
29127 fputs (DOUBLE_INT_ASM_OP, file);
29128 rs6000_output_function_entry (file, name);
29129 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
29130 if (DOT_SYMBOLS)
29132 fputs ("\t.size\t", file);
29133 assemble_name (file, name);
29134 fputs (",24\n\t.type\t.", file);
29135 assemble_name (file, name);
29136 fputs (",@function\n", file);
29137 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
29139 fputs ("\t.globl\t.", file);
29140 assemble_name (file, name);
29141 putc ('\n', file);
29144 else
29145 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29146 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29147 rs6000_output_function_entry (file, name);
29148 fputs (":\n", file);
29149 return;
29152 if (TARGET_RELOCATABLE
29153 && !TARGET_SECURE_PLT
29154 && (get_pool_size () != 0 || crtl->profile)
29155 && uses_TOC ())
29157 char buf[256];
29159 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
29161 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
29162 fprintf (file, "\t.long ");
29163 assemble_name (file, buf);
29164 putc ('-', file);
29165 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29166 assemble_name (file, buf);
29167 putc ('\n', file);
29170 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29171 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29173 if (DEFAULT_ABI == ABI_AIX)
29175 const char *desc_name, *orig_name;
29177 orig_name = (*targetm.strip_name_encoding) (name);
29178 desc_name = orig_name;
29179 while (*desc_name == '.')
29180 desc_name++;
29182 if (TREE_PUBLIC (decl))
29183 fprintf (file, "\t.globl %s\n", desc_name);
29185 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29186 fprintf (file, "%s:\n", desc_name);
29187 fprintf (file, "\t.long %s\n", orig_name);
29188 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
29189 fputs ("\t.long 0\n", file);
29190 fprintf (file, "\t.previous\n");
29192 ASM_OUTPUT_LABEL (file, name);
29195 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
29196 static void
29197 rs6000_elf_file_end (void)
29199 #ifdef HAVE_AS_GNU_ATTRIBUTE
29200 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
29202 if (rs6000_passes_float)
29203 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
29204 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
29205 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
29206 : 2));
29207 if (rs6000_passes_vector)
29208 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
29209 (TARGET_ALTIVEC_ABI ? 2
29210 : TARGET_SPE_ABI ? 3
29211 : 1));
29212 if (rs6000_returns_struct)
29213 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
29214 aix_struct_return ? 2 : 1);
29216 #endif
29217 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29218 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
29219 file_end_indicate_exec_stack ();
29220 #endif
29222 #endif
29224 #if TARGET_XCOFF
29225 static void
29226 rs6000_xcoff_asm_output_anchor (rtx symbol)
29228 char buffer[100];
29230 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
29231 SYMBOL_REF_BLOCK_OFFSET (symbol));
29232 ASM_OUTPUT_DEF (asm_out_file, XSTR (symbol, 0), buffer);
29235 static void
29236 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
29238 fputs (GLOBAL_ASM_OP, stream);
29239 RS6000_OUTPUT_BASENAME (stream, name);
29240 putc ('\n', stream);
29243 /* A get_unnamed_section callback, used for read-only sections. DIRECTIVE
29244 points to the section string variable. */
29246 static void
29247 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
29249 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
29250 *(const char *const *) directive,
29251 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29254 /* Likewise for read-write sections. */
29256 static void
29257 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
29259 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
29260 *(const char *const *) directive,
29261 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29264 static void
29265 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
29267 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
29268 *(const char *const *) directive,
29269 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29272 /* A get_unnamed_section callback, used for switching to toc_section. */
29274 static void
29275 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
29277 if (TARGET_MINIMAL_TOC)
29279 /* toc_section is always selected at least once from
29280 rs6000_xcoff_file_start, so this is guaranteed to
29281 always be defined once and only once in each file. */
29282 if (!toc_initialized)
29284 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
29285 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
29286 toc_initialized = 1;
29288 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
29289 (TARGET_32BIT ? "" : ",3"));
29291 else
29292 fputs ("\t.toc\n", asm_out_file);
29295 /* Implement TARGET_ASM_INIT_SECTIONS. */
29297 static void
29298 rs6000_xcoff_asm_init_sections (void)
29300 read_only_data_section
29301 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29302 &xcoff_read_only_section_name);
29304 private_data_section
29305 = get_unnamed_section (SECTION_WRITE,
29306 rs6000_xcoff_output_readwrite_section_asm_op,
29307 &xcoff_private_data_section_name);
29309 tls_data_section
29310 = get_unnamed_section (SECTION_TLS,
29311 rs6000_xcoff_output_tls_section_asm_op,
29312 &xcoff_tls_data_section_name);
29314 tls_private_data_section
29315 = get_unnamed_section (SECTION_TLS,
29316 rs6000_xcoff_output_tls_section_asm_op,
29317 &xcoff_private_data_section_name);
29319 read_only_private_data_section
29320 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29321 &xcoff_private_data_section_name);
29323 toc_section
29324 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
29326 readonly_data_section = read_only_data_section;
29327 exception_section = data_section;
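/* Implement TARGET_ASM_RELOC_RW_MASK. AIX is always position
   independent, so treat data with either absolute or relative
   relocations as requiring a read-write section. */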
29330 static int
29331 rs6000_xcoff_reloc_rw_mask (void)
29333 return 3;
29336 static void
29337 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
29338 tree decl ATTRIBUTE_UNUSED)
29340 int smclass;
29341 static const char * const suffix[4] = { "PR", "RO", "RW", "TL" };
29343 if (flags & SECTION_CODE)
29344 smclass = 0;
29345 else if (flags & SECTION_TLS)
29346 smclass = 3;
29347 else if (flags & SECTION_WRITE)
29348 smclass = 2;
29349 else
29350 smclass = 1;
29352 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
29353 (flags & SECTION_CODE) ? "." : "",
29354 name, suffix[smclass], flags & SECTION_ENTSIZE);
29357 #define IN_NAMED_SECTION(DECL) \
29358 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
29359 && DECL_SECTION_NAME (DECL) != NULL_TREE)
29361 static section *
29362 rs6000_xcoff_select_section (tree decl, int reloc,
29363 unsigned HOST_WIDE_INT align)
29365 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
29366 a named section. */
29367 if (align > BIGGEST_ALIGNMENT)
29369 resolve_unique_section (decl, reloc, true);
29370 if (IN_NAMED_SECTION (decl))
29371 return get_named_section (decl, NULL, reloc);
29374 if (decl_readonly_section (decl, reloc))
29376 if (TREE_PUBLIC (decl))
29377 return read_only_data_section;
29378 else
29379 return read_only_private_data_section;
29381 else
29383 #if HAVE_AS_TLS
29384 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
29386 if (TREE_PUBLIC (decl))
29387 return tls_data_section;
29388 else if (bss_initializer_p (decl))
29390 /* Convert to COMMON to emit in BSS. */
29391 DECL_COMMON (decl) = 1;
29392 return tls_comm_section;
29394 else
29395 return tls_private_data_section;
29397 else
29398 #endif
29399 if (TREE_PUBLIC (decl))
29400 return data_section;
29401 else
29402 return private_data_section;
29406 static void
29407 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
29409 const char *name;
29411 /* Use select_section for private data and uninitialized data with
29412 alignment <= BIGGEST_ALIGNMENT. */
29413 if (!TREE_PUBLIC (decl)
29414 || DECL_COMMON (decl)
29415 || (DECL_INITIAL (decl) == NULL_TREE
29416 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
29417 || DECL_INITIAL (decl) == error_mark_node
29418 || (flag_zero_initialized_in_bss
29419 && initializer_zerop (DECL_INITIAL (decl))))
29420 return;
29422 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
29423 name = (*targetm.strip_name_encoding) (name);
29424 DECL_SECTION_NAME (decl) = build_string (strlen (name), name);
29427 /* Select section for constant in constant pool.
29429 On RS/6000, all constants are in the private read-only data area.
29430 However, if this is being placed in the TOC it must be output as a
29431 toc entry. */
29433 static section *
29434 rs6000_xcoff_select_rtx_section (enum machine_mode mode, rtx x,
29435 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
29437 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29438 return toc_section;
29439 else
29440 return read_only_private_data_section;
29443 /* Remove any trailing [DS] or the like from the symbol name. */
29445 static const char *
29446 rs6000_xcoff_strip_name_encoding (const char *name)
29448 size_t len;
29449 if (*name == '*')
29450 name++;
29451 len = strlen (name);
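/* XCOFF mapping-class suffixes such as "[DS]" or "[RW]" are two letters
   in brackets, i.e. exactly four characters, so drop the last four. */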
29452 if (name[len - 1] == ']')
29453 return ggc_alloc_string (name, len - 4);
29454 else
29455 return name;
29458 /* Section attributes. AIX is always PIC. */
29460 static unsigned int
29461 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
29463 unsigned int align;
29464 unsigned int flags = default_section_type_flags (decl, name, reloc);
29466 /* Align to at least UNIT size. */
29467 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
29468 align = MIN_UNITS_PER_WORD;
29469 else
29470 /* Increase alignment of large objects if not already stricter. */
29471 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
29472 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
29473 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
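/* Stash log2 of the alignment in the SECTION_ENTSIZE bits;
   rs6000_xcoff_asm_named_section prints that value back as the csect
   alignment operand. */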
29475 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
29478 /* Output at beginning of assembler file.
29480 Initialize the section names for the RS/6000 at this point.
29482 Specify filename, including full path, to assembler.
29484 We want to go into the TOC section so at least one .toc will be emitted.
29485 Also, in order to output proper .bs/.es pairs, we need at least one static
29486 [RW] section emitted.
29488 Finally, declare mcount when profiling to make the assembler happy. */
29490 static void
29491 rs6000_xcoff_file_start (void)
29493 rs6000_gen_section_name (&xcoff_bss_section_name,
29494 main_input_filename, ".bss_");
29495 rs6000_gen_section_name (&xcoff_private_data_section_name,
29496 main_input_filename, ".rw_");
29497 rs6000_gen_section_name (&xcoff_read_only_section_name,
29498 main_input_filename, ".ro_");
29499 rs6000_gen_section_name (&xcoff_tls_data_section_name,
29500 main_input_filename, ".tls_");
29501 rs6000_gen_section_name (&xcoff_tbss_section_name,
29502 main_input_filename, ".tbss_[UL]");
29504 fputs ("\t.file\t", asm_out_file);
29505 output_quoted_string (asm_out_file, main_input_filename);
29506 fputc ('\n', asm_out_file);
29507 if (write_symbols != NO_DEBUG)
29508 switch_to_section (private_data_section);
29509 switch_to_section (text_section);
29510 if (profile_flag)
29511 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
29512 rs6000_file_start ();
29515 /* Output at end of assembler file.
29516 On the RS/6000, referencing data should automatically pull in text. */
29518 static void
29519 rs6000_xcoff_file_end (void)
29521 switch_to_section (text_section);
29522 fputs ("_section_.text:\n", asm_out_file);
29523 switch_to_section (data_section);
29524 fputs (TARGET_32BIT
29525 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
29526 asm_out_file);
29529 #ifdef HAVE_AS_TLS
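/* Implement TARGET_ENCODE_SECTION_INFO. Thread-local symbols are not
   placed in object blocks, so strip any block info that
   default_encode_section_info may have recorded. */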
29530 static void
29531 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
29533 rtx symbol;
29534 int flags;
29536 default_encode_section_info (decl, rtl, first);
29538 /* Careful not to prod global register variables. */
29539 if (!MEM_P (rtl))
29540 return;
29541 symbol = XEXP (rtl, 0);
29542 if (GET_CODE (symbol) != SYMBOL_REF)
29543 return;
29545 flags = SYMBOL_REF_FLAGS (symbol);
29547 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
29548 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
29550 SYMBOL_REF_FLAGS (symbol) = flags;
29552 #endif /* HAVE_AS_TLS */
29553 #endif /* TARGET_XCOFF */
29555 /* Compute a (partial) cost for rtx X. Return true if the complete
29556 cost has been computed, and false if subexpressions should be
29557 scanned. In either case, *TOTAL contains the cost result. */
29559 static bool
29560 rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
29561 int *total, bool speed)
29563 enum machine_mode mode = GET_MODE (x);
29565 switch (code)
29567 /* On the RS/6000, if it is valid in the insn, it is free. */
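/* Constraint key (see rs6000 constraints.md): I = signed 16-bit,
   K = unsigned 16-bit, L = signed 16-bit shifted left 16,
   J = unsigned 16-bit shifted left 16, P = constant whose negation
   is a signed 16-bit value. */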
29568 case CONST_INT:
29569 if (((outer_code == SET
29570 || outer_code == PLUS
29571 || outer_code == MINUS)
29572 && (satisfies_constraint_I (x)
29573 || satisfies_constraint_L (x)))
29574 || (outer_code == AND
29575 && (satisfies_constraint_K (x)
29576 || (mode == SImode
29577 ? satisfies_constraint_L (x)
29578 : satisfies_constraint_J (x))
29579 || mask_operand (x, mode)
29580 || (mode == DImode
29581 && mask64_operand (x, DImode))))
29582 || ((outer_code == IOR || outer_code == XOR)
29583 && (satisfies_constraint_K (x)
29584 || (mode == SImode
29585 ? satisfies_constraint_L (x)
29586 : satisfies_constraint_J (x))))
29587 || outer_code == ASHIFT
29588 || outer_code == ASHIFTRT
29589 || outer_code == LSHIFTRT
29590 || outer_code == ROTATE
29591 || outer_code == ROTATERT
29592 || outer_code == ZERO_EXTRACT
29593 || (outer_code == MULT
29594 && satisfies_constraint_I (x))
29595 || ((outer_code == DIV || outer_code == UDIV
29596 || outer_code == MOD || outer_code == UMOD)
29597 && exact_log2 (INTVAL (x)) >= 0)
29598 || (outer_code == COMPARE
29599 && (satisfies_constraint_I (x)
29600 || satisfies_constraint_K (x)))
29601 || ((outer_code == EQ || outer_code == NE)
29602 && (satisfies_constraint_I (x)
29603 || satisfies_constraint_K (x)
29604 || (mode == SImode
29605 ? satisfies_constraint_L (x)
29606 : satisfies_constraint_J (x))))
29607 || (outer_code == GTU
29608 && satisfies_constraint_I (x))
29609 || (outer_code == LTU
29610 && satisfies_constraint_P (x)))
29612 *total = 0;
29613 return true;
29615 else if ((outer_code == PLUS
29616 && reg_or_add_cint_operand (x, VOIDmode))
29617 || (outer_code == MINUS
29618 && reg_or_sub_cint_operand (x, VOIDmode))
29619 || ((outer_code == SET
29620 || outer_code == IOR
29621 || outer_code == XOR)
29622 && (INTVAL (x)
29623 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
29625 *total = COSTS_N_INSNS (1);
29626 return true;
29628 /* FALLTHRU */
29630 case CONST_DOUBLE:
29631 case CONST:
29632 case HIGH:
29633 case SYMBOL_REF:
29634 case MEM:
29635 /* When optimizing for size, MEM should be slightly more expensive
29636 than generating the address, e.g., (plus (reg) (const)).
29637 L1 cache latency is about two instructions. */
29638 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
29639 return true;
29641 case LABEL_REF:
29642 *total = 0;
29643 return true;
29645 case PLUS:
29646 case MINUS:
29647 if (FLOAT_MODE_P (mode))
29648 *total = rs6000_cost->fp;
29649 else
29650 *total = COSTS_N_INSNS (1);
29651 return false;
29653 case MULT:
29654 if (GET_CODE (XEXP (x, 1)) == CONST_INT
29655 && satisfies_constraint_I (XEXP (x, 1)))
29657 if (INTVAL (XEXP (x, 1)) >= -256
29658 && INTVAL (XEXP (x, 1)) <= 255)
29659 *total = rs6000_cost->mulsi_const9;
29660 else
29661 *total = rs6000_cost->mulsi_const;
29663 else if (mode == SFmode)
29664 *total = rs6000_cost->fp;
29665 else if (FLOAT_MODE_P (mode))
29666 *total = rs6000_cost->dmul;
29667 else if (mode == DImode)
29668 *total = rs6000_cost->muldi;
29669 else
29670 *total = rs6000_cost->mulsi;
29671 return false;
29673 case FMA:
29674 if (mode == SFmode)
29675 *total = rs6000_cost->fp;
29676 else
29677 *total = rs6000_cost->dmul;
29678 break;
29680 case DIV:
29681 case MOD:
29682 if (FLOAT_MODE_P (mode))
29684 *total = mode == DFmode ? rs6000_cost->ddiv
29685 : rs6000_cost->sdiv;
29686 return false;
29688 /* FALLTHRU */
29690 case UDIV:
29691 case UMOD:
29692 if (GET_CODE (XEXP (x, 1)) == CONST_INT
29693 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
29695 if (code == DIV || code == MOD)
29696 /* Shift, addze */
29697 *total = COSTS_N_INSNS (2);
29698 else
29699 /* Shift */
29700 *total = COSTS_N_INSNS (1);
29702 else
29704 if (GET_MODE (XEXP (x, 1)) == DImode)
29705 *total = rs6000_cost->divdi;
29706 else
29707 *total = rs6000_cost->divsi;
29709 /* Add in shift and subtract for MOD. */
29710 if (code == MOD || code == UMOD)
29711 *total += COSTS_N_INSNS (2);
29712 return false;
29714 case CTZ:
29715 case FFS:
29716 *total = COSTS_N_INSNS (4);
29717 return false;
29719 case POPCOUNT:
29720 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
29721 return false;
29723 case PARITY:
29724 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
29725 return false;
29727 case NOT:
29728 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
29730 *total = 0;
29731 return false;
29733 /* FALLTHRU */
29735 case AND:
29736 case CLZ:
29737 case IOR:
29738 case XOR:
29739 case ZERO_EXTRACT:
29740 *total = COSTS_N_INSNS (1);
29741 return false;
29743 case ASHIFT:
29744 case ASHIFTRT:
29745 case LSHIFTRT:
29746 case ROTATE:
29747 case ROTATERT:
29748 /* Handle mul_highpart. */
29749 if (outer_code == TRUNCATE
29750 && GET_CODE (XEXP (x, 0)) == MULT)
29752 if (mode == DImode)
29753 *total = rs6000_cost->muldi;
29754 else
29755 *total = rs6000_cost->mulsi;
29756 return true;
29758 else if (outer_code == AND)
29759 *total = 0;
29760 else
29761 *total = COSTS_N_INSNS (1);
29762 return false;
29764 case SIGN_EXTEND:
29765 case ZERO_EXTEND:
29766 if (GET_CODE (XEXP (x, 0)) == MEM)
29767 *total = 0;
29768 else
29769 *total = COSTS_N_INSNS (1);
29770 return false;
29772 case COMPARE:
29773 case NEG:
29774 case ABS:
29775 if (!FLOAT_MODE_P (mode))
29777 *total = COSTS_N_INSNS (1);
29778 return false;
29780 /* FALLTHRU */
29782 case FLOAT:
29783 case UNSIGNED_FLOAT:
29784 case FIX:
29785 case UNSIGNED_FIX:
29786 case FLOAT_TRUNCATE:
29787 *total = rs6000_cost->fp;
29788 return false;
29790 case FLOAT_EXTEND:
29791 if (mode == DFmode)
29792 *total = 0;
29793 else
29794 *total = rs6000_cost->fp;
29795 return false;
29797 case UNSPEC:
29798 switch (XINT (x, 1))
29800 case UNSPEC_FRSP:
29801 *total = rs6000_cost->fp;
29802 return true;
29804 default:
29805 break;
29807 break;
29809 case CALL:
29810 case IF_THEN_ELSE:
29811 if (!speed)
29813 *total = COSTS_N_INSNS (1);
29814 return true;
29816 else if (FLOAT_MODE_P (mode)
29817 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
29819 *total = rs6000_cost->fp;
29820 return false;
29822 break;
29824 case EQ:
29825 case GTU:
29826 case LTU:
29827 /* Carry bit requires mode == Pmode.
29828 NEG or PLUS already counted so only add one. */
29829 if (mode == Pmode
29830 && (outer_code == NEG || outer_code == PLUS))
29832 *total = COSTS_N_INSNS (1);
29833 return true;
29835 if (outer_code == SET)
29837 if (XEXP (x, 1) == const0_rtx)
29839 if (TARGET_ISEL && !TARGET_MFCRF)
29840 *total = COSTS_N_INSNS (8);
29841 else
29842 *total = COSTS_N_INSNS (2);
29843 return true;
29845 else if (mode == Pmode)
29847 *total = COSTS_N_INSNS (3);
29848 return false;
29851 /* FALLTHRU */
29853 case GT:
29854 case LT:
29855 case UNORDERED:
29856 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
29858 if (TARGET_ISEL && !TARGET_MFCRF)
29859 *total = COSTS_N_INSNS (8);
29860 else
29861 *total = COSTS_N_INSNS (2);
29862 return true;
29864 /* CC COMPARE. */
29865 if (outer_code == COMPARE)
29867 *total = 0;
29868 return true;
29870 break;
29872 default:
29873 break;
29876 return false;
29879 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
29881 static bool
29882 rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
29883 bool speed)
29885 bool ret = rs6000_rtx_costs (x, code, outer_code, opno, total, speed);
29887 fprintf (stderr,
29888 "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
29889 "opno = %d, total = %d, speed = %s, x:\n",
29890 ret ? "complete" : "scan inner",
29891 GET_RTX_NAME (code),
29892 GET_RTX_NAME (outer_code),
29893 opno,
29894 *total,
29895 speed ? "true" : "false");
29897 debug_rtx (x);
29899 return ret;
29902 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
29904 static int
29905 rs6000_debug_address_cost (rtx x, enum machine_mode mode,
29906 addr_space_t as, bool speed)
29908 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
29910 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
29911 ret, speed ? "true" : "false");
29912 debug_rtx (x);
29914 return ret;
29918 /* A C expression returning the cost of moving data from a register of class
29919 CLASS1 to one of CLASS2. */
29921 static int
29922 rs6000_register_move_cost (enum machine_mode mode,
29923 reg_class_t from, reg_class_t to)
29925 int ret;
29927 if (TARGET_DEBUG_COST)
29928 dbg_cost_ctrl++;
29930 /* Moves from/to GENERAL_REGS. */
29931 if (reg_classes_intersect_p (to, GENERAL_REGS)
29932 || reg_classes_intersect_p (from, GENERAL_REGS))
29934 reg_class_t rclass = from;
29936 if (! reg_classes_intersect_p (to, GENERAL_REGS))
29937 rclass = to;
29939 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
29940 ret = (rs6000_memory_move_cost (mode, rclass, false)
29941 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
29943 /* It's more expensive to move CR_REGS than CR0_REGS because of the
29944 shift. */
29945 else if (rclass == CR_REGS)
29946 ret = 4;
29948 /* For those processors that have slow LR/CTR moves, make them more
29949 expensive than memory in order to bias spills to memory. */
29950 else if ((rs6000_cpu == PROCESSOR_POWER6
29951 || rs6000_cpu == PROCESSOR_POWER7
29952 || rs6000_cpu == PROCESSOR_POWER8)
29953 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
29954 ret = 6 * hard_regno_nregs[0][mode];
29956 else
29957 /* A move will cost one instruction per GPR moved. */
29958 ret = 2 * hard_regno_nregs[0][mode];
29961 /* If we have VSX, we can easily move between FPR or Altivec registers. */
29962 else if (VECTOR_MEM_VSX_P (mode)
29963 && reg_classes_intersect_p (to, VSX_REGS)
29964 && reg_classes_intersect_p (from, VSX_REGS))
29965 ret = 2 * hard_regno_nregs[32][mode];
29967 /* Moving between two similar registers is just one instruction. */
29968 else if (reg_classes_intersect_p (to, from))
29969 ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
29971 /* Everything else has to go through GENERAL_REGS. */
29972 else
29973 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
29974 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
29976 if (TARGET_DEBUG_COST)
29978 if (dbg_cost_ctrl == 1)
29979 fprintf (stderr,
29980 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
29981 ret, GET_MODE_NAME (mode), reg_class_names[from],
29982 reg_class_names[to]);
29983 dbg_cost_ctrl--;
29986 return ret;
29989 /* A C expression returning the cost of moving data of MODE from a register to
29990 or from memory. */
29992 static int
29993 rs6000_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
29994 bool in ATTRIBUTE_UNUSED)
29996 int ret;
29998 if (TARGET_DEBUG_COST)
29999 dbg_cost_ctrl++;
30001 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
30002 ret = 4 * hard_regno_nregs[0][mode];
30003 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
30004 || reg_classes_intersect_p (rclass, VSX_REGS)))
30005 ret = 4 * hard_regno_nregs[32][mode];
30006 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
30007 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
30008 else
30009 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
30011 if (TARGET_DEBUG_COST)
30013 if (dbg_cost_ctrl == 1)
30014 fprintf (stderr,
30015 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
30016 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
30017 dbg_cost_ctrl--;
30020 return ret;
30023 /* Returns a code for a target-specific builtin that implements
30024 reciprocal of the function, or NULL_TREE if not available. */
30026 static tree
30027 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
30028 bool sqrt ATTRIBUTE_UNUSED)
30030 if (optimize_insn_for_size_p ())
30031 return NULL_TREE;
30033 if (md_fn)
30034 switch (fn)
30036 case VSX_BUILTIN_XVSQRTDP:
30037 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
30038 return NULL_TREE;
30040 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
30042 case VSX_BUILTIN_XVSQRTSP:
30043 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
30044 return NULL_TREE;
30046 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
30048 default:
30049 return NULL_TREE;
30052 else
30053 switch (fn)
30055 case BUILT_IN_SQRT:
30056 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
30057 return NULL_TREE;
30059 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
30061 case BUILT_IN_SQRTF:
30062 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
30063 return NULL_TREE;
30065 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
30067 default:
30068 return NULL_TREE;
30072 /* Load up a constant. If the mode is a vector mode, splat the value across
30073 all of the vector elements. */
30075 static rtx
30076 rs6000_load_constant_and_splat (enum machine_mode mode, REAL_VALUE_TYPE dconst)
30078 rtx reg;
30080 if (mode == SFmode || mode == DFmode)
30082 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
30083 reg = force_reg (mode, d);
30085 else if (mode == V4SFmode)
30087 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
30088 rtvec v = gen_rtvec (4, d, d, d, d);
30089 reg = gen_reg_rtx (mode);
30090 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30092 else if (mode == V2DFmode)
30094 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
30095 rtvec v = gen_rtvec (2, d, d);
30096 reg = gen_reg_rtx (mode);
30097 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30099 else
30100 gcc_unreachable ();
30102 return reg;
30105 /* Generate an FMA instruction. */
30107 static void
30108 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
30110 enum machine_mode mode = GET_MODE (target);
30111 rtx dst;
30113 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30114 gcc_assert (dst != NULL);
30116 if (dst != target)
30117 emit_move_insn (target, dst);
30120 /* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
30122 static void
30123 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
30125 enum machine_mode mode = GET_MODE (target);
30126 rtx dst;
30128 /* Altivec does not support fms directly;
30129 generate in terms of fma in that case. */
30130 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
30131 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
30132 else
30134 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
30135 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30137 gcc_assert (dst != NULL);
30139 if (dst != target)
30140 emit_move_insn (target, dst);
30143 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
30145 static void
30146 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
30148 enum machine_mode mode = GET_MODE (dst);
30149 rtx r;
30151 /* This is a tad more complicated, since the fnma_optab is for
30152 a different expression: fma(-m1, m2, a), which is the same
30153 thing except in the case of signed zeros.
30155 Fortunately we know that if FMA is supported, then FNMSUB is
30156 also supported in the ISA. Just expand it directly. */
30158 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
30160 r = gen_rtx_NEG (mode, a);
30161 r = gen_rtx_FMA (mode, m1, m2, r);
30162 r = gen_rtx_NEG (mode, r);
30163 emit_insn (gen_rtx_SET (VOIDmode, dst, r));
30166 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
30167 add a reg_note saying that this was a division. Support both scalar and
30168 vector divide. Assumes no trapping math and finite arguments. */
30170 void
30171 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
30173 enum machine_mode mode = GET_MODE (dst);
30174 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
30175 int i;
30177 /* Low precision estimates guarantee 5 bits of accuracy. High
30178 precision estimates guarantee 14 bits of accuracy. SFmode
30179 requires 23 bits of accuracy. DFmode requires 52 bits of
30180 accuracy. Each pass at least doubles the accuracy, leading
30181 to the following. */
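/* For example, in SFmode a 14-bit estimate doubles to 28 >= 23 bits in
   one pass, while a 5-bit estimate needs three passes (5 -> 10 -> 20 -> 40);
   DFmode needs one more pass in either case (28 -> 56 and 40 -> 80,
   both >= 52). */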
30182 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30183 if (mode == DFmode || mode == V2DFmode)
30184 passes++;
30186 enum insn_code code = optab_handler (smul_optab, mode);
30187 insn_gen_fn gen_mul = GEN_FCN (code);
30189 gcc_assert (code != CODE_FOR_nothing);
30191 one = rs6000_load_constant_and_splat (mode, dconst1);
30193 /* x0 = 1./d estimate */
30194 x0 = gen_reg_rtx (mode);
30195 emit_insn (gen_rtx_SET (VOIDmode, x0,
30196 gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
30197 UNSPEC_FRES)));
30199 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
30200 if (passes > 1) {
30202 /* e0 = 1. - d * x0 */
30203 e0 = gen_reg_rtx (mode);
30204 rs6000_emit_nmsub (e0, d, x0, one);
30206 /* x1 = x0 + e0 * x0 */
30207 x1 = gen_reg_rtx (mode);
30208 rs6000_emit_madd (x1, e0, x0, x0);
30210 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
30211 ++i, xprev = xnext, eprev = enext) {
30213 /* enext = eprev * eprev */
30214 enext = gen_reg_rtx (mode);
30215 emit_insn (gen_mul (enext, eprev, eprev));
30217 /* xnext = xprev + enext * xprev */
30218 xnext = gen_reg_rtx (mode);
30219 rs6000_emit_madd (xnext, enext, xprev, xprev);
30222 } else
30223 xprev = x0;
30225 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
30227 /* u = n * xprev */
30228 u = gen_reg_rtx (mode);
30229 emit_insn (gen_mul (u, n, xprev));
30231 /* v = n - (d * u) */
30232 v = gen_reg_rtx (mode);
30233 rs6000_emit_nmsub (v, d, u, n);
30235 /* dst = (v * xprev) + u */
30236 rs6000_emit_madd (dst, v, xprev, u);
30238 if (note_p)
30239 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
30242 /* Newton-Raphson approximation of single/double-precision floating point
30243 rsqrt. Assumes no trapping math and finite arguments. */
30245 void
30246 rs6000_emit_swrsqrt (rtx dst, rtx src)
30248 enum machine_mode mode = GET_MODE (src);
30249 rtx x0 = gen_reg_rtx (mode);
30250 rtx y = gen_reg_rtx (mode);
30252 /* Low precision estimates guarantee 5 bits of accuracy. High
30253 precision estimates guarantee 14 bits of accuracy. SFmode
30254 requires 23 bits of accuracy. DFmode requires 52 bits of
30255 accuracy. Each pass at least doubles the accuracy, leading
30256 to the following. */
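/* The pass-count arithmetic is the same as in rs6000_emit_swdiv above. */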
30257 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30258 if (mode == DFmode || mode == V2DFmode)
30259 passes++;
30261 REAL_VALUE_TYPE dconst3_2;
30262 int i;
30263 rtx halfthree;
30264 enum insn_code code = optab_handler (smul_optab, mode);
30265 insn_gen_fn gen_mul = GEN_FCN (code);
30267 gcc_assert (code != CODE_FOR_nothing);
30269 /* Load up the constant 1.5 either as a scalar, or as a vector. */
30270 real_from_integer (&dconst3_2, VOIDmode, 3, 0, 0);
30271 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
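/* real_from_integer gives 3.0; decrementing the exponent halves it,
   yielding an exact 1.5. */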
30273 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
30275 /* x0 = rsqrt estimate */
30276 emit_insn (gen_rtx_SET (VOIDmode, x0,
30277 gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
30278 UNSPEC_RSQRT)));
30280 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
30281 rs6000_emit_msub (y, src, halfthree, src);
30283 for (i = 0; i < passes; i++)
30285 rtx x1 = gen_reg_rtx (mode);
30286 rtx u = gen_reg_rtx (mode);
30287 rtx v = gen_reg_rtx (mode);
30289 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
30290 emit_insn (gen_mul (u, x0, x0));
30291 rs6000_emit_nmsub (v, y, u, halfthree);
30292 emit_insn (gen_mul (x1, x0, v));
30293 x0 = x1;
30296 emit_move_insn (dst, x0);
30297 return;
30300 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
30301 (Power7) targets. DST is the target, and SRC is the argument operand. */
30303 void
30304 rs6000_emit_popcount (rtx dst, rtx src)
30306 enum machine_mode mode = GET_MODE (dst);
30307 rtx tmp1, tmp2;
30309 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
30310 if (TARGET_POPCNTD)
30312 if (mode == SImode)
30313 emit_insn (gen_popcntdsi2 (dst, src));
30314 else
30315 emit_insn (gen_popcntddi2 (dst, src));
30316 return;
30319 tmp1 = gen_reg_rtx (mode);
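/* Fall back on popcntb, which counts the bits within each byte;
   multiplying by 0x01...01 sums the byte counts into the most
   significant byte, which the final shift extracts. */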
30321 if (mode == SImode)
30323 emit_insn (gen_popcntbsi2 (tmp1, src));
30324 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
30325 NULL_RTX, 0);
30326 tmp2 = force_reg (SImode, tmp2);
30327 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
30329 else
30331 emit_insn (gen_popcntbdi2 (tmp1, src));
30332 tmp2 = expand_mult (DImode, tmp1,
30333 GEN_INT ((HOST_WIDE_INT)
30334 0x01010101 << 32 | 0x01010101),
30335 NULL_RTX, 0);
30336 tmp2 = force_reg (DImode, tmp2);
30337 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
30342 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
30343 target, and SRC is the argument operand. */
30345 void
30346 rs6000_emit_parity (rtx dst, rtx src)
30348 enum machine_mode mode = GET_MODE (dst);
30349 rtx tmp;
30351 tmp = gen_reg_rtx (mode);
30353 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
30354 if (TARGET_CMPB)
30356 if (mode == SImode)
30358 emit_insn (gen_popcntbsi2 (tmp, src));
30359 emit_insn (gen_paritysi2_cmpb (dst, tmp));
30361 else
30363 emit_insn (gen_popcntbdi2 (tmp, src));
30364 emit_insn (gen_paritydi2_cmpb (dst, tmp));
30366 return;
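/* Without prtyw/prtyd, parity is the low bit of the popcount: either
   reduce the popcntb byte counts with a multiply (as in
   rs6000_emit_popcount) or fold them together with a shift/xor ladder,
   whichever is cheaper. */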
30369 if (mode == SImode)
30371 /* Is mult+shift >= shift+xor+shift+xor? */
30372 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
30374 rtx tmp1, tmp2, tmp3, tmp4;
30376 tmp1 = gen_reg_rtx (SImode);
30377 emit_insn (gen_popcntbsi2 (tmp1, src));
30379 tmp2 = gen_reg_rtx (SImode);
30380 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
30381 tmp3 = gen_reg_rtx (SImode);
30382 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
30384 tmp4 = gen_reg_rtx (SImode);
30385 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
30386 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
30388 else
30389 rs6000_emit_popcount (tmp, src);
30390 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
30392 else
30394 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
30395 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
30397 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
30399 tmp1 = gen_reg_rtx (DImode);
30400 emit_insn (gen_popcntbdi2 (tmp1, src));
30402 tmp2 = gen_reg_rtx (DImode);
30403 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
30404 tmp3 = gen_reg_rtx (DImode);
30405 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
30407 tmp4 = gen_reg_rtx (DImode);
30408 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
30409 tmp5 = gen_reg_rtx (DImode);
30410 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
30412 tmp6 = gen_reg_rtx (DImode);
30413 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
30414 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
30416 else
30417 rs6000_emit_popcount (tmp, src);
30418 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
30422 /* Expand an Altivec constant permutation for little endian mode.
30423 There are two issues: First, the two input operands must be
30424 swapped so that together they form a double-wide array in LE
30425 order. Second, the vperm instruction has surprising behavior
30426 in LE mode: it interprets the elements of the source vectors
30427 in BE mode ("left to right") and interprets the elements of
30428 the destination vector in LE mode ("right to left"). To
30429 correct for this, we must subtract each element of the permute
30430 control vector from 31.
30432 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
30433 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
30434 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
30435 serve as the permute control vector. Then, in BE mode,
30437 vperm 9,10,11,12
30439 places the desired result in vr9. However, in LE mode the
30440 vector contents will be
30442 vr10 = 00000003 00000002 00000001 00000000
30443 vr11 = 00000007 00000006 00000005 00000004
30445 The result of the vperm using the same permute control vector is
30447 vr9 = 05000000 07000000 01000000 03000000
30449 That is, the leftmost 4 bytes of vr10 are interpreted as the
30450 source for the rightmost 4 bytes of vr9, and so on.
30452 If we change the permute control vector to
30454 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
30456 and issue
30458 vperm 9,11,10,12
30460 we get the desired
30462 vr9 = 00000006 00000004 00000002 00000000. */
30464 void
30465 altivec_expand_vec_perm_const_le (rtx operands[4])
30467 unsigned int i;
30468 rtx perm[16];
30469 rtx constv, unspec;
30470 rtx target = operands[0];
30471 rtx op0 = operands[1];
30472 rtx op1 = operands[2];
30473 rtx sel = operands[3];
30475 /* Unpack and adjust the constant selector. */
30476 for (i = 0; i < 16; ++i)
30478 rtx e = XVECEXP (sel, 0, i);
30479 unsigned int elt = 31 - (INTVAL (e) & 31);
30480 perm[i] = GEN_INT (elt);
30483 /* Expand to a permute, swapping the inputs and using the
30484 adjusted selector. */
30485 if (!REG_P (op0))
30486 op0 = force_reg (V16QImode, op0);
30487 if (!REG_P (op1))
30488 op1 = force_reg (V16QImode, op1);
30490 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
30491 constv = force_reg (V16QImode, constv);
30492 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
30493 UNSPEC_VPERM);
30494 if (!REG_P (target))
30496 rtx tmp = gen_reg_rtx (V16QImode);
30497 emit_move_insn (tmp, unspec);
30498 unspec = tmp;
30501 emit_move_insn (target, unspec);
30504 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
30505 permute control vector. But here it's not a constant, so we must
30506 generate a vector NAND or NOR to do the adjustment. */
30508 void
30509 altivec_expand_vec_perm_le (rtx operands[4])
30511 rtx notx, iorx, unspec;
30512 rtx target = operands[0];
30513 rtx op0 = operands[1];
30514 rtx op1 = operands[2];
30515 rtx sel = operands[3];
30516 rtx tmp = target;
30517 rtx norreg = gen_reg_rtx (V16QImode);
30518 enum machine_mode mode = GET_MODE (target);
30520 /* Get everything in regs so the pattern matches. */
30521 if (!REG_P (op0))
30522 op0 = force_reg (mode, op0);
30523 if (!REG_P (op1))
30524 op1 = force_reg (mode, op1);
30525 if (!REG_P (sel))
30526 sel = force_reg (V16QImode, sel);
30527 if (!REG_P (target))
30528 tmp = gen_reg_rtx (mode);
30530 /* Invert the selector with a VNAND if available, else a VNOR.
30531 The VNAND is preferred for future fusion opportunities. */
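/* vperm uses only the low five bits of each selector byte, and ~elt is
   congruent to 31 - elt (mod 32), so a bitwise NOT performs the
   subtract-from-31 adjustment described above. */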
30532 notx = gen_rtx_NOT (V16QImode, sel);
30533 iorx = (TARGET_P8_VECTOR
30534 ? gen_rtx_IOR (V16QImode, notx, notx)
30535 : gen_rtx_AND (V16QImode, notx, notx));
30536 emit_insn (gen_rtx_SET (VOIDmode, norreg, iorx));
30538 /* Permute with operands reversed and adjusted selector. */
30539 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
30540 UNSPEC_VPERM);
30542 /* Copy into target, possibly by way of a register. */
30543 if (!REG_P (target))
30545 emit_move_insn (tmp, unspec);
30546 unspec = tmp;
30549 emit_move_insn (target, unspec);
30552 /* Expand an Altivec constant permutation. Return true if we match
30553 an efficient implementation; false to fall back to VPERM. */
30555 bool
30556 altivec_expand_vec_perm_const (rtx operands[4])
30558 struct altivec_perm_insn {
30559 HOST_WIDE_INT mask;
30560 enum insn_code impl;
30561 unsigned char perm[16];
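/* perm[] holds big-endian byte indices into the 32-byte {op0, op1}
   concatenation; indices 16-31 select bytes of the second operand. */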
30563 static const struct altivec_perm_insn patterns[] = {
30564 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
30565 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
30566 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
30567 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
30568 { OPTION_MASK_ALTIVEC,
30569 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
30570 : CODE_FOR_altivec_vmrglb_direct),
30571 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
30572 { OPTION_MASK_ALTIVEC,
30573 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
30574 : CODE_FOR_altivec_vmrglh_direct),
30575 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
30576 { OPTION_MASK_ALTIVEC,
30577 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
30578 : CODE_FOR_altivec_vmrglw_direct),
30579 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
30580 { OPTION_MASK_ALTIVEC,
30581 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
30582 : CODE_FOR_altivec_vmrghb_direct),
30583 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
30584 { OPTION_MASK_ALTIVEC,
30585 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
30586 : CODE_FOR_altivec_vmrghh_direct),
30587 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
30588 { OPTION_MASK_ALTIVEC,
30589 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
30590 : CODE_FOR_altivec_vmrghw_direct),
30591 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
30592 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
30593 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
30594 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
30595 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
30598 unsigned int i, j, elt, which;
30599 unsigned char perm[16];
30600 rtx target, op0, op1, sel, x;
30601 bool one_vec;
30603 target = operands[0];
30604 op0 = operands[1];
30605 op1 = operands[2];
30606 sel = operands[3];
30608 /* Unpack the constant selector. */
30609 for (i = which = 0; i < 16; ++i)
30611 rtx e = XVECEXP (sel, 0, i);
30612 elt = INTVAL (e) & 31;
30613 which |= (elt < 16 ? 1 : 2);
30614 perm[i] = elt;
30617 /* Simplify the constant selector based on operands. */
30618 switch (which)
30620 default:
30621 gcc_unreachable ();
30623 case 3:
30624 one_vec = false;
30625 if (!rtx_equal_p (op0, op1))
30626 break;
30627 /* FALLTHRU */
30629 case 2:
30630 for (i = 0; i < 16; ++i)
30631 perm[i] &= 15;
30632 op0 = op1;
30633 one_vec = true;
30634 break;
30636 case 1:
30637 op1 = op0;
30638 one_vec = true;
30639 break;
30642 /* Look for splat patterns. */
30643 if (one_vec)
30645 elt = perm[0];
30647 for (i = 0; i < 16; ++i)
30648 if (perm[i] != elt)
30649 break;
30650 if (i == 16)
30652 if (!BYTES_BIG_ENDIAN)
30653 elt = 15 - elt;
30654 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
30655 return true;
30658 if (elt % 2 == 0)
30660 for (i = 0; i < 16; i += 2)
30661 if (perm[i] != elt || perm[i + 1] != elt + 1)
30662 break;
30663 if (i == 16)
30665 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
30666 x = gen_reg_rtx (V8HImode);
30667 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
30668 GEN_INT (field)));
30669 emit_move_insn (target, gen_lowpart (V16QImode, x));
30670 return true;
30674 if (elt % 4 == 0)
30676 for (i = 0; i < 16; i += 4)
30677 if (perm[i] != elt
30678 || perm[i + 1] != elt + 1
30679 || perm[i + 2] != elt + 2
30680 || perm[i + 3] != elt + 3)
30681 break;
30682 if (i == 16)
30684 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
30685 x = gen_reg_rtx (V4SImode);
30686 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
30687 GEN_INT (field)));
30688 emit_move_insn (target, gen_lowpart (V16QImode, x));
30689 return true;
30694 /* Look for merge and pack patterns. */
30695 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
30697 bool swapped;
30699 if ((patterns[j].mask & rs6000_isa_flags) == 0)
30700 continue;
30702 elt = patterns[j].perm[0];
30703 if (perm[0] == elt)
30704 swapped = false;
30705 else if (perm[0] == elt + 16)
30706 swapped = true;
30707 else
30708 continue;
30709 for (i = 1; i < 16; ++i)
30711 elt = patterns[j].perm[i];
30712 if (swapped)
30713 elt = (elt >= 16 ? elt - 16 : elt + 16);
30714 else if (one_vec && elt >= 16)
30715 elt -= 16;
30716 if (perm[i] != elt)
30717 break;
30719 if (i == 16)
30721 enum insn_code icode = patterns[j].impl;
30722 enum machine_mode omode = insn_data[icode].operand[0].mode;
30723 enum machine_mode imode = insn_data[icode].operand[1].mode;
30725 /* For little-endian, don't use vpkuwum and vpkuhum if the
30726 underlying vector type is not V4SI and V8HI, respectively.
30727 For example, using vpkuwum with a V8HI picks up the even
30728 halfwords (BE numbering) when the even halfwords (LE
30729 numbering) are what we need. */
30730 if (!BYTES_BIG_ENDIAN
30731 && icode == CODE_FOR_altivec_vpkuwum_direct
30732 && ((GET_CODE (op0) == REG
30733 && GET_MODE (op0) != V4SImode)
30734 || (GET_CODE (op0) == SUBREG
30735 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
30736 continue;
30737 if (!BYTES_BIG_ENDIAN
30738 && icode == CODE_FOR_altivec_vpkuhum_direct
30739 && ((GET_CODE (op0) == REG
30740 && GET_MODE (op0) != V8HImode)
30741 || (GET_CODE (op0) == SUBREG
30742 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
30743 continue;
30745 /* For little-endian, the two input operands must be swapped
30746 (or swapped back) to ensure proper right-to-left numbering
30747 from 0 to 2N-1. */
30748 if (swapped ^ !BYTES_BIG_ENDIAN)
30749 x = op0, op0 = op1, op1 = x;
30750 if (imode != V16QImode)
30752 op0 = gen_lowpart (imode, op0);
30753 op1 = gen_lowpart (imode, op1);
30755 if (omode == V16QImode)
30756 x = target;
30757 else
30758 x = gen_reg_rtx (omode);
30759 emit_insn (GEN_FCN (icode) (x, op0, op1));
30760 if (omode != V16QImode)
30761 emit_move_insn (target, gen_lowpart (V16QImode, x));
30762 return true;
30766 if (!BYTES_BIG_ENDIAN)
30768 altivec_expand_vec_perm_const_le (operands);
30769 return true;
30772 return false;
30775 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
30776 Return true if we match an efficient implementation. */
30778 static bool
30779 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
30780 unsigned char perm0, unsigned char perm1)
30782 rtx x;
30784 /* If both selectors come from the same operand, fold to single op. */
30785 if ((perm0 & 2) == (perm1 & 2))
30787 if (perm0 & 2)
30788 op0 = op1;
30789 else
30790 op1 = op0;
30792 /* If both operands are equal, fold to simpler permutation. */
30793 if (rtx_equal_p (op0, op1))
30795 perm0 = perm0 & 1;
30796 perm1 = (perm1 & 1) + 2;
30798 /* If the first selector comes from the second operand, swap. */
30799 else if (perm0 & 2)
30801 if (perm1 & 2)
30802 return false;
30803 perm0 -= 2;
30804 perm1 += 2;
30805 x = op0, op0 = op1, op1 = x;
30807 /* If the second selector does not come from the second operand, fail. */
30808 else if ((perm1 & 2) == 0)
30809 return false;
30811 /* Success! */
30812 if (target != NULL)
30814 enum machine_mode vmode, dmode;
30815 rtvec v;
30817 vmode = GET_MODE (target);
30818 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
30819 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
30820 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
30821 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
30822 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
30823 emit_insn (gen_rtx_SET (VOIDmode, target, x));
30825 return true;
30828 bool
30829 rs6000_expand_vec_perm_const (rtx operands[4])
30831 rtx target, op0, op1, sel;
30832 unsigned char perm0, perm1;
30834 target = operands[0];
30835 op0 = operands[1];
30836 op1 = operands[2];
30837 sel = operands[3];
30839 /* Unpack the constant selector. */
30840 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
30841 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
30843 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
30846 /* Test whether a constant permutation is supported. */
30848 static bool
30849 rs6000_vectorize_vec_perm_const_ok (enum machine_mode vmode,
30850 const unsigned char *sel)
30852 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
30853 if (TARGET_ALTIVEC)
30854 return true;
30856 /* Check for ps_merge* or evmerge* insns. */
30857 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
30858 || (TARGET_SPE && vmode == V2SImode))
30860 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
30861 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
30862 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
30865 return false;
30868 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
30870 static void
30871 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
30872 enum machine_mode vmode, unsigned nelt, rtx perm[])
30874 enum machine_mode imode;
30875 rtx x;
30877 imode = vmode;
30878 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
30880 imode = GET_MODE_INNER (vmode);
30881 imode = mode_for_size (GET_MODE_BITSIZE (imode), MODE_INT, 0);
30882 imode = mode_for_vector (imode, nelt);
30885 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
30886 x = expand_vec_perm (vmode, op0, op1, x, target);
30887 if (x != target)
30888 emit_move_insn (target, x);
30891 /* Expand an extract even operation. */
30893 void
30894 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
30896 enum machine_mode vmode = GET_MODE (target);
30897 unsigned i, nelt = GET_MODE_NUNITS (vmode);
30898 rtx perm[16];
30900 for (i = 0; i < nelt; i++)
30901 perm[i] = GEN_INT (i * 2);
30903 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
30906 /* Expand a vector interleave operation. */
30908 void
30909 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
30911 enum machine_mode vmode = GET_MODE (target);
30912 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
30913 rtx perm[16];
30915 high = (highp ? 0 : nelt / 2);
30916 for (i = 0; i < nelt / 2; i++)
30918 perm[i * 2] = GEN_INT (i + high);
30919 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
30922 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
30925 /* Return an RTX representing where to find the function value of a
30926 function returning MODE. */
30927 static rtx
30928 rs6000_complex_function_value (enum machine_mode mode)
30930 unsigned int regno;
30931 rtx r1, r2;
30932 enum machine_mode inner = GET_MODE_INNER (mode);
30933 unsigned int inner_bytes = GET_MODE_SIZE (inner);
30935 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
30936 regno = FP_ARG_RETURN;
30937 else
30939 regno = GP_ARG_RETURN;
30941 /* 32-bit is OK since it'll go in r3/r4. */
30942 if (TARGET_32BIT && inner_bytes >= 4)
30943 return gen_rtx_REG (mode, regno);
30946 if (inner_bytes >= 8)
30947 return gen_rtx_REG (mode, regno);
30949 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
30950 const0_rtx);
30951 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
30952 GEN_INT (inner_bytes));
30953 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
30956 /* Target hook for TARGET_FUNCTION_VALUE.
30958 On the SPE, both FPs and vectors are returned in r3.
30960 On RS/6000 an integer value is in r3 and a floating-point value is in
30961 fp1, unless -msoft-float. */
30963 static rtx
30964 rs6000_function_value (const_tree valtype,
30965 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
30966 bool outgoing ATTRIBUTE_UNUSED)
30968 enum machine_mode mode;
30969 unsigned int regno;
30970 enum machine_mode elt_mode;
30971 int n_elts;
30973 /* Special handling for structs in darwin64. */
30974 if (TARGET_MACHO
30975 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
30977 CUMULATIVE_ARGS valcum;
30978 rtx valret;
30980 valcum.words = 0;
30981 valcum.fregno = FP_ARG_MIN_REG;
30982 valcum.vregno = ALTIVEC_ARG_MIN_REG;
30983 /* Do a trial code generation as if this were going to be passed as
30984 an argument; if any part goes in memory, we return NULL. */
30985 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
30986 if (valret)
30987 return valret;
30988 /* Otherwise fall through to standard ABI rules. */
30991 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
30992 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (valtype), valtype,
30993 &elt_mode, &n_elts))
30995 int first_reg, n_regs, i;
30996 rtx par;
30998 if (SCALAR_FLOAT_MODE_P (elt_mode))
31000 /* _Decimal128 must use even/odd register pairs. */
31001 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31002 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
31004 else
31006 first_reg = ALTIVEC_ARG_RETURN;
31007 n_regs = 1;
31010 par = gen_rtx_PARALLEL (TYPE_MODE (valtype), rtvec_alloc (n_elts));
31011 for (i = 0; i < n_elts; i++)
31013 rtx r = gen_rtx_REG (elt_mode, first_reg + i * n_regs);
31014 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
31015 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
31018 return par;
31021 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode)
31023 /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64. */
31024 return gen_rtx_PARALLEL (DImode,
31025 gen_rtvec (2,
31026 gen_rtx_EXPR_LIST (VOIDmode,
31027 gen_rtx_REG (SImode, GP_ARG_RETURN),
31028 const0_rtx),
31029 gen_rtx_EXPR_LIST (VOIDmode,
31030 gen_rtx_REG (SImode,
31031 GP_ARG_RETURN + 1),
31032 GEN_INT (4))));
31034 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DCmode)
31036 return gen_rtx_PARALLEL (DCmode,
31037 gen_rtvec (4,
31038 gen_rtx_EXPR_LIST (VOIDmode,
31039 gen_rtx_REG (SImode, GP_ARG_RETURN),
31040 const0_rtx),
31041 gen_rtx_EXPR_LIST (VOIDmode,
31042 gen_rtx_REG (SImode,
31043 GP_ARG_RETURN + 1),
31044 GEN_INT (4)),
31045 gen_rtx_EXPR_LIST (VOIDmode,
31046 gen_rtx_REG (SImode,
31047 GP_ARG_RETURN + 2),
31048 GEN_INT (8)),
31049 gen_rtx_EXPR_LIST (VOIDmode,
31050 gen_rtx_REG (SImode,
31051 GP_ARG_RETURN + 3),
31052 GEN_INT (12))));
31055 mode = TYPE_MODE (valtype);
31056 if ((INTEGRAL_TYPE_P (valtype) && GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
31057 || POINTER_TYPE_P (valtype))
31058 mode = TARGET_32BIT ? SImode : DImode;
31060 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31061 /* _Decimal128 must use an even/odd register pair. */
31062 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31063 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
31064 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
31065 regno = FP_ARG_RETURN;
31066 else if (TREE_CODE (valtype) == COMPLEX_TYPE
31067 && targetm.calls.split_complex_arg)
31068 return rs6000_complex_function_value (mode);
31069 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31070 return register is used in both cases, and we won't see V2DImode/V2DFmode
31071 for pure altivec, combine the two cases. */
31072 else if (TREE_CODE (valtype) == VECTOR_TYPE
31073 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
31074 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
31075 regno = ALTIVEC_ARG_RETURN;
31076 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31077 && (mode == DFmode || mode == DCmode
31078 || mode == TFmode || mode == TCmode))
31079 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31080 else
31081 regno = GP_ARG_RETURN;
31083 return gen_rtx_REG (mode, regno);
31086 /* Define how to find the value returned by a library function
31087 assuming the value has mode MODE. */
31088 rtx
31089 rs6000_libcall_value (enum machine_mode mode)
31091 unsigned int regno;
31093 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
31095 /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64. */
31096 return gen_rtx_PARALLEL (DImode,
31097 gen_rtvec (2,
31098 gen_rtx_EXPR_LIST (VOIDmode,
31099 gen_rtx_REG (SImode, GP_ARG_RETURN),
31100 const0_rtx),
31101 gen_rtx_EXPR_LIST (VOIDmode,
31102 gen_rtx_REG (SImode,
31103 GP_ARG_RETURN + 1),
31104 GEN_INT (4))));
31107 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31108 /* _Decimal128 must use an even/odd register pair. */
31109 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31110 else if (SCALAR_FLOAT_MODE_P (mode)
31111 && TARGET_HARD_FLOAT && TARGET_FPRS
31112 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
31113 regno = FP_ARG_RETURN;
31114 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31115 return register is used in both cases, and we won't see V2DImode/V2DFmode
31116 for pure altivec, combine the two cases. */
31117 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
31118 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
31119 regno = ALTIVEC_ARG_RETURN;
31120 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
31121 return rs6000_complex_function_value (mode);
31122 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31123 && (mode == DFmode || mode == DCmode
31124 || mode == TFmode || mode == TCmode))
31125 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31126 else
31127 regno = GP_ARG_RETURN;
31129 return gen_rtx_REG (mode, regno);
31133 /* Return true if we use LRA instead of reload pass. */
31134 static bool
31135 rs6000_lra_p (void)
31137 return rs6000_lra_flag;
31140 /* Given FROM and TO register numbers, say whether this elimination is allowed.
31141 Frame pointer elimination is automatically handled.
31143 For the RS/6000, if frame pointer elimination is being done, we would like
31144 to convert ap into fp, not sp.
31146 We need r30 if -mminimal-toc was specified, and there are constant pool
31147 references. */
31149 static bool
31150 rs6000_can_eliminate (const int from, const int to)
31152 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
31153 ? ! frame_pointer_needed
31154 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
31155 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
31156 : true);
31159 /* Define the offset between two registers, FROM to be eliminated and its
31160 replacement TO, at the start of a routine. */
31161 HOST_WIDE_INT
31162 rs6000_initial_elimination_offset (int from, int to)
31164 rs6000_stack_t *info = rs6000_stack_info ();
31165 HOST_WIDE_INT offset;
31167 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31168 offset = info->push_p ? 0 : -info->total_size;
31169 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31171 offset = info->push_p ? 0 : -info->total_size;
31172 if (FRAME_GROWS_DOWNWARD)
31173 offset += info->fixed_size + info->vars_size + info->parm_size;
31175 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31176 offset = FRAME_GROWS_DOWNWARD
31177 ? info->fixed_size + info->vars_size + info->parm_size
31178 : 0;
31179 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31180 offset = info->total_size;
31181 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31182 offset = info->push_p ? info->total_size : 0;
31183 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
31184 offset = 0;
31185 else
31186 gcc_unreachable ();
31188 return offset;
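/* Implement TARGET_DWARF_REGISTER_SPAN: a 64-bit SPE value in a GPR is
   described to DWARF as two 32-bit pieces, the GPR itself and its
   high-part pseudo register. */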
31191 static rtx
31192 rs6000_dwarf_register_span (rtx reg)
31194 rtx parts[8];
31195 int i, words;
31196 unsigned regno = REGNO (reg);
31197 enum machine_mode mode = GET_MODE (reg);
31199 if (TARGET_SPE
31200 && regno < 32
31201 && (SPE_VECTOR_MODE (GET_MODE (reg))
31202 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
31203 && mode != SFmode && mode != SDmode && mode != SCmode)))
31205 else
31206 return NULL_RTX;
31208 regno = REGNO (reg);
31210 /* The duality of the SPE register size wreaks all kinds of havoc.
31211 This is a way of distinguishing r0 in 32-bits from r0 in
31212 64-bits. */
31213 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
31214 gcc_assert (words <= 4);
31215 for (i = 0; i < words; i++, regno++)
31217 if (BYTES_BIG_ENDIAN)
31219 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31220 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
31222 else
31224 parts[2 * i] = gen_rtx_REG (SImode, regno);
31225 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31229 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
31232 /* Fill in sizes for the SPE register high parts in the table used by the unwinder. */
31234 static void
31235 rs6000_init_dwarf_reg_sizes_extra (tree address)
31237 if (TARGET_SPE)
31239 int i;
31240 enum machine_mode mode = TYPE_MODE (char_type_node);
31241 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31242 rtx mem = gen_rtx_MEM (BLKmode, addr);
31243 rtx value = gen_int_mode (4, mode);
31245 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
31247 int column = DWARF_REG_TO_UNWIND_COLUMN
31248 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31249 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31251 emit_move_insn (adjust_address (mem, mode, offset), value);
31255 if (TARGET_MACHO && ! TARGET_ALTIVEC)
31257 int i;
31258 enum machine_mode mode = TYPE_MODE (char_type_node);
31259 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31260 rtx mem = gen_rtx_MEM (BLKmode, addr);
31261 rtx value = gen_int_mode (16, mode);
31263 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
31264 The unwinder still needs to know the size of Altivec registers. */
31266 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
31268 int column = DWARF_REG_TO_UNWIND_COLUMN
31269 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31270 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31272 emit_move_insn (adjust_address (mem, mode, offset), value);
31277 /* Map internal gcc register numbers to DWARF2 register numbers. */
31279 unsigned int
31280 rs6000_dbx_register_number (unsigned int regno)
31282 if (regno <= 63 || write_symbols != DWARF2_DEBUG)
31283 return regno;
31284 if (regno == LR_REGNO)
31285 return 108;
31286 if (regno == CTR_REGNO)
31287 return 109;
31288 if (CR_REGNO_P (regno))
31289 return regno - CR0_REGNO + 86;
31290 if (regno == CA_REGNO)
31291 return 101; /* XER */
31292 if (ALTIVEC_REGNO_P (regno))
31293 return regno - FIRST_ALTIVEC_REGNO + 1124;
31294 if (regno == VRSAVE_REGNO)
31295 return 356;
31296 if (regno == VSCR_REGNO)
31297 return 67;
31298 if (regno == SPE_ACC_REGNO)
31299 return 99;
31300 if (regno == SPEFSCR_REGNO)
31301 return 612;
31302 if (SPE_HIGH_REGNO_P (regno))
31303 return regno - FIRST_SPE_HIGH_REGNO + 1200;
31304 return regno;
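/* With the mapping above, lr becomes DWARF register 108, ctr 109,
   cr0-cr7 map to 86-93, vrsave to 356, and v0-v31 to 1124-1155.  */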
31307 /* Target hook for eh_return_filter_mode. */
31308 static enum machine_mode
31309 rs6000_eh_return_filter_mode (void)
31311 return TARGET_32BIT ? SImode : word_mode;
31314 /* Target hook for scalar_mode_supported_p. */
31315 static bool
31316 rs6000_scalar_mode_supported_p (enum machine_mode mode)
31318 if (DECIMAL_FLOAT_MODE_P (mode))
31319 return default_decimal_float_supported_p ();
31320 else
31321 return default_scalar_mode_supported_p (mode);
31324 /* Target hook for vector_mode_supported_p. */
31325 static bool
31326 rs6000_vector_mode_supported_p (enum machine_mode mode)
31329 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
31330 return true;
31332 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
31333 return true;
31335 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
31336 return true;
31338 else
31339 return false;
31342 /* Target hook for invalid_arg_for_unprototyped_fn. */
31343 static const char *
31344 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
31346 return (!rs6000_darwin64_abi
31347 && typelist == 0
31348 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
31349 && (funcdecl == NULL_TREE
31350 || (TREE_CODE (funcdecl) == FUNCTION_DECL
31351 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
31352 ? N_("AltiVec argument passed to unprototyped function")
31353 : NULL;
31356 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
31357 setup by using the __stack_chk_fail_local hidden function instead of
31358 calling __stack_chk_fail directly. Otherwise it is better to call
31359 __stack_chk_fail directly. */
31361 static tree ATTRIBUTE_UNUSED
31362 rs6000_stack_protect_fail (void)
31364 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
31365 ? default_hidden_stack_protect_fail ()
31366 : default_external_stack_protect_fail ();
31369 void
31370 rs6000_final_prescan_insn (rtx insn, rtx *operand ATTRIBUTE_UNUSED,
31371 int num_operands ATTRIBUTE_UNUSED)
31373 if (rs6000_warn_cell_microcode)
31375 const char *temp;
31376 int insn_code_number = recog_memoized (insn);
31377 location_t location = INSN_LOCATION (insn);
31379 /* Punt on insns we cannot recognize. */
31380 if (insn_code_number < 0)
31381 return;
31383 temp = get_insn_template (insn_code_number, insn);
31385 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
31386 warning_at (location, OPT_mwarn_cell_microcode,
31387 "emitting microcode insn %s\t[%s] #%d",
31388 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
31389 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
31390 warning_at (location, OPT_mwarn_cell_microcode,
31391 "emitting conditional microcode insn %s\t[%s] #%d",
31392 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
31396 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
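/* As a sanity check on the constants below: AddressSanitizer shadows each
   8-byte granule with one shadow byte at ((addr >> 3) + offset), so these
   values place the shadow base at 0x20000000000 (1 << 41) for 64-bit and
   at 0x20000000 (1 << 29) for 32-bit code.  */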
31398 #if TARGET_ELF
31399 static unsigned HOST_WIDE_INT
31400 rs6000_asan_shadow_offset (void)
31402 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
31404 #endif
31406 /* Mask options that we want to support inside of attribute((target)) and
31407 #pragma GCC target operations. Note, we do not include things like
31408 64/32-bit, endianness, hard/soft floating point, etc. that would have
31409 different calling sequences. */
31411 struct rs6000_opt_mask {
31412 const char *name; /* option name */
31413 HOST_WIDE_INT mask; /* mask to set */
31414 bool invert; /* invert sense of mask */
31415 bool valid_target; /* option is a target option */
31418 static struct rs6000_opt_mask const rs6000_opt_masks[] =
31420 { "altivec", OPTION_MASK_ALTIVEC, false, true },
31421 { "cmpb", OPTION_MASK_CMPB, false, true },
31422 { "crypto", OPTION_MASK_CRYPTO, false, true },
31423 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
31424 { "dlmzb", OPTION_MASK_DLMZB, false, true },
31425 { "fprnd", OPTION_MASK_FPRND, false, true },
31426 { "hard-dfp", OPTION_MASK_DFP, false, true },
31427 { "htm", OPTION_MASK_HTM, false, true },
31428 { "isel", OPTION_MASK_ISEL, false, true },
31429 { "mfcrf", OPTION_MASK_MFCRF, false, true },
31430 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
31431 { "mulhw", OPTION_MASK_MULHW, false, true },
31432 { "multiple", OPTION_MASK_MULTIPLE, false, true },
31433 { "popcntb", OPTION_MASK_POPCNTB, false, true },
31434 { "popcntd", OPTION_MASK_POPCNTD, false, true },
31435 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
31436 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
31437 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
31438 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
31439 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
31440 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
31441 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
31442 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
31443 { "string", OPTION_MASK_STRING, false, true },
31444 { "update", OPTION_MASK_NO_UPDATE, true , true },
31445 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
31446 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
31447 { "vsx", OPTION_MASK_VSX, false, true },
31448 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
31449 #ifdef OPTION_MASK_64BIT
31450 #if TARGET_AIX_OS
31451 { "aix64", OPTION_MASK_64BIT, false, false },
31452 { "aix32", OPTION_MASK_64BIT, true, false },
31453 #else
31454 { "64", OPTION_MASK_64BIT, false, false },
31455 { "32", OPTION_MASK_64BIT, true, false },
31456 #endif
31457 #endif
31458 #ifdef OPTION_MASK_EABI
31459 { "eabi", OPTION_MASK_EABI, false, false },
31460 #endif
31461 #ifdef OPTION_MASK_LITTLE_ENDIAN
31462 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
31463 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
31464 #endif
31465 #ifdef OPTION_MASK_RELOCATABLE
31466 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
31467 #endif
31468 #ifdef OPTION_MASK_STRICT_ALIGN
31469 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
31470 #endif
31471 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
31472 { "string", OPTION_MASK_STRING, false, false },
31475 /* Builtin mask mapping for printing the flags. */
31476 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
31478 { "altivec", RS6000_BTM_ALTIVEC, false, false },
31479 { "vsx", RS6000_BTM_VSX, false, false },
31480 { "spe", RS6000_BTM_SPE, false, false },
31481 { "paired", RS6000_BTM_PAIRED, false, false },
31482 { "fre", RS6000_BTM_FRE, false, false },
31483 { "fres", RS6000_BTM_FRES, false, false },
31484 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
31485 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
31486 { "popcntd", RS6000_BTM_POPCNTD, false, false },
31487 { "cell", RS6000_BTM_CELL, false, false },
31488 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
31489 { "crypto", RS6000_BTM_CRYPTO, false, false },
31490 { "htm", RS6000_BTM_HTM, false, false },
31491 { "hard-dfp", RS6000_BTM_DFP, false, false },
31492 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
31493 { "long-double-128", RS6000_BTM_LDBL128, false, false },
31496 /* Option variables that we want to support inside attribute((target)) and
31497 #pragma GCC target operations. */
31499 struct rs6000_opt_var {
31500 const char *name; /* option name */
31501 size_t global_offset; /* offset of the option in global_options. */
31502 size_t target_offset; /* offset of the option in target options. */
31505 static struct rs6000_opt_var const rs6000_opt_vars[] =
31507 { "friz",
31508 offsetof (struct gcc_options, x_TARGET_FRIZ),
31509 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
31510 { "avoid-indexed-addresses",
31511 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
31512 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
31513 { "paired",
31514 offsetof (struct gcc_options, x_rs6000_paired_float),
31515 offsetof (struct cl_target_option, x_rs6000_paired_float), },
31516 { "longcall",
31517 offsetof (struct gcc_options, x_rs6000_default_long_calls),
31518 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
31521 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
31522 parsing. Return true if there were no errors. */
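/* For example (hypothetical uses), each of the following arrives here as a
   comma-separated string:
     #pragma GCC target ("cpu=power8,no-vsx")
     __attribute__((__target__("altivec,tune=power7")))
   Entries are either cpu=/tune= selections, or option names taken from
   rs6000_opt_masks / rs6000_opt_vars, optionally prefixed with "no-".  */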
31524 static bool
31525 rs6000_inner_target_options (tree args, bool attr_p)
31527 bool ret = true;
31529 if (args == NULL_TREE)
31532 else if (TREE_CODE (args) == STRING_CST)
31534 char *p = ASTRDUP (TREE_STRING_POINTER (args));
31535 char *q;
31537 while ((q = strtok (p, ",")) != NULL)
31539 bool error_p = false;
31540 bool not_valid_p = false;
31541 const char *cpu_opt = NULL;
31543 p = NULL;
31544 if (strncmp (q, "cpu=", 4) == 0)
31546 int cpu_index = rs6000_cpu_name_lookup (q+4);
31547 if (cpu_index >= 0)
31548 rs6000_cpu_index = cpu_index;
31549 else
31551 error_p = true;
31552 cpu_opt = q+4;
31555 else if (strncmp (q, "tune=", 5) == 0)
31557 int tune_index = rs6000_cpu_name_lookup (q+5);
31558 if (tune_index >= 0)
31559 rs6000_tune_index = tune_index;
31560 else
31562 error_p = true;
31563 cpu_opt = q+5;
31566 else
31568 size_t i;
31569 bool invert = false;
31570 char *r = q;
31572 error_p = true;
31573 if (strncmp (r, "no-", 3) == 0)
31575 invert = true;
31576 r += 3;
31579 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
31580 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
31582 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
31584 if (!rs6000_opt_masks[i].valid_target)
31585 not_valid_p = true;
31586 else
31588 error_p = false;
31589 rs6000_isa_flags_explicit |= mask;
31591 /* VSX needs altivec, so -mvsx automagically sets
31592 altivec. */
31593 if (mask == OPTION_MASK_VSX && !invert)
31594 mask |= OPTION_MASK_ALTIVEC;
31596 if (rs6000_opt_masks[i].invert)
31597 invert = !invert;
31599 if (invert)
31600 rs6000_isa_flags &= ~mask;
31601 else
31602 rs6000_isa_flags |= mask;
31604 break;
31607 if (error_p && !not_valid_p)
31609 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
31610 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
31612 size_t j = rs6000_opt_vars[i].global_offset;
31613 *((int *) ((char *)&global_options + j)) = !invert;
31614 error_p = false;
31615 break;
31620 if (error_p)
31622 const char *eprefix, *esuffix;
31624 ret = false;
31625 if (attr_p)
31627 eprefix = "__attribute__((__target__(";
31628 esuffix = ")))";
31630 else
31632 eprefix = "#pragma GCC target ";
31633 esuffix = "";
31636 if (cpu_opt)
31637 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
31638 q, esuffix);
31639 else if (not_valid_p)
31640 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
31641 else
31642 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
31647 else if (TREE_CODE (args) == TREE_LIST)
31651 tree value = TREE_VALUE (args);
31652 if (value)
31654 bool ret2 = rs6000_inner_target_options (value, attr_p);
31655 if (!ret2)
31656 ret = false;
31658 args = TREE_CHAIN (args);
31660 while (args != NULL_TREE);
31663 else
31664 gcc_unreachable ();
31666 return ret;
31669 /* Print out the target options as a list for -mdebug=target. */
31671 static void
31672 rs6000_debug_target_options (tree args, const char *prefix)
31674 if (args == NULL_TREE)
31675 fprintf (stderr, "%s<NULL>", prefix);
31677 else if (TREE_CODE (args) == STRING_CST)
31679 char *p = ASTRDUP (TREE_STRING_POINTER (args));
31680 char *q;
31682 while ((q = strtok (p, ",")) != NULL)
31684 p = NULL;
31685 fprintf (stderr, "%s\"%s\"", prefix, q);
31686 prefix = ", ";
31690 else if (TREE_CODE (args) == TREE_LIST)
31694 tree value = TREE_VALUE (args);
31695 if (value)
31697 rs6000_debug_target_options (value, prefix);
31698 prefix = ", ";
31700 args = TREE_CHAIN (args);
31702 while (args != NULL_TREE);
31705 else
31706 gcc_unreachable ();
31708 return;
31712 /* Hook to validate attribute((target("..."))). */
31714 static bool
31715 rs6000_valid_attribute_p (tree fndecl,
31716 tree ARG_UNUSED (name),
31717 tree args,
31718 int flags)
31720 struct cl_target_option cur_target;
31721 bool ret;
31722 tree old_optimize = build_optimization_node (&global_options);
31723 tree new_target, new_optimize;
31724 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
31726 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
31728 if (TARGET_DEBUG_TARGET)
31730 tree tname = DECL_NAME (fndecl);
31731 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
31732 if (tname)
31733 fprintf (stderr, "function: %.*s\n",
31734 (int) IDENTIFIER_LENGTH (tname),
31735 IDENTIFIER_POINTER (tname));
31736 else
31737 fprintf (stderr, "function: unknown\n");
31739 fprintf (stderr, "args:");
31740 rs6000_debug_target_options (args, " ");
31741 fprintf (stderr, "\n");
31743 if (flags)
31744 fprintf (stderr, "flags: 0x%x\n", flags);
31746 fprintf (stderr, "--------------------\n");
31749 old_optimize = build_optimization_node (&global_options);
31750 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
31752 /* If the function changed the optimization levels as well as setting target
31753 options, start with the optimizations specified. */
31754 if (func_optimize && func_optimize != old_optimize)
31755 cl_optimization_restore (&global_options,
31756 TREE_OPTIMIZATION (func_optimize));
31758 /* The target attributes may also change some optimization flags, so update
31759 the optimization options if necessary. */
31760 cl_target_option_save (&cur_target, &global_options);
31761 rs6000_cpu_index = rs6000_tune_index = -1;
31762 ret = rs6000_inner_target_options (args, true);
31764 /* Set up any additional state. */
31765 if (ret)
31767 ret = rs6000_option_override_internal (false);
31768 new_target = build_target_option_node (&global_options);
31770 else
31771 new_target = NULL;
31773 new_optimize = build_optimization_node (&global_options);
31775 if (!new_target)
31776 ret = false;
31778 else if (fndecl)
31780 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
31782 if (old_optimize != new_optimize)
31783 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
31786 cl_target_option_restore (&global_options, &cur_target);
31788 if (old_optimize != new_optimize)
31789 cl_optimization_restore (&global_options,
31790 TREE_OPTIMIZATION (old_optimize));
31792 return ret;
31796 /* Hook to validate the current #pragma GCC target and set the state, and
31797 update the macros based on what was changed. If ARGS is NULL, then
31798 POP_TARGET is used to reset the options. */
31800 bool
31801 rs6000_pragma_target_parse (tree args, tree pop_target)
31803 tree prev_tree = build_target_option_node (&global_options);
31804 tree cur_tree;
31805 struct cl_target_option *prev_opt, *cur_opt;
31806 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
31807 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
31809 if (TARGET_DEBUG_TARGET)
31811 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
31812 fprintf (stderr, "args:");
31813 rs6000_debug_target_options (args, " ");
31814 fprintf (stderr, "\n");
31816 if (pop_target)
31818 fprintf (stderr, "pop_target:\n");
31819 debug_tree (pop_target);
31821 else
31822 fprintf (stderr, "pop_target: <NULL>\n");
31824 fprintf (stderr, "--------------------\n");
31827 if (! args)
31829 cur_tree = ((pop_target)
31830 ? pop_target
31831 : target_option_default_node);
31832 cl_target_option_restore (&global_options,
31833 TREE_TARGET_OPTION (cur_tree));
31835 else
31837 rs6000_cpu_index = rs6000_tune_index = -1;
31838 if (!rs6000_inner_target_options (args, false)
31839 || !rs6000_option_override_internal (false)
31840 || (cur_tree = build_target_option_node (&global_options))
31841 == NULL_TREE)
31843 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
31844 fprintf (stderr, "invalid pragma\n");
31846 return false;
31850 target_option_current_node = cur_tree;
31852 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
31853 change the macros that are defined. */
31854 if (rs6000_target_modify_macros_ptr)
31856 prev_opt = TREE_TARGET_OPTION (prev_tree);
31857 prev_bumask = prev_opt->x_rs6000_builtin_mask;
31858 prev_flags = prev_opt->x_rs6000_isa_flags;
31860 cur_opt = TREE_TARGET_OPTION (cur_tree);
31861 cur_flags = cur_opt->x_rs6000_isa_flags;
31862 cur_bumask = cur_opt->x_rs6000_builtin_mask;
31864 diff_bumask = (prev_bumask ^ cur_bumask);
31865 diff_flags = (prev_flags ^ cur_flags);
31867 if ((diff_flags != 0) || (diff_bumask != 0))
31869 /* Delete old macros. */
31870 rs6000_target_modify_macros_ptr (false,
31871 prev_flags & diff_flags,
31872 prev_bumask & diff_bumask);
31874 /* Define new macros. */
31875 rs6000_target_modify_macros_ptr (true,
31876 cur_flags & diff_flags,
31877 cur_bumask & diff_bumask);
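/* E.g., moving from a context with altivec and vsx enabled to one with
   only altivec leaves OPTION_MASK_VSX set in diff_flags, so macros such
   as __VSX__ are deleted and no new ones are defined.  */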
31881 return true;
31885 /* Remember the last target of rs6000_set_current_function. */
31886 static GTY(()) tree rs6000_previous_fndecl;
31888 /* Establish appropriate back-end context for processing the function
31889 FNDECL. The argument might be NULL to indicate processing at top
31890 level, outside of any function scope. */
31891 static void
31892 rs6000_set_current_function (tree fndecl)
31894 tree old_tree = (rs6000_previous_fndecl
31895 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
31896 : NULL_TREE);
31898 tree new_tree = (fndecl
31899 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
31900 : NULL_TREE);
31902 if (TARGET_DEBUG_TARGET)
31904 bool print_final = false;
31905 fprintf (stderr, "\n==================== rs6000_set_current_function");
31907 if (fndecl)
31908 fprintf (stderr, ", fndecl %s (%p)",
31909 (DECL_NAME (fndecl)
31910 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
31911 : "<unknown>"), (void *)fndecl);
31913 if (rs6000_previous_fndecl)
31914 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
31916 fprintf (stderr, "\n");
31917 if (new_tree)
31919 fprintf (stderr, "\nnew fndecl target specific options:\n");
31920 debug_tree (new_tree);
31921 print_final = true;
31924 if (old_tree)
31926 fprintf (stderr, "\nold fndecl target specific options:\n");
31927 debug_tree (old_tree);
31928 print_final = true;
31931 if (print_final)
31932 fprintf (stderr, "--------------------\n");
31935 /* Only change the context if the function changes. This hook is called
31936 several times in the course of compiling a function, and we don't want to
31937 slow things down too much or call target_reinit when it isn't safe. */
31938 if (fndecl && fndecl != rs6000_previous_fndecl)
31940 rs6000_previous_fndecl = fndecl;
31941 if (old_tree == new_tree)
31944 else if (new_tree)
31946 cl_target_option_restore (&global_options,
31947 TREE_TARGET_OPTION (new_tree));
31948 if (TREE_TARGET_GLOBALS (new_tree))
31949 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
31950 else
31951 TREE_TARGET_GLOBALS (new_tree)
31952 = save_target_globals_default_opts ();
31955 else if (old_tree)
31957 new_tree = target_option_current_node;
31958 cl_target_option_restore (&global_options,
31959 TREE_TARGET_OPTION (new_tree));
31960 if (TREE_TARGET_GLOBALS (new_tree))
31961 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
31962 else if (new_tree == target_option_default_node)
31963 restore_target_globals (&default_target_globals);
31964 else
31965 TREE_TARGET_GLOBALS (new_tree)
31966 = save_target_globals_default_opts ();
31972 /* Save the current options */
31974 static void
31975 rs6000_function_specific_save (struct cl_target_option *ptr,
31976 struct gcc_options *opts)
31978 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
31979 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
31982 /* Restore the current options */
31984 static void
31985 rs6000_function_specific_restore (struct gcc_options *opts,
31986 struct cl_target_option *ptr)
31989 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
31990 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
31991 (void) rs6000_option_override_internal (false);
31994 /* Print the current options */
31996 static void
31997 rs6000_function_specific_print (FILE *file, int indent,
31998 struct cl_target_option *ptr)
32000 rs6000_print_isa_options (file, indent, "Isa options set",
32001 ptr->x_rs6000_isa_flags);
32003 rs6000_print_isa_options (file, indent, "Isa options explicit",
32004 ptr->x_rs6000_isa_flags_explicit);
32007 /* Helper function to print the current isa or misc options on a line. */
32009 static void
32010 rs6000_print_options_internal (FILE *file,
32011 int indent,
32012 const char *string,
32013 HOST_WIDE_INT flags,
32014 const char *prefix,
32015 const struct rs6000_opt_mask *opts,
32016 size_t num_elements)
32018 size_t i;
32019 size_t start_column = 0;
32020 size_t cur_column;
32021 size_t max_column = 76;
32022 const char *comma = "";
32024 if (indent)
32025 start_column += fprintf (file, "%*s", indent, "");
32027 if (!flags)
32029 fprintf (file, DEBUG_FMT_S, string, "<none>");
32030 return;
32033 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
32035 /* Print the various mask options. */
32036 cur_column = start_column;
32037 for (i = 0; i < num_elements; i++)
32039 if ((flags & opts[i].mask) != 0)
32041 const char *no_str = opts[i].invert ? "no-" : "";
32042 size_t len = (strlen (comma)
32043 + strlen (prefix)
32044 + strlen (no_str)
32045 + strlen (opts[i].name));
32047 cur_column += len;
32048 if (cur_column > max_column)
32050 fprintf (file, ", \\\n%*s", (int)start_column, "");
32051 cur_column = start_column + len;
32052 comma = "";
32055 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
32056 opts[i].name);
32057 flags &= ~ opts[i].mask;
32058 comma = ", ";
32062 fputs ("\n", file);
32065 /* Helper function to print the current isa options on a line. */
32067 static void
32068 rs6000_print_isa_options (FILE *file, int indent, const char *string,
32069 HOST_WIDE_INT flags)
32071 rs6000_print_options_internal (file, indent, string, flags, "-m",
32072 &rs6000_opt_masks[0],
32073 ARRAY_SIZE (rs6000_opt_masks));
32076 static void
32077 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
32078 HOST_WIDE_INT flags)
32080 rs6000_print_options_internal (file, indent, string, flags, "",
32081 &rs6000_builtin_mask_names[0],
32082 ARRAY_SIZE (rs6000_builtin_mask_names));
32086 /* Hook to determine if one function can safely inline another. */
32088 static bool
32089 rs6000_can_inline_p (tree caller, tree callee)
32091 bool ret = false;
32092 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32093 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32095 /* If callee has no option attributes, then it is ok to inline. */
32096 if (!callee_tree)
32097 ret = true;
32099 /* If caller has no option attributes, but callee does then it is not ok to
32100 inline. */
32101 else if (!caller_tree)
32102 ret = false;
32104 else
32106 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32107 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32109 /* Callee's options should be a subset of the caller's, i.e. a vsx function
32110 can inline an altivec function but a non-vsx function can't inline a
32111 vsx function. */
32112 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32113 == callee_opts->x_rs6000_isa_flags)
32114 ret = true;
32117 if (TARGET_DEBUG_TARGET)
32118 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
32119 (DECL_NAME (caller)
32120 ? IDENTIFIER_POINTER (DECL_NAME (caller))
32121 : "<unknown>"),
32122 (DECL_NAME (callee)
32123 ? IDENTIFIER_POINTER (DECL_NAME (callee))
32124 : "<unknown>"),
32125 (ret ? "can" : "cannot"));
32127 return ret;
32130 /* Allocate a stack temp and fixup the address so it meets the particular
32131 memory requirements (either offsettable or REG+REG addressing). */
32133 rtx
32134 rs6000_allocate_stack_temp (enum machine_mode mode,
32135 bool offsettable_p,
32136 bool reg_reg_p)
32138 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32139 rtx addr = XEXP (stack, 0);
32140 int strict_p = (reload_in_progress || reload_completed);
32142 if (!legitimate_indirect_address_p (addr, strict_p))
32144 if (offsettable_p
32145 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32146 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32148 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32149 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32152 return stack;
32155 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
32156 to such a form to deal with memory reference instructions like STFIWX that
32157 only take reg+reg addressing. */
32159 rtx
32160 rs6000_address_for_fpconvert (rtx x)
32162 int strict_p = (reload_in_progress || reload_completed);
32163 rtx addr;
32165 gcc_assert (MEM_P (x));
32166 addr = XEXP (x, 0);
32167 if (! legitimate_indirect_address_p (addr, strict_p)
32168 && ! legitimate_indexed_address_p (addr, strict_p))
32170 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
32172 rtx reg = XEXP (addr, 0);
32173 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
32174 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
32175 gcc_assert (REG_P (reg));
32176 emit_insn (gen_add3_insn (reg, reg, size_rtx));
32177 addr = reg;
32179 else if (GET_CODE (addr) == PRE_MODIFY)
32181 rtx reg = XEXP (addr, 0);
32182 rtx expr = XEXP (addr, 1);
32183 gcc_assert (REG_P (reg));
32184 gcc_assert (GET_CODE (expr) == PLUS);
32185 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
32186 addr = reg;
32189 x = replace_equiv_address (x, copy_addr_to_reg (addr));
32192 return x;
32195 /* Given a memory reference, if it is not in the form for altivec memory
32196 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
32197 convert to the altivec format. */
32199 rtx
32200 rs6000_address_for_altivec (rtx x)
32202 gcc_assert (MEM_P (x));
32203 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
32205 rtx addr = XEXP (x, 0);
32206 int strict_p = (reload_in_progress || reload_completed);
32208 if (!legitimate_indexed_address_p (addr, strict_p)
32209 && !legitimate_indirect_address_p (addr, strict_p))
32210 addr = copy_to_mode_reg (Pmode, addr);
32212 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
32213 x = change_address (x, GET_MODE (x), addr);
32216 return x;
32219 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
32221 On the RS/6000, all integer constants are acceptable, though most won't
32222 be valid for particular insns. Only easy FP constants are acceptable. */
32224 static bool
32225 rs6000_legitimate_constant_p (enum machine_mode mode, rtx x)
32227 if (TARGET_ELF && rs6000_tls_referenced_p (x))
32228 return false;
32230 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
32231 || GET_MODE (x) == VOIDmode
32232 || (TARGET_POWERPC64 && mode == DImode)
32233 || easy_fp_constant (x, mode)
32234 || easy_vector_constant (x, mode));
32239 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
32241 void
32242 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32244 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
32245 rtx toc_load = NULL_RTX;
32246 rtx toc_restore = NULL_RTX;
32247 rtx func_addr;
32248 rtx abi_reg = NULL_RTX;
32249 rtx call[4];
32250 int n_call;
32251 rtx insn;
32253 /* Handle longcall attributes. */
32254 if (INTVAL (cookie) & CALL_LONG)
32255 func_desc = rs6000_longcall_ref (func_desc);
32257 /* Handle indirect calls. */
32258 if (GET_CODE (func_desc) != SYMBOL_REF
32259 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
32261 /* Save the TOC into its reserved slot before the call,
32262 and prepare to restore it after the call. */
32263 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32264 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
32265 rtx stack_toc_mem = gen_frame_mem (Pmode,
32266 gen_rtx_PLUS (Pmode, stack_ptr,
32267 stack_toc_offset));
32268 toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_mem);
32270 /* Can we optimize saving the TOC in the prologue or
32271 do we need to do it at every call? */
32272 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
32273 cfun->machine->save_toc_in_prologue = true;
32274 else
32276 MEM_VOLATILE_P (stack_toc_mem) = 1;
32277 emit_move_insn (stack_toc_mem, toc_reg);
32280 if (DEFAULT_ABI == ABI_ELFv2)
32282 /* A function pointer in the ELFv2 ABI is just a plain address, but
32283 the ABI requires it to be loaded into r12 before the call. */
32284 func_addr = gen_rtx_REG (Pmode, 12);
32285 emit_move_insn (func_addr, func_desc);
32286 abi_reg = func_addr;
32288 else
32290 /* A function pointer under AIX is a pointer to a data area whose
32291 first word contains the actual address of the function, whose
32292 second word contains a pointer to its TOC, and whose third word
32293 contains a value to place in the static chain register (r11).
32294 Note that if we load the static chain, our "trampoline" need
32295 not have any executable code. */
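/* Concretely, with a 64-bit Pmode the three descriptor words are read
   from offsets 0, 8 and 16 below; loading the static chain ourselves is
   what makes executable trampolines unnecessary.  */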
32297 /* Load up address of the actual function. */
32298 func_desc = force_reg (Pmode, func_desc);
32299 func_addr = gen_reg_rtx (Pmode);
32300 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
32302 /* Prepare to load the TOC of the called function. Note that the
32303 TOC load must happen immediately before the actual call so
32304 that unwinding the TOC registers works correctly. See the
32305 comment in frob_update_context. */
32306 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
32307 rtx func_toc_mem = gen_rtx_MEM (Pmode,
32308 gen_rtx_PLUS (Pmode, func_desc,
32309 func_toc_offset));
32310 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
32312 /* If we have a static chain, load it up. */
32313 if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32315 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
32316 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
32317 rtx func_sc_mem = gen_rtx_MEM (Pmode,
32318 gen_rtx_PLUS (Pmode, func_desc,
32319 func_sc_offset));
32320 emit_move_insn (sc_reg, func_sc_mem);
32321 abi_reg = sc_reg;
32325 else
32327 /* Direct calls use the TOC: for local calls, the callee will
32328 assume the TOC register is set; for non-local calls, the
32329 PLT stub needs the TOC register. */
32330 abi_reg = toc_reg;
32331 func_addr = func_desc;
32334 /* Create the call. */
32335 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
32336 if (value != NULL_RTX)
32337 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
32338 n_call = 1;
32340 if (toc_load)
32341 call[n_call++] = toc_load;
32342 if (toc_restore)
32343 call[n_call++] = toc_restore;
32345 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
32347 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
32348 insn = emit_call_insn (insn);
32350 /* Mention all registers defined by the ABI to hold information
32351 as uses in CALL_INSN_FUNCTION_USAGE. */
32352 if (abi_reg)
32353 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
32356 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
32358 void
32359 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32361 rtx call[2];
32362 rtx insn;
32364 gcc_assert (INTVAL (cookie) == 0);
32366 /* Create the call. */
32367 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
32368 if (value != NULL_RTX)
32369 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
32371 call[1] = simple_return_rtx;
32373 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
32374 insn = emit_call_insn (insn);
32376 /* Note use of the TOC register. */
32377 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
32378 /* We need to also mark a use of the link register since the function we
32379 sibling-call to will use it to return to our caller. */
32380 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
32383 /* Return whether we need to always update the saved TOC pointer when we update
32384 the stack pointer. */
32386 static bool
32387 rs6000_save_toc_in_prologue_p (void)
32389 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
32392 #ifdef HAVE_GAS_HIDDEN
32393 # define USE_HIDDEN_LINKONCE 1
32394 #else
32395 # define USE_HIDDEN_LINKONCE 0
32396 #endif
32398 /* Fills in the label name that should be used for a 476 link stack thunk. */
32400 void
32401 get_ppc476_thunk_name (char name[32])
32403 gcc_assert (TARGET_LINK_STACK);
32405 if (USE_HIDDEN_LINKONCE)
32406 sprintf (name, "__ppc476.get_thunk");
32407 else
32408 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
32411 /* This function emits the simple thunk routine that is used to preserve
32412 the link stack on the 476 cpu. */
32414 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
32415 static void
32416 rs6000_code_end (void)
32418 char name[32];
32419 tree decl;
32421 if (!TARGET_LINK_STACK)
32422 return;
32424 get_ppc476_thunk_name (name);
32426 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
32427 build_function_type_list (void_type_node, NULL_TREE));
32428 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
32429 NULL_TREE, void_type_node);
32430 TREE_PUBLIC (decl) = 1;
32431 TREE_STATIC (decl) = 1;
32433 #if RS6000_WEAK
32434 if (USE_HIDDEN_LINKONCE)
32436 DECL_COMDAT_GROUP (decl) = DECL_ASSEMBLER_NAME (decl);
32437 targetm.asm_out.unique_section (decl, 0);
32438 switch_to_section (get_named_section (decl, NULL, 0));
32439 DECL_WEAK (decl) = 1;
32440 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
32441 targetm.asm_out.globalize_label (asm_out_file, name);
32442 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
32443 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
32445 else
32446 #endif
32448 switch_to_section (text_section);
32449 ASM_OUTPUT_LABEL (asm_out_file, name);
32452 DECL_INITIAL (decl) = make_node (BLOCK);
32453 current_function_decl = decl;
32454 init_function_start (decl);
32455 first_function_block_is_cold = false;
32456 /* Make sure unwind info is emitted for the thunk if needed. */
32457 final_start_function (emit_barrier (), asm_out_file, 1);
32459 fputs ("\tblr\n", asm_out_file);
32461 final_end_function ();
32462 init_insn_lengths ();
32463 free_after_compilation (cfun);
32464 set_cfun (NULL);
32465 current_function_decl = NULL;
32468 /* Add r30 to hard reg set if the prologue sets it up and it is not
32469 pic_offset_table_rtx. */
32471 static void
32472 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
32474 if (!TARGET_SINGLE_PIC_BASE
32475 && TARGET_TOC
32476 && TARGET_MINIMAL_TOC
32477 && get_pool_size () != 0)
32478 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
32482 /* Helper function for rs6000_split_logical to emit a logical instruction after
32483 splitting the operation into single GPR registers.
32485 DEST is the destination register.
32486 OP1 and OP2 are the input source registers.
32487 CODE is the base operation (AND, IOR, XOR, NOT).
32488 MODE is the machine mode.
32489 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32490 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32491 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
32492 CLOBBER_REG is either NULL or a scratch register of type CC to allow
32493 formation of the AND instructions. */
32495 static void
32496 rs6000_split_logical_inner (rtx dest,
32497 rtx op1,
32498 rtx op2,
32499 enum rtx_code code,
32500 enum machine_mode mode,
32501 bool complement_final_p,
32502 bool complement_op1_p,
32503 bool complement_op2_p,
32504 rtx clobber_reg)
32506 rtx bool_rtx;
32507 rtx set_rtx;
32509 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
32510 if (op2 && GET_CODE (op2) == CONST_INT
32511 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
32512 && !complement_final_p && !complement_op1_p && !complement_op2_p)
32514 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
32515 HOST_WIDE_INT value = INTVAL (op2) & mask;
32517 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
32518 if (code == AND)
32520 if (value == 0)
32522 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
32523 return;
32526 else if (value == mask)
32528 if (!rtx_equal_p (dest, op1))
32529 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
32530 return;
32534 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
32535 into separate ORI/ORIS or XORI/XORIS instructions. */
32536 else if (code == IOR || code == XOR)
32538 if (value == 0)
32540 if (!rtx_equal_p (dest, op1))
32541 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
32542 return;
32547 if (complement_op1_p)
32548 op1 = gen_rtx_NOT (mode, op1);
32550 if (complement_op2_p)
32551 op2 = gen_rtx_NOT (mode, op2);
32553 bool_rtx = ((code == NOT)
32554 ? gen_rtx_NOT (mode, op1)
32555 : gen_rtx_fmt_ee (code, mode, op1, op2));
32557 if (complement_final_p)
32558 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
32560 set_rtx = gen_rtx_SET (VOIDmode, dest, bool_rtx);
32562 /* Is this AND with an explicit clobber? */
32563 if (clobber_reg)
32565 rtx clobber = gen_rtx_CLOBBER (VOIDmode, clobber_reg);
32566 set_rtx = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set_rtx, clobber));
32569 emit_insn (set_rtx);
32570 return;
32573 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
32574 operations are split immediately during RTL generation to allow for more
32575 optimizations of the AND/IOR/XOR.
32577 OPERANDS is an array containing the destination and two input operands.
32578 CODE is the base operation (AND, IOR, XOR, NOT).
32579 MODE is the machine mode.
32580 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32581 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32582 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
32583 CLOBBER_REG is either NULL or a scratch register of type CC to allow
32584 formation of the AND instructions. */
32586 static void
32587 rs6000_split_logical_di (rtx operands[3],
32588 enum rtx_code code,
32589 bool complement_final_p,
32590 bool complement_op1_p,
32591 bool complement_op2_p,
32592 rtx clobber_reg)
32594 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
32595 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
32596 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
32597 enum hi_lo { hi = 0, lo = 1 };
32598 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
32599 size_t i;
32601 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
32602 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
32603 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
32604 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
32606 if (code == NOT)
32607 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
32608 else
32610 if (GET_CODE (operands[2]) != CONST_INT)
32612 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
32613 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
32615 else
32617 HOST_WIDE_INT value = INTVAL (operands[2]);
32618 HOST_WIDE_INT value_hi_lo[2];
32620 gcc_assert (!complement_final_p);
32621 gcc_assert (!complement_op1_p);
32622 gcc_assert (!complement_op2_p);
32624 value_hi_lo[hi] = value >> 32;
32625 value_hi_lo[lo] = value & lower_32bits;
32627 for (i = 0; i < 2; i++)
32629 HOST_WIDE_INT sub_value = value_hi_lo[i];
32631 if (sub_value & sign_bit)
32632 sub_value |= upper_32bits;
32634 op2_hi_lo[i] = GEN_INT (sub_value);
32636 /* If this is an AND instruction, check to see if we need to load
32637 the value in a register. */
32638 if (code == AND && sub_value != -1 && sub_value != 0
32639 && !and_operand (op2_hi_lo[i], SImode))
32640 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
32645 for (i = 0; i < 2; i++)
32647 /* Split large IOR/XOR operations. */
32648 if ((code == IOR || code == XOR)
32649 && GET_CODE (op2_hi_lo[i]) == CONST_INT
32650 && !complement_final_p
32651 && !complement_op1_p
32652 && !complement_op2_p
32653 && clobber_reg == NULL_RTX
32654 && !logical_const_operand (op2_hi_lo[i], SImode))
32656 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
32657 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
32658 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
32659 rtx tmp = gen_reg_rtx (SImode);
32661 /* Make sure the constant is sign extended. */
32662 if ((hi_16bits & sign_bit) != 0)
32663 hi_16bits |= upper_32bits;
32665 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
32666 code, SImode, false, false, false,
32667 NULL_RTX);
32669 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
32670 code, SImode, false, false, false,
32671 NULL_RTX);
32673 else
32674 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
32675 code, SImode, complement_final_p,
32676 complement_op1_p, complement_op2_p,
32677 clobber_reg);
32680 return;
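/* A rough example of the splitting above: ior of the constant 0x12345678
   into one 32-bit half is emitted as two operations, first with the high
   part 0x12340000 into a temporary (an oris) and then with the low part
   0x5678 (an ori), since both halfwords of the constant are nonzero.  */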
32683 /* Split the insns that make up boolean operations operating on multiple GPR
32684 registers. The boolean MD patterns ensure that the inputs either are
32685 exactly the same as the output registers, or there is no overlap.
32687 OPERANDS is an array containing the destination and two input operands.
32688 CODE is the base operation (AND, IOR, XOR, NOT).
32689 MODE is the machine mode.
32690 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32691 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32692 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
32693 CLOBBER_REG is either NULL or a scratch register of type CC to allow
32694 formation of the AND instructions. */
32696 void
32697 rs6000_split_logical (rtx operands[3],
32698 enum rtx_code code,
32699 bool complement_final_p,
32700 bool complement_op1_p,
32701 bool complement_op2_p,
32702 rtx clobber_reg)
32704 enum machine_mode mode = GET_MODE (operands[0]);
32705 enum machine_mode sub_mode;
32706 rtx op0, op1, op2;
32707 int sub_size, regno0, regno1, nregs, i;
32709 /* If this is DImode, use the specialized version that can run before
32710 register allocation. */
32711 if (mode == DImode && !TARGET_POWERPC64)
32713 rs6000_split_logical_di (operands, code, complement_final_p,
32714 complement_op1_p, complement_op2_p,
32715 clobber_reg);
32716 return;
32719 op0 = operands[0];
32720 op1 = operands[1];
32721 op2 = (code == NOT) ? NULL_RTX : operands[2];
32722 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
32723 sub_size = GET_MODE_SIZE (sub_mode);
32724 regno0 = REGNO (op0);
32725 regno1 = REGNO (op1);
32727 gcc_assert (reload_completed);
32728 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
32729 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
32731 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
32732 gcc_assert (nregs > 1);
32734 if (op2 && REG_P (op2))
32735 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
32737 for (i = 0; i < nregs; i++)
32739 int offset = i * sub_size;
32740 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
32741 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
32742 rtx sub_op2 = ((code == NOT)
32743 ? NULL_RTX
32744 : simplify_subreg (sub_mode, op2, mode, offset));
32746 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
32747 complement_final_p, complement_op1_p,
32748 complement_op2_p, clobber_reg);
32751 return;
32755 /* Return true if the peephole2 can combine an addis instruction with a load
32756 that uses an offset, such that the pair can be fused together on a
32757 power8. */
32759 bool
32760 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
32761 rtx addis_value, /* addis value. */
32762 rtx target, /* target register that is loaded. */
32763 rtx mem) /* bottom part of the memory addr. */
32765 rtx addr;
32766 rtx base_reg;
32768 /* Validate arguments. */
32769 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
32770 return false;
32772 if (!base_reg_operand (target, GET_MODE (target)))
32773 return false;
32775 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
32776 return false;
32778 /* Allow sign/zero extension. */
32779 if (GET_CODE (mem) == ZERO_EXTEND
32780 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
32781 mem = XEXP (mem, 0);
32783 if (!MEM_P (mem))
32784 return false;
32786 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
32787 return false;
32789 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
32790 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
32791 return false;
32793 /* Validate that the register used to load the high value is either the
32794 register being loaded, or we can safely replace its use.
32796 This function is only called from the peephole2 pass and we assume that
32797 there are 2 instructions in the peephole (addis and load), so we want to
32798 check if the target register was not used in the memory address and the
32799 register to hold the addis result is dead after the peephole. */
32800 if (REGNO (addis_reg) != REGNO (target))
32802 if (reg_mentioned_p (target, mem))
32803 return false;
32805 if (!peep2_reg_dead_p (2, addis_reg))
32806 return false;
32808 /* If the target register being loaded is the stack pointer, we must
32809 avoid loading any other value into it, even temporarily. */
32810 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
32811 return false;
32814 base_reg = XEXP (addr, 0);
32815 return REGNO (addis_reg) == REGNO (base_reg);
32818 /* During the peephole2 pass, adjust and expand the insns for a load fusion
32819 sequence. We adjust the addis register to use the target register. If the
32820 load sign extends, we adjust the code to do a zero extending load followed
32821 by an explicit sign extension, since the fusion only covers zero extending
32822 loads.
32824 The operands are:
32825 operands[0] register set with addis (to be replaced with target)
32826 operands[1] value set via addis
32827 operands[2] target register being loaded
32828 operands[3] D-form memory reference using operands[0]. */
32830 void
32831 expand_fusion_gpr_load (rtx *operands)
32833 rtx addis_value = operands[1];
32834 rtx target = operands[2];
32835 rtx orig_mem = operands[3];
32836 rtx new_addr, new_mem, orig_addr, offset;
32837 enum rtx_code plus_or_lo_sum;
32838 enum machine_mode target_mode = GET_MODE (target);
32839 enum machine_mode extend_mode = target_mode;
32840 enum machine_mode ptr_mode = Pmode;
32841 enum rtx_code extend = UNKNOWN;
32843 if (GET_CODE (orig_mem) == ZERO_EXTEND
32844 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
32846 extend = GET_CODE (orig_mem);
32847 orig_mem = XEXP (orig_mem, 0);
32848 target_mode = GET_MODE (orig_mem);
32851 gcc_assert (MEM_P (orig_mem));
32853 orig_addr = XEXP (orig_mem, 0);
32854 plus_or_lo_sum = GET_CODE (orig_addr);
32855 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
32857 offset = XEXP (orig_addr, 1);
32858 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
32859 new_mem = replace_equiv_address_nv (orig_mem, new_addr);
32861 if (extend != UNKNOWN)
32862 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
32864 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
32865 UNSPEC_FUSION_GPR);
32866 emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
32868 if (extend == SIGN_EXTEND)
32870 int sub_off = ((BYTES_BIG_ENDIAN)
32871 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
32872 : 0);
32873 rtx sign_reg
32874 = simplify_subreg (target_mode, target, extend_mode, sub_off);
32876 emit_insn (gen_rtx_SET (VOIDmode, target,
32877 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
32880 return;
32883 /* Return a string to fuse an addis instruction with a gpr load into the same
32884 register that the addis instruction set up. The address that is used
32885 is the logical address that was formed during peephole2:
32886 (lo_sum (high) (low-part))
32888 The code is complicated, so we call output_asm_insn directly, and just
32889 return "". */
32891 const char *
32892 emit_fusion_gpr_load (rtx target, rtx mem)
32894 rtx addis_value;
32895 rtx fuse_ops[10];
32896 rtx addr;
32897 rtx load_offset;
32898 const char *addis_str = NULL;
32899 const char *load_str = NULL;
32900 const char *mode_name = NULL;
32901 char insn_template[80];
32902 enum machine_mode mode;
32903 const char *comment_str = ASM_COMMENT_START;
32905 if (GET_CODE (mem) == ZERO_EXTEND)
32906 mem = XEXP (mem, 0);
32908 gcc_assert (REG_P (target) && MEM_P (mem));
32910 if (*comment_str == ' ')
32911 comment_str++;
32913 addr = XEXP (mem, 0);
32914 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
32915 gcc_unreachable ();
32917 addis_value = XEXP (addr, 0);
32918 load_offset = XEXP (addr, 1);
32920 /* Now emit the load instruction to the same register. */
32921 mode = GET_MODE (mem);
32922 switch (mode)
32924 case QImode:
32925 mode_name = "char";
32926 load_str = "lbz";
32927 break;
32929 case HImode:
32930 mode_name = "short";
32931 load_str = "lhz";
32932 break;
32934 case SImode:
32935 mode_name = "int";
32936 load_str = "lwz";
32937 break;
32939 case DImode:
32940 gcc_assert (TARGET_POWERPC64);
32941 mode_name = "long";
32942 load_str = "ld";
32943 break;
32945 default:
32946 gcc_unreachable ();
32949 /* Emit the addis instruction. */
32950 fuse_ops[0] = target;
32951 if (satisfies_constraint_L (addis_value))
32953 fuse_ops[1] = addis_value;
32954 addis_str = "lis %0,%v1";
32957 else if (GET_CODE (addis_value) == PLUS)
32959 rtx op0 = XEXP (addis_value, 0);
32960 rtx op1 = XEXP (addis_value, 1);
32962 if (REG_P (op0) && CONST_INT_P (op1)
32963 && satisfies_constraint_L (op1))
32965 fuse_ops[1] = op0;
32966 fuse_ops[2] = op1;
32967 addis_str = "addis %0,%1,%v2";
32971 else if (GET_CODE (addis_value) == HIGH)
32973 rtx value = XEXP (addis_value, 0);
32974 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
32976 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
32977 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
32978 if (TARGET_ELF)
32979 addis_str = "addis %0,%2,%1@toc@ha";
32981 else if (TARGET_XCOFF)
32982 addis_str = "addis %0,%1@u(%2)";
32984 else
32985 gcc_unreachable ();
32988 else if (GET_CODE (value) == PLUS)
32990 rtx op0 = XEXP (value, 0);
32991 rtx op1 = XEXP (value, 1);
32993 if (GET_CODE (op0) == UNSPEC
32994 && XINT (op0, 1) == UNSPEC_TOCREL
32995 && CONST_INT_P (op1))
32997 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
32998 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
32999 fuse_ops[3] = op1;
33000 if (TARGET_ELF)
33001 addis_str = "addis %0,%2,%1+%3@toc@ha";
33003 else if (TARGET_XCOFF)
33004 addis_str = "addis %0,%1+%3@u(%2)";
33006 else
33007 gcc_unreachable ();
33011 else if (satisfies_constraint_L (value))
33013 fuse_ops[1] = value;
33014 addis_str = "lis %0,%v1";
33017 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
33019 fuse_ops[1] = value;
33020 addis_str = "lis %0,%1@ha";
33024 if (!addis_str)
33025 fatal_insn ("Could not generate addis value for fusion", addis_value);
33027 sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
33028 comment_str, mode_name);
33029 output_asm_insn (insn_template, fuse_ops);
33031 /* Emit the D-form load instruction. */
33032 if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
33034 sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
33035 fuse_ops[1] = load_offset;
33036 output_asm_insn (insn_template, fuse_ops);
33039 else if (GET_CODE (load_offset) == UNSPEC
33040 && XINT (load_offset, 1) == UNSPEC_TOCREL)
33042 if (TARGET_ELF)
33043 sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
33045 else if (TARGET_XCOFF)
33046 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33048 else
33049 gcc_unreachable ();
33051 fuse_ops[1] = XVECEXP (load_offset, 0, 0);
33052 output_asm_insn (insn_template, fuse_ops);
33055 else if (GET_CODE (load_offset) == PLUS
33056 && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
33057 && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
33058 && CONST_INT_P (XEXP (load_offset, 1)))
33060 rtx tocrel_unspec = XEXP (load_offset, 0);
33061 if (TARGET_ELF)
33062 sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
33064 else if (TARGET_XCOFF)
33065 sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
33067 else
33068 gcc_unreachable ();
33070 fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
33071 fuse_ops[2] = XEXP (load_offset, 1);
33072 output_asm_insn (insn_template, fuse_ops);
33075 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
33077 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33079 fuse_ops[1] = load_offset;
33080 output_asm_insn (insn_template, fuse_ops);
33083 else
33084 fatal_insn ("Unable to generate load offset for fusion", load_offset);
33086 return "";
33090 struct gcc_target targetm = TARGET_INITIALIZER;
33092 #include "gt-rs6000.h"