/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2014 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "hashtab.h"
#include "hash-set.h"
#include "vec.h"
#include "machmode.h"
#include "input.h"
#include "function.h"
#include "output.h"
#include "dbxout.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cfgloop.h"
#include "sched-int.h"
#include "hash-table.h"
#include "basic-block.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "ira.h"
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "cgraph.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "real.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif
#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))
/* Structure used to define the rs6000 stack */
typedef struct rs6000_stack {
  int reload_completed;		/* stack info won't change from here on */
  int first_gp_reg_save;	/* first callee saved GP register used */
  int first_fp_reg_save;	/* first callee saved FP register used */
  int first_altivec_reg_save;	/* first callee saved AltiVec register used */
  int lr_save_p;		/* true if the link reg needs to be saved */
  int cr_save_p;		/* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;	/* mask of vec registers to save */
  int push_p;			/* true if we need to allocate stack space */
  int calls_p;			/* true if the function makes any calls */
  int world_save_p;		/* true if we're saving *everything*:
				   r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;		/* which ABI to use */
  int gp_save_offset;		/* offset to save GP regs from initial SP */
  int fp_save_offset;		/* offset to save FP regs from initial SP */
  int altivec_save_offset;	/* offset to save AltiVec regs from initial SP */
  int lr_save_offset;		/* offset to save LR from initial SP */
  int cr_save_offset;		/* offset to save CR from initial SP */
  int vrsave_save_offset;	/* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;	/* offset to save spe 64-bit gprs */
  int varargs_save_offset;	/* offset to save the varargs registers */
  int ehrd_offset;		/* offset to EH return data */
  int ehcr_offset;		/* offset to EH CR field data */
  int reg_size;			/* register size (4 or 8) */
  HOST_WIDE_INT vars_size;	/* variable save area size */
  int parm_size;		/* outgoing parameter size */
  int save_size;		/* save area size */
  int fixed_size;		/* fixed size of stack frame */
  int gp_size;			/* size of saved GP registers */
  int fp_size;			/* size of saved FP registers */
  int altivec_size;		/* size of saved AltiVec registers */
  int cr_size;			/* size to hold CR if not in save_size */
  int vrsave_size;		/* size to hold VRSAVE if not in save_size */
  int altivec_padding_size;	/* size of altivec alignment padding if
				   not in save_size */
  int spe_gp_size;		/* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;	/* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of label created for -mrelocatable, to call to so we can
   get the address of the GOT section */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use variant of AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif
/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};
/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];
/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV		= 0x001,	/* Use divide estimate */
  RECIP_DF_DIV		= 0x002,
  RECIP_V4SF_DIV	= 0x004,
  RECIP_V2DF_DIV	= 0x008,

  RECIP_SF_RSQRT	= 0x010,	/* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT	= 0x020,
  RECIP_V4SF_RSQRT	= 0x040,
  RECIP_V2DF_RSQRT	= 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE		= 0,
  RECIP_ALL		= (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
			   | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
			   | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION	= RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION	= (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;		/* option name */
  unsigned int mask;		/* mask bits to set */
} recip_options[] = {
  { "all",	 RECIP_ALL },
  { "none",	 RECIP_NONE },
  { "div",	 (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
		  | RECIP_V2DF_DIV) },
  { "divf",	 (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",	 (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",	 (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
		  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",	 (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",	 (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */
enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
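
/* For example, IS_STD_REG_TYPE (ALTIVEC_REG_TYPE) is true, because
   ALTIVEC_REG_TYPE lies between GPR_REG_TYPE and FPR_REG_TYPE in the enum
   above, while IS_STD_REG_TYPE (CR_REG_TYPE) is false.  */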
/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the 3 register
   classes.  */
enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,		/* General purpose registers.  */
  RELOAD_REG_FPR,		/* Traditional floating point regs.  */
  RELOAD_REG_VMX,		/* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,		/* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS	RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS	RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;		/* Register class name.  */
  int reg;			/* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr",	FIRST_GPR_REGNO },	/* RELOAD_REG_GPR.  */
  { "Fpr",	FIRST_FPR_REGNO },	/* RELOAD_REG_FPR.  */
  { "VMX",	FIRST_ALTIVEC_REGNO },	/* RELOAD_REG_VMX.  */
  { "Any",	-1 },			/* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID	0x01	/* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE	0x02	/* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED	0x04	/* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET	0x08	/* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC	0x10	/* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY	0x20	/* PRE_MODIFY valid.  */
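
/* Worked example: a mode that is valid in a register class and supports both
   reg+reg and reg+offset addressing there would carry the mask
   (RELOAD_REG_VALID | RELOAD_REG_INDEXED | RELOAD_REG_OFFSET) == 0x0d.  */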
/* Masks of the valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;		/* INSN to reload for loading.  */
  enum insn_code reload_store;		/* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;	/* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;	/* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;	/* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG]; /* Valid address masks.  */
  bool scalar_in_vmx_p;			/* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];
/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (enum machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
	  != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (enum machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
	  != 0);
}
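
/* Usage sketch (illustrative, not a caller from this file):
   address-validation code can gate auto-increment forms on these helpers,
   e.g.

     if (GET_CODE (addr) == PRE_INC && !mode_supports_pre_incdec_p (mode))
       return false;

   Because the helpers consult the RELOAD_REG_ANY entry, which is the OR of
   the GPR/FPR/VMX masks, they answer whether *some* register class supports
   the form for MODE.  */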
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;	  /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;	  /* cost of DImode multiplication.  */
  const int divsi;	  /* cost of SImode division.  */
  const int divdi;	  /* cost of DImode division.  */
  const int fp;		  /* cost of simple SFmode and DFmode insns.  */
  const int dmul;	  /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;	  /* cost of SFmode division (fdivs).  */
  const int ddiv;	  /* cost of DFmode division (fdiv).  */
  const int cache_line_size;	/* cache line size in bytes.  */
  const int l1_cache_size;	/* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;	/* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
					operations.  */
};

const struct processor_costs *rs6000_cost;
/* Processor costs (relative to an add) */

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  32,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
};
/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),	/* mulsi */
  COSTS_N_INSNS (1),	/* mulsi_const */
  COSTS_N_INSNS (1),	/* mulsi_const9 */
  COSTS_N_INSNS (1),	/* muldi */
  COSTS_N_INSNS (1),	/* divsi */
  COSTS_N_INSNS (1),	/* divdi */
  COSTS_N_INSNS (1),	/* fp */
  COSTS_N_INSNS (1),	/* dmul */
  COSTS_N_INSNS (1),	/* sdiv */
  COSTS_N_INSNS (1),	/* ddiv */
  128,			/* cache line size */
  0,			/* l1 cache */
  0,			/* l2 cache */
  0,			/* streams */
};
/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),	/* mulsi */
  COSTS_N_INSNS (12),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (34),	/* muldi */
  COSTS_N_INSNS (65),	/* divsi */
  COSTS_N_INSNS (67),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (31),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  128,			/* cache line size */
  128,			/* l1 cache */
  2048,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (6),	/* divsi */
  COSTS_N_INSNS (6),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (10),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
};
/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (33),	/* divsi */
  COSTS_N_INSNS (33),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  4,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (35),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (11),	/* fp */
  COSTS_N_INSNS (11),	/* dmul */
  COSTS_N_INSNS (11),	/* sdiv */
  COSTS_N_INSNS (11),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* streams */
};
/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (34),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (11),	/* divsi */
  COSTS_N_INSNS (11),	/* divdi */
  COSTS_N_INSNS (6),	/* fp */
  COSTS_N_INSNS (6),	/* dmul */
  COSTS_N_INSNS (19),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* l1 cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};
/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (36),	/* divsi */
  COSTS_N_INSNS (36),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (37),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  8,			/* l1 cache */
  64,			/* l2 cache */
  1,			/* streams */
};
/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (20),	/* divsi */
  COSTS_N_INSNS (20),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};
/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (32),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (7),	/* muldi */
  COSTS_N_INSNS (21),	/* divsi */
  COSTS_N_INSNS (37),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (21),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};
/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),	    /* mulsi_const */
  COSTS_N_INSNS (6/2),	    /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),	    /* divsi */
  COSTS_N_INSNS (70/2),	    /* divdi */
  COSTS_N_INSNS (10/2),	    /* fp */
  COSTS_N_INSNS (10/2),	    /* dmul */
  COSTS_N_INSNS (74/2),	    /* sdiv */
  COSTS_N_INSNS (74/2),	    /* ddiv */
  128,			    /* cache line size */
  32,			    /* l1 cache */
  512,			    /* l2 cache */
  6,			    /* streams */
};
/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (17),	/* divsi */
  COSTS_N_INSNS (17),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (31),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* streams */
};

/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (23),	/* divsi */
  COSTS_N_INSNS (23),	/* divdi */
  COSTS_N_INSNS (5),	/* fp */
  COSTS_N_INSNS (5),	/* dmul */
  COSTS_N_INSNS (21),	/* sdiv */
  COSTS_N_INSNS (35),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  1,			/* streams */
};
/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (29),	/* sdiv */
  COSTS_N_INSNS (29),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (19),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (4),	/* dmul */
  COSTS_N_INSNS (18),	/* sdiv */
  COSTS_N_INSNS (33),	/* ddiv */
  32,			/* cache line size */
  16,			/* l1 cache */
  16,			/* l2 cache */
  1,			/* prefetch streams */
};
/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (8),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),	/* mulsi */
  COSTS_N_INSNS (4),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (4),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};
/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (4),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (14),	/* divsi */
  COSTS_N_INSNS (14),	/* divdi */
  COSTS_N_INSNS (7),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (36),	/* sdiv */
  COSTS_N_INSNS (66),	/* ddiv */
  64,			/* cache line size */
  32,			/* l1 cache */
  128,			/* l2 cache */
  1,			/* prefetch streams */
};
/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),	/* mulsi */
  COSTS_N_INSNS (5),	/* mulsi_const */
  COSTS_N_INSNS (5),	/* mulsi_const9 */
  COSTS_N_INSNS (5),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (18),	/* divdi */
  COSTS_N_INSNS (10),	/* fp */
  COSTS_N_INSNS (10),	/* dmul */
  COSTS_N_INSNS (46),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  32,			/* cache line size */
  32,			/* l1 cache */
  512,			/* l2 cache */
  1,			/* prefetch streams */
};

/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (4),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (17),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  1024,			/* l2 cache */
  8,			/* prefetch streams */
};
/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),	/* mulsi */
  COSTS_N_INSNS (8),	/* mulsi_const */
  COSTS_N_INSNS (8),	/* mulsi_const9 */
  COSTS_N_INSNS (8),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  64,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),	/* mulsi */
  COSTS_N_INSNS (2),	/* mulsi_const */
  COSTS_N_INSNS (2),	/* mulsi_const9 */
  COSTS_N_INSNS (2),	/* muldi */
  COSTS_N_INSNS (18),	/* divsi */
  COSTS_N_INSNS (34),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (13),	/* sdiv */
  COSTS_N_INSNS (16),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
};

/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),	/* mulsi */
  COSTS_N_INSNS (3),	/* mulsi_const */
  COSTS_N_INSNS (3),	/* mulsi_const9 */
  COSTS_N_INSNS (3),	/* muldi */
  COSTS_N_INSNS (19),	/* divsi */
  COSTS_N_INSNS (35),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (14),	/* sdiv */
  COSTS_N_INSNS (17),	/* ddiv */
  128,			/* cache line size */
  32,			/* l1 cache */
  256,			/* l2 cache */
  12,			/* prefetch streams */
};
/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),	/* mulsi */
  COSTS_N_INSNS (16),	/* mulsi_const */
  COSTS_N_INSNS (16),	/* mulsi_const9 */
  COSTS_N_INSNS (16),	/* muldi */
  COSTS_N_INSNS (22),	/* divsi */
  COSTS_N_INSNS (28),	/* divdi */
  COSTS_N_INSNS (3),	/* fp */
  COSTS_N_INSNS (3),	/* dmul */
  COSTS_N_INSNS (59),	/* sdiv */
  COSTS_N_INSNS (72),	/* ddiv */
  64,			/* cache line size */
  16,			/* l1 cache */
  2048,			/* l2 cache */
  16,			/* prefetch streams */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },
struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
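
/* Sketch of how the X-macro above expands (hypothetical entry, not an exact
   line from rs6000-builtin.def): a definition such as

     RS6000_BUILTIN_2 (ENUM_NAME, "__builtin_name", MASK, ATTR, CODE_FOR_insn)

   becomes the initializer row { "__builtin_name", CODE_FOR_insn, MASK, ATTR }
   in rs6000_builtin_info, matching the field order of
   rs6000_builtin_info_type.  */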
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (tree, tree, tree);
static bool rs6000_debug_legitimate_address_p (enum machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (enum machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, enum machine_mode, addr_space_t,
				      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *,rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (enum machine_mode, enum machine_mode,
				   enum machine_mode, enum machine_mode,
				   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, enum machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, enum machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
				       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, enum machine_mode, int, int,
					     int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, enum machine_mode, int,
						   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
						     enum machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
							   enum machine_mode,
							   rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
							   enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
					    enum machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
						  enum reg_class,
						  enum machine_mode);
static bool rs6000_cannot_change_mode_class (enum machine_mode,
					     enum machine_mode,
					     enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (enum machine_mode,
						   enum machine_mode,
						   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);
rtx (*rs6000_legitimize_reload_address_ptr) (rtx, enum machine_mode, int, int,
					     int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
						     enum machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
					    enum machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (enum machine_mode,
					     enum machine_mode,
					     enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
				      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
					  HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
					  enum rs6000_reg_type,
					  enum machine_mode,
					  secondary_reload_info *,
					  bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  enum machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_hasher<toc_hash_struct *>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;
/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  enum machine_mode mode[4];	/* return value + 3 arguments.  */
  unsigned char uns_p[4];	/* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_hasher<builtin_hash_struct *>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
     "mq", "lr", "ctr","ap",
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "ca",
      /* AltiVec registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31",
      "vrsave", "vscr",
      /* SPE registers.  */
      "spe_acc", "spefscr",
      /* Soft frame pointer.  */
      "sfp",
      /* HTM SPR registers.  */
      "tfhar", "tfiar", "texasr",
      /* SPE High registers.  */
      "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
      "8",  "9", "10", "11", "12", "13", "14", "15",
     "16", "17", "18", "19", "20", "21", "22", "23",
     "24", "25", "26", "27", "28", "29", "30", "31"
};
#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",   "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",   "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16",  "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24",  "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",   "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",   "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16",  "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24",  "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",    "lr",  "ctr",   "ap",
  "%cr0",  "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
   "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6", "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9", "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
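
/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (%v0),
   and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001 (%v31).  */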
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif
/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif
#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

/* On rs6000, function arguments are promoted, as are function return
   values.  */
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif
/* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
   The PowerPC architecture requires only weak consistency among
   processors--that is, memory accesses between processors need not be
   sequentially consistent and memory accesses among processors can occur
   in any order.  The ability to order memory accesses weakly provides
   opportunities for more efficient use of the system bus.  Unless a
   dependency exists, the 604e allows read operations to precede store
   operations.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true

#ifdef HAVE_AS_TLS
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
#endif

/* Use a 32-bit anchor range.  This leads to sequences like:

	addis	tmp,anchor,high
	add	dest,tmp,low

   where tmp itself acts as an anchor, and can be shared between
   accesses to the same 64k page.  */
#undef TARGET_MIN_ANCHOR_OFFSET
#define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
#undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
#define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
#undef TARGET_USE_BLOCKS_FOR_DECL_P
#define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p

#undef TARGET_BUILTIN_RECIPROCAL
#define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal

#undef TARGET_EXPAND_TO_RTL_HOOK
#define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot

#undef TARGET_INSTANTIATE_DECLS
#define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD rs6000_secondary_reload

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p

#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p

#undef TARGET_LRA_P
#define TARGET_LRA_P rs6000_lra_p

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE rs6000_can_eliminate

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE rs6000_function_value

#undef TARGET_OPTION_VALID_ATTRIBUTE_P
#define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p

#undef TARGET_OPTION_SAVE
#define TARGET_OPTION_SAVE rs6000_function_specific_save

#undef TARGET_OPTION_RESTORE
#define TARGET_OPTION_RESTORE rs6000_function_specific_restore

#undef TARGET_OPTION_PRINT
#define TARGET_OPTION_PRINT rs6000_function_specific_print

#undef TARGET_CAN_INLINE_P
#define TARGET_CAN_INLINE_P rs6000_can_inline_p

#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p

#undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
#define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1655 /* Processor table. */
1656 struct rs6000_ptt
1658 const char *const name; /* Canonical processor name. */
1659 const enum processor_type processor; /* Processor type enum value. */
1660 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1663 static struct rs6000_ptt const processor_target_table[] =
1665 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1666 #include "rs6000-cpus.def"
1667 #undef RS6000_CPU
1670 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1671 name is invalid. */
1673 static int
1674 rs6000_cpu_name_lookup (const char *name)
1676 size_t i;
1678 if (name != NULL)
1680 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1681 if (! strcmp (name, processor_target_table[i].name))
1682 return (int)i;
1685 return -1;
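/* Usage sketch (illustrative): rs6000_cpu_name_lookup ("power7") returns
   the index of the matching row in processor_target_table, while an
   unrecognized name returns -1, so callers can reject an invalid
   -mcpu=/-mtune= value.  */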
1689 /* Return number of consecutive hard regs needed starting at reg REGNO
1690 to hold something of mode MODE.
1691 This is ordinarily the length in words of a value of mode MODE
1692 but can be less for certain modes in special long registers.
1694 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1695 scalar instructions. The upper 32 bits are only available to the
1696 SIMD instructions.
1698 POWER and PowerPC GPRs hold 32 bits worth;
1699 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1701 static int
1702 rs6000_hard_regno_nregs_internal (int regno, enum machine_mode mode)
1704 unsigned HOST_WIDE_INT reg_size;
1706 /* TF/TD modes are special in that they always take 2 registers. */
1707 if (FP_REGNO_P (regno))
1708 reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
1709 ? UNITS_PER_VSX_WORD
1710 : UNITS_PER_FP_WORD);
1712 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1713 reg_size = UNITS_PER_SPE_WORD;
1715 else if (ALTIVEC_REGNO_P (regno))
1716 reg_size = UNITS_PER_ALTIVEC_WORD;
1718 /* The value returned for SCmode in the E500 double case is 2 for
1719 ABI compatibility; storing an SCmode value in a single register
1720 would require function_arg and rs6000_spe_function_arg to handle
1721 SCmode so as to pass the value correctly in a pair of
1722 registers. */
1723 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1724 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1725 reg_size = UNITS_PER_FP_WORD;
1727 else
1728 reg_size = UNITS_PER_WORD;
1730 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
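/* Worked example (a sketch, not in the original source): on a 32-bit
   target where UNITS_PER_WORD == 4, a DFmode value (8 bytes) in GPRs
   needs (8 + 4 - 1) / 4 = 2 consecutive registers, while the same value
   held in a 64-bit FPR needs (8 + 8 - 1) / 8 = 1.  */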
1733 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1734 MODE. */
1735 static int
1736 rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
1738 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1740 /* PTImode can only go in GPRs. Quad word memory operations require even/odd
1741 register combinations; we use PTImode where we need to deal with quad
1742 word memory operations. Don't allow quad words in the argument or frame
1743 pointer registers, just registers 0..31. */
1744 if (mode == PTImode)
1745 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1746 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1747 && ((regno & 1) == 0));
1749 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1750 implementations. Don't allow an item to be split between a FP register
1751 and an Altivec register. Allow TImode in all VSX registers if the user
1752 asked for it. */
1753 if (TARGET_VSX && VSX_REGNO_P (regno)
1754 && (VECTOR_MEM_VSX_P (mode)
1755 || reg_addr[mode].scalar_in_vmx_p
1756 || (TARGET_VSX_TIMODE && mode == TImode)
1757 || (TARGET_VADDUQM && mode == V1TImode)))
1759 if (FP_REGNO_P (regno))
1760 return FP_REGNO_P (last_regno);
1762 if (ALTIVEC_REGNO_P (regno))
1764 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1765 return 0;
1767 return ALTIVEC_REGNO_P (last_regno);
1771 /* The GPRs can hold any mode, but values bigger than one register
1772 cannot go past R31. */
1773 if (INT_REGNO_P (regno))
1774 return INT_REGNO_P (last_regno);
1776 /* The float registers (except for VSX vector modes) can only hold floating
1777 modes and DImode. */
1778 if (FP_REGNO_P (regno))
1780 if (SCALAR_FLOAT_MODE_P (mode)
1781 && (mode != TDmode || (regno % 2) == 0)
1782 && FP_REGNO_P (last_regno))
1783 return 1;
1785 if (GET_MODE_CLASS (mode) == MODE_INT
1786 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1787 return 1;
1789 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1790 && PAIRED_VECTOR_MODE (mode))
1791 return 1;
1793 return 0;
1796 /* The CR register can only hold CC modes. */
1797 if (CR_REGNO_P (regno))
1798 return GET_MODE_CLASS (mode) == MODE_CC;
1800 if (CA_REGNO_P (regno))
1801 return mode == Pmode || mode == SImode;
1803 /* AltiVec only in AltiVec registers. */
1804 if (ALTIVEC_REGNO_P (regno))
1805 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1806 || mode == V1TImode);
1808 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1809 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1810 return 1;
1812 /* We cannot put non-VSX TImode or PTImode anywhere except general
1813 registers, and it must be able to fit within the register set. */
1815 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
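/* Illustrative consequences of the rules above (summary commentary, not
   original source text): TDmode is accepted in the FPRs only when it
   starts at an even-numbered register, SPE vector modes are accepted in
   GPRs only when TARGET_SPE is set, and for the remaining registers the
   final catch-all accepts only modes no wider than a word.  */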
1818 /* Print interesting facts about registers. */
1819 static void
1820 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1822 int r, m;
1824 for (r = first_regno; r <= last_regno; ++r)
1826 const char *comma = "";
1827 int len;
1829 if (first_regno == last_regno)
1830 fprintf (stderr, "%s:\t", reg_name);
1831 else
1832 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1834 len = 8;
1835 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1836 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1838 if (len > 70)
1840 fprintf (stderr, ",\n\t");
1841 len = 8;
1842 comma = "";
1845 if (rs6000_hard_regno_nregs[m][r] > 1)
1846 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1847 rs6000_hard_regno_nregs[m][r]);
1848 else
1849 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1851 comma = ", ";
1854 if (call_used_regs[r])
1856 if (len > 70)
1858 fprintf (stderr, ",\n\t");
1859 len = 8;
1860 comma = "";
1863 len += fprintf (stderr, "%s%s", comma, "call-used");
1864 comma = ", ";
1867 if (fixed_regs[r])
1869 if (len > 70)
1871 fprintf (stderr, ",\n\t");
1872 len = 8;
1873 comma = "";
1876 len += fprintf (stderr, "%s%s", comma, "fixed");
1877 comma = ", ";
1880 if (len > 70)
1882 fprintf (stderr, ",\n\t");
1883 comma = "";
1886 len += fprintf (stderr, "%sreg-class = %s", comma,
1887 reg_class_names[(int)rs6000_regno_regclass[r]]);
1888 comma = ", ";
1890 if (len > 70)
1892 fprintf (stderr, ",\n\t");
1893 comma = "";
1896 fprintf (stderr, "%sregno = %d\n", comma, r);
1900 static const char *
1901 rs6000_debug_vector_unit (enum rs6000_vector v)
1903 const char *ret;
1905 switch (v)
1907 case VECTOR_NONE: ret = "none"; break;
1908 case VECTOR_ALTIVEC: ret = "altivec"; break;
1909 case VECTOR_VSX: ret = "vsx"; break;
1910 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1911 case VECTOR_PAIRED: ret = "paired"; break;
1912 case VECTOR_SPE: ret = "spe"; break;
1913 case VECTOR_OTHER: ret = "other"; break;
1914 default: ret = "unknown"; break;
1917 return ret;
1920 /* Print the address masks in a human readable fashion. */
1921 DEBUG_FUNCTION void
1922 rs6000_debug_print_mode (ssize_t m)
1924 ssize_t rc;
1926 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
1927 for (rc = 0; rc < N_RELOAD_REG; rc++)
1929 addr_mask_type mask = reg_addr[m].addr_mask[rc];
1930 fprintf (stderr,
1931 " %s: %c%c%c%c%c%c",
1932 reload_reg_map[rc].name,
1933 (mask & RELOAD_REG_VALID) != 0 ? 'v' : ' ',
1934 (mask & RELOAD_REG_MULTIPLE) != 0 ? 'm' : ' ',
1935 (mask & RELOAD_REG_INDEXED) != 0 ? 'i' : ' ',
1936 (mask & RELOAD_REG_OFFSET) != 0 ? 'o' : ' ',
1937 (mask & RELOAD_REG_PRE_INCDEC) != 0 ? '+' : ' ',
1938 (mask & RELOAD_REG_PRE_MODIFY) != 0 ? '+' : ' ');
1941 if (rs6000_vector_unit[m] != VECTOR_NONE
1942 || rs6000_vector_mem[m] != VECTOR_NONE
1943 || (reg_addr[m].reload_store != CODE_FOR_nothing)
1944 || (reg_addr[m].reload_load != CODE_FOR_nothing)
1945 || reg_addr[m].scalar_in_vmx_p)
1947 fprintf (stderr,
1948 " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c Upper=%c",
1949 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
1950 rs6000_debug_vector_unit (rs6000_vector_mem[m]),
1951 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
1952 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*',
1953 (reg_addr[m].scalar_in_vmx_p) ? 'y' : 'n');
1956 fputs ("\n", stderr);
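/* Key to the mask letters printed above (summary comment, not part of
   the original source): 'v' = mode valid in the register class,
   'm' = needs multiple hard registers, 'i' = REG+REG (indexed)
   addressing allowed, 'o' = REG+OFFSET addressing allowed, and the two
   '+' columns show PRE_INC/PRE_DEC and PRE_MODIFY support.  */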
1959 #define DEBUG_FMT_ID "%-32s= "
1960 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
1961 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
1962 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
1964 /* Print various interesting information with -mdebug=reg. */
1965 static void
1966 rs6000_debug_reg_global (void)
1968 static const char *const tf[2] = { "false", "true" };
1969 const char *nl = (const char *)0;
1970 int m;
1971 size_t m1, m2, v;
1972 char costly_num[20];
1973 char nop_num[20];
1974 char flags_buffer[40];
1975 const char *costly_str;
1976 const char *nop_str;
1977 const char *trace_str;
1978 const char *abi_str;
1979 const char *cmodel_str;
1980 struct cl_target_option cl_opts;
1982 /* Modes we want tieable information on. */
1983 static const enum machine_mode print_tieable_modes[] = {
1984 QImode,
1985 HImode,
1986 SImode,
1987 DImode,
1988 TImode,
1989 PTImode,
1990 SFmode,
1991 DFmode,
1992 TFmode,
1993 SDmode,
1994 DDmode,
1995 TDmode,
1996 V8QImode,
1997 V4HImode,
1998 V2SImode,
1999 V16QImode,
2000 V8HImode,
2001 V4SImode,
2002 V2DImode,
2003 V1TImode,
2004 V32QImode,
2005 V16HImode,
2006 V8SImode,
2007 V4DImode,
2008 V2TImode,
2009 V2SFmode,
2010 V4SFmode,
2011 V2DFmode,
2012 V8SFmode,
2013 V4DFmode,
2014 CCmode,
2015 CCUNSmode,
2016 CCEQmode,
2019 /* Virtual regs we are interested in. */
2020 const static struct {
2021 int regno; /* register number. */
2022 const char *name; /* register name. */
2023 } virtual_regs[] = {
2024 { STACK_POINTER_REGNUM, "stack pointer:" },
2025 { TOC_REGNUM, "toc: " },
2026 { STATIC_CHAIN_REGNUM, "static chain: " },
2027 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2028 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2029 { ARG_POINTER_REGNUM, "arg pointer: " },
2030 { FRAME_POINTER_REGNUM, "frame pointer:" },
2031 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2032 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2033 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2034 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2035 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2036 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2037 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2038 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2039 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2042 fputs ("\nHard register information:\n", stderr);
2043 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2044 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2045 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2046 LAST_ALTIVEC_REGNO,
2047 "vs");
2048 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2049 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2050 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2051 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2052 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2053 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2054 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2055 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2057 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2058 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2059 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2061 fprintf (stderr,
2062 "\n"
2063 "d reg_class = %s\n"
2064 "f reg_class = %s\n"
2065 "v reg_class = %s\n"
2066 "wa reg_class = %s\n"
2067 "wd reg_class = %s\n"
2068 "wf reg_class = %s\n"
2069 "wg reg_class = %s\n"
2070 "wh reg_class = %s\n"
2071 "wi reg_class = %s\n"
2072 "wj reg_class = %s\n"
2073 "wk reg_class = %s\n"
2074 "wl reg_class = %s\n"
2075 "wm reg_class = %s\n"
2076 "wr reg_class = %s\n"
2077 "ws reg_class = %s\n"
2078 "wt reg_class = %s\n"
2079 "wu reg_class = %s\n"
2080 "wv reg_class = %s\n"
2081 "ww reg_class = %s\n"
2082 "wx reg_class = %s\n"
2083 "wy reg_class = %s\n"
2084 "wz reg_class = %s\n"
2085 "\n",
2086 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2087 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2088 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2089 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2090 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2091 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2092 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2093 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2094 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2095 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2096 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2097 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2098 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2099 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2100 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2101 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2102 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2103 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2104 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2105 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2106 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2107 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2109 nl = "\n";
2110 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2111 rs6000_debug_print_mode (m);
2113 fputs ("\n", stderr);
2115 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2117 enum machine_mode mode1 = print_tieable_modes[m1];
2118 bool first_time = true;
2120 nl = (const char *)0;
2121 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2123 enum machine_mode mode2 = print_tieable_modes[m2];
2124 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2126 if (first_time)
2128 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2129 nl = "\n";
2130 first_time = false;
2133 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2137 if (!first_time)
2138 fputs ("\n", stderr);
2141 if (nl)
2142 fputs (nl, stderr);
2144 if (rs6000_recip_control)
2146 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2148 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2149 if (rs6000_recip_bits[m])
2151 fprintf (stderr,
2152 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2153 GET_MODE_NAME (m),
2154 (RS6000_RECIP_AUTO_RE_P (m)
2155 ? "auto"
2156 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2157 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2158 ? "auto"
2159 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2162 fputs ("\n", stderr);
2165 if (rs6000_cpu_index >= 0)
2167 const char *name = processor_target_table[rs6000_cpu_index].name;
2168 HOST_WIDE_INT flags
2169 = processor_target_table[rs6000_cpu_index].target_enable;
2171 sprintf (flags_buffer, "-mcpu=%s flags", name);
2172 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2174 else
2175 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2177 if (rs6000_tune_index >= 0)
2179 const char *name = processor_target_table[rs6000_tune_index].name;
2180 HOST_WIDE_INT flags
2181 = processor_target_table[rs6000_tune_index].target_enable;
2183 sprintf (flags_buffer, "-mtune=%s flags", name);
2184 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2186 else
2187 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2189 cl_target_option_save (&cl_opts, &global_options);
2190 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2191 rs6000_isa_flags);
2193 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2194 rs6000_isa_flags_explicit);
2196 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2197 rs6000_builtin_mask);
2199 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2201 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2202 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2204 switch (rs6000_sched_costly_dep)
2206 case max_dep_latency:
2207 costly_str = "max_dep_latency";
2208 break;
2210 case no_dep_costly:
2211 costly_str = "no_dep_costly";
2212 break;
2214 case all_deps_costly:
2215 costly_str = "all_deps_costly";
2216 break;
2218 case true_store_to_load_dep_costly:
2219 costly_str = "true_store_to_load_dep_costly";
2220 break;
2222 case store_to_load_dep_costly:
2223 costly_str = "store_to_load_dep_costly";
2224 break;
2226 default:
2227 costly_str = costly_num;
2228 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2229 break;
2232 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2234 switch (rs6000_sched_insert_nops)
2236 case sched_finish_regroup_exact:
2237 nop_str = "sched_finish_regroup_exact";
2238 break;
2240 case sched_finish_pad_groups:
2241 nop_str = "sched_finish_pad_groups";
2242 break;
2244 case sched_finish_none:
2245 nop_str = "sched_finish_none";
2246 break;
2248 default:
2249 nop_str = nop_num;
2250 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2251 break;
2254 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2256 switch (rs6000_sdata)
2258 default:
2259 case SDATA_NONE:
2260 break;
2262 case SDATA_DATA:
2263 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2264 break;
2266 case SDATA_SYSV:
2267 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2268 break;
2270 case SDATA_EABI:
2271 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2272 break;
2276 switch (rs6000_traceback)
2278 case traceback_default: trace_str = "default"; break;
2279 case traceback_none: trace_str = "none"; break;
2280 case traceback_part: trace_str = "part"; break;
2281 case traceback_full: trace_str = "full"; break;
2282 default: trace_str = "unknown"; break;
2285 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2287 switch (rs6000_current_cmodel)
2289 case CMODEL_SMALL: cmodel_str = "small"; break;
2290 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2291 case CMODEL_LARGE: cmodel_str = "large"; break;
2292 default: cmodel_str = "unknown"; break;
2295 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2297 switch (rs6000_current_abi)
2299 case ABI_NONE: abi_str = "none"; break;
2300 case ABI_AIX: abi_str = "aix"; break;
2301 case ABI_ELFv2: abi_str = "ELFv2"; break;
2302 case ABI_V4: abi_str = "V4"; break;
2303 case ABI_DARWIN: abi_str = "darwin"; break;
2304 default: abi_str = "unknown"; break;
2307 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2309 if (rs6000_altivec_abi)
2310 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2312 if (rs6000_spe_abi)
2313 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2315 if (rs6000_darwin64_abi)
2316 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2318 if (rs6000_float_gprs)
2319 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2321 fprintf (stderr, DEBUG_FMT_S, "fprs",
2322 (TARGET_FPRS ? "true" : "false"));
2324 fprintf (stderr, DEBUG_FMT_S, "single_float",
2325 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2327 fprintf (stderr, DEBUG_FMT_S, "double_float",
2328 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2330 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2331 (TARGET_SOFT_FLOAT ? "true" : "false"));
2333 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2334 (TARGET_E500_SINGLE ? "true" : "false"));
2336 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2337 (TARGET_E500_DOUBLE ? "true" : "false"));
2339 if (TARGET_LINK_STACK)
2340 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2342 if (targetm.lra_p ())
2343 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2345 if (TARGET_P8_FUSION)
2346 fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
2347 (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
2349 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2350 TARGET_SECURE_PLT ? "secure" : "bss");
2351 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2352 aix_struct_return ? "aix" : "sysv");
2353 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2354 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2355 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2356 tf[!!rs6000_align_branch_targets]);
2357 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2358 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2359 rs6000_long_double_type_size);
2360 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2361 (int)rs6000_sched_restricted_insns_priority);
2362 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2363 (int)END_BUILTINS);
2364 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2365 (int)RS6000_BUILTIN_COUNT);
2367 if (TARGET_VSX)
2368 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2369 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2373 /* Update the addr mask bits in reg_addr to help secondary reload and
2374 legitimate address support figure out the appropriate addressing to
2375 use. */
2377 static void
2378 rs6000_setup_reg_addr_masks (void)
2380 ssize_t rc, reg, m, nregs;
2381 addr_mask_type any_addr_mask, addr_mask;
2383 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2385 enum machine_mode m2 = (enum machine_mode)m;
2387 /* SDmode is special in that we want to access it only via REG+REG
2388 addressing on power7 and above, since we want to use the LFIWZX and
2389 STFIWZX instructions to load it. */
2390 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2392 any_addr_mask = 0;
2393 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2395 addr_mask = 0;
2396 reg = reload_reg_map[rc].reg;
2398 /* Can mode values go in the GPR/FPR/Altivec registers? */
2399 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2401 nregs = rs6000_hard_regno_nregs[m][reg];
2402 addr_mask |= RELOAD_REG_VALID;
2404 /* Indicate if the mode takes more than 1 physical register. If
2405 it takes a single register, indicate it can do REG+REG
2406 addressing. */
2407 if (nregs > 1 || m == BLKmode)
2408 addr_mask |= RELOAD_REG_MULTIPLE;
2409 else
2410 addr_mask |= RELOAD_REG_INDEXED;
2412 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2413 addressing. Restrict addressing on SPE for 64-bit types
2414 because of the SUBREG hackery used to address 64-bit floats in
2415 '32-bit' GPRs. To simplify secondary reload, don't allow
2416 update forms on scalar floating point types that can go in the
2417 upper registers. */
2419 if (TARGET_UPDATE
2420 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2421 && GET_MODE_SIZE (m2) <= 8
2422 && !VECTOR_MODE_P (m2)
2423 && !COMPLEX_MODE_P (m2)
2424 && !indexed_only_p
2425 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8)
2426 && !reg_addr[m2].scalar_in_vmx_p)
2428 addr_mask |= RELOAD_REG_PRE_INCDEC;
2430 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2431 we don't allow PRE_MODIFY for some multi-register
2432 operations. */
2433 switch (m)
2435 default:
2436 addr_mask |= RELOAD_REG_PRE_MODIFY;
2437 break;
2439 case DImode:
2440 if (TARGET_POWERPC64)
2441 addr_mask |= RELOAD_REG_PRE_MODIFY;
2442 break;
2444 case DFmode:
2445 case DDmode:
2446 if (TARGET_DF_INSN)
2447 addr_mask |= RELOAD_REG_PRE_MODIFY;
2448 break;
2453 /* GPR and FPR registers can do REG+OFFSET addressing, except
2454 possibly for SDmode. */
2455 if ((addr_mask != 0) && !indexed_only_p
2456 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
2457 addr_mask |= RELOAD_REG_OFFSET;
2459 reg_addr[m].addr_mask[rc] = addr_mask;
2460 any_addr_mask |= addr_mask;
2463 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
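/* Example of the resulting masks (a sketch): on a 32-bit target, DImode
   in a GPR occupies two registers, so its GPR mask has RELOAD_REG_VALID
   and RELOAD_REG_MULTIPLE (but not RELOAD_REG_INDEXED) plus
   RELOAD_REG_OFFSET, whereas SImode gets RELOAD_REG_VALID,
   RELOAD_REG_INDEXED and RELOAD_REG_OFFSET, plus the update-form bits
   when TARGET_UPDATE allows them.  */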
2468 /* Initialize the various global tables that are based on register size. */
2469 static void
2470 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2472 ssize_t r, m, c;
2473 int align64;
2474 int align32;
2476 /* Precalculate REGNO_REG_CLASS. */
2477 rs6000_regno_regclass[0] = GENERAL_REGS;
2478 for (r = 1; r < 32; ++r)
2479 rs6000_regno_regclass[r] = BASE_REGS;
2481 for (r = 32; r < 64; ++r)
2482 rs6000_regno_regclass[r] = FLOAT_REGS;
2484 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2485 rs6000_regno_regclass[r] = NO_REGS;
2487 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2488 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2490 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2491 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2492 rs6000_regno_regclass[r] = CR_REGS;
2494 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2495 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2496 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2497 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2498 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2499 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2500 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2501 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2502 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2503 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2504 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2505 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2507 /* Precalculate register class to simpler reload register class. We don't
2508 need all of the register classes that are combinations of different
2509 classes, just the simple ones that have constraint letters. */
2510 for (c = 0; c < N_REG_CLASSES; c++)
2511 reg_class_to_reg_type[c] = NO_REG_TYPE;
2513 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2514 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2515 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2516 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2517 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2518 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2519 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2520 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2521 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2522 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2523 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2524 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2526 if (TARGET_VSX)
2528 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2529 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2531 else
2533 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2534 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2537 /* Precalculate the valid memory formats as well as the vector information;
2538 this must be set up before the rs6000_hard_regno_nregs_internal calls
2539 below. */
2540 gcc_assert ((int)VECTOR_NONE == 0);
2541 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2542 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
2544 gcc_assert ((int)CODE_FOR_nothing == 0);
2545 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2547 gcc_assert ((int)NO_REGS == 0);
2548 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2550 /* The VSX hardware allows native alignment for vectors, but we control whether
2551 the compiler believes it can use native alignment or must still use 128-bit alignment. */
2552 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2554 align64 = 64;
2555 align32 = 32;
2557 else
2559 align64 = 128;
2560 align32 = 128;
2563 /* V2DF mode, VSX only. */
2564 if (TARGET_VSX)
2566 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2567 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2568 rs6000_vector_align[V2DFmode] = align64;
2571 /* V4SF mode, either VSX or Altivec. */
2572 if (TARGET_VSX)
2574 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2575 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2576 rs6000_vector_align[V4SFmode] = align32;
2578 else if (TARGET_ALTIVEC)
2580 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2581 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2582 rs6000_vector_align[V4SFmode] = align32;
2585 /* V16QImode, V8HImode, V4SImode are Altivec only, but may use VSX loads
2586 and stores. */
2587 if (TARGET_ALTIVEC)
2589 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2590 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2591 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2592 rs6000_vector_align[V4SImode] = align32;
2593 rs6000_vector_align[V8HImode] = align32;
2594 rs6000_vector_align[V16QImode] = align32;
2596 if (TARGET_VSX)
2598 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2599 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2600 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2602 else
2604 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2605 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2606 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2610 /* V2DImode: full arithmetic support depends on the ISA 2.07 vector mode.
2611 Allow it under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2612 if (TARGET_VSX)
2614 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2615 rs6000_vector_unit[V2DImode]
2616 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2617 rs6000_vector_align[V2DImode] = align64;
2619 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2620 rs6000_vector_unit[V1TImode]
2621 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2622 rs6000_vector_align[V1TImode] = 128;
2625 /* DFmode, see if we want to use the VSX unit. */
2626 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2628 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2629 rs6000_vector_mem[DFmode]
2630 = (TARGET_UPPER_REGS_DF ? VECTOR_VSX : VECTOR_NONE);
2631 rs6000_vector_align[DFmode] = align64;
2634 /* Allow TImode in VSX register and set the VSX memory macros. */
2635 if (TARGET_VSX && TARGET_VSX_TIMODE)
2637 rs6000_vector_mem[TImode] = VECTOR_VSX;
2638 rs6000_vector_align[TImode] = align64;
2641 /* TODO add SPE and paired floating point vector support. */
2643 /* Register class constraints for the constraints that depend on compile
2644 switches. When the VSX code was added, different constraints were added
2645 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2646 of the VSX registers are used. The register classes for scalar floating
2647 point types are set based on whether we allow that type into the upper
2648 (Altivec) registers. GCC has register classes to target the Altivec
2649 registers for load/store operations, to select using a VSX memory
2650 operation instead of the traditional floating point operation. The
2651 constraints are:
2653 d - Register class to use with traditional DFmode instructions.
2654 f - Register class to use with traditional SFmode instructions.
2655 v - Altivec register.
2656 wa - Any VSX register.
2657 wc - Reserved to represent individual CR bits (used in LLVM).
2658 wd - Preferred register class for V2DFmode.
2659 wf - Preferred register class for V4SFmode.
2660 wg - Float register for power6x move insns.
2661 wh - FP register for direct move instructions.
2662 wi - FP or VSX register to hold 64-bit integers for VSX insns.
2663 wj - FP or VSX register to hold 64-bit integers for direct moves.
2664 wk - FP or VSX register to hold 64-bit doubles for direct moves.
2665 wl - Float register if we can do 32-bit signed int loads.
2666 wm - VSX register for ISA 2.07 direct move operations.
2667 wn - always NO_REGS.
2668 wr - GPR if 64-bit mode is permitted.
2669 ws - Register class to do ISA 2.06 DF operations.
2670 wt - VSX register for TImode in VSX registers.
2671 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2672 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2673 ww - Register class to do SF conversions in with VSX operations.
2674 wx - Float register if we can do 32-bit int stores.
2675 wy - Register class to do ISA 2.07 SF operations.
2676 wz - Float register if we can do 32-bit unsigned int loads. */
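/* Hedged illustration (machine-description syntax, not from this file):
   a scalar double move pattern might list several of these letters as
   alternatives, e.g.

       (match_operand:DF 0 "nonimmediate_operand" "=d,ws,r")

   so register allocation can pick a traditional FPR ('d'), any register
   class valid for ISA 2.06 DF operations ('ws'), or a GPR ('r').  */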
2678 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2679 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2681 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2682 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2684 if (TARGET_VSX)
2686 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2687 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
2688 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
2689 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
2691 if (TARGET_VSX_TIMODE)
2692 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
2694 if (TARGET_UPPER_REGS_DF) /* DFmode */
2696 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2697 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2699 else
2700 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2703 /* Add conditional constraints based on various options, to allow us to
2704 collapse multiple insn patterns. */
2705 if (TARGET_ALTIVEC)
2706 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2708 if (TARGET_MFPGPR) /* DFmode */
2709 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2711 if (TARGET_LFIWAX)
2712 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
2714 if (TARGET_DIRECT_MOVE)
2716 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
2717 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
2718 = rs6000_constraints[RS6000_CONSTRAINT_wi];
2719 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
2720 = rs6000_constraints[RS6000_CONSTRAINT_ws];
2721 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2724 if (TARGET_POWERPC64)
2725 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2727 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
2729 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2730 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2731 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2733 else if (TARGET_P8_VECTOR)
2735 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
2736 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2738 else if (TARGET_VSX)
2739 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2741 if (TARGET_STFIWX)
2742 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2744 if (TARGET_LFIWZX)
2745 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
2747 /* Set up the reload helper and direct move functions. */
2748 if (TARGET_VSX || TARGET_ALTIVEC)
2750 if (TARGET_64BIT)
2752 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2753 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2754 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2755 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2756 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2757 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2758 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2759 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2760 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2761 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2762 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2763 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2764 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2765 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2766 if (TARGET_VSX && TARGET_UPPER_REGS_DF)
2768 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2769 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2770 reg_addr[DFmode].scalar_in_vmx_p = true;
2771 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2772 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2774 if (TARGET_P8_VECTOR)
2776 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2777 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2778 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2779 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2780 if (TARGET_UPPER_REGS_SF)
2781 reg_addr[SFmode].scalar_in_vmx_p = true;
2783 if (TARGET_VSX_TIMODE)
2785 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2786 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2788 if (TARGET_DIRECT_MOVE)
2790 if (TARGET_POWERPC64)
2792 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2793 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2794 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2795 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2796 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2797 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2798 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2799 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2800 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2802 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2803 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2804 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2805 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2806 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2807 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2808 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2809 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2810 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2812 else
2814 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
2815 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
2816 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
2820 else
2822 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
2823 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
2824 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
2825 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
2826 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
2827 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
2828 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
2829 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
2830 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
2831 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
2832 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
2833 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
2834 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
2835 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
2836 if (TARGET_VSX && TARGET_UPPER_REGS_DF)
2838 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
2839 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
2840 reg_addr[DFmode].scalar_in_vmx_p = true;
2841 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
2842 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
2844 if (TARGET_P8_VECTOR)
2846 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
2847 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
2848 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
2849 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
2850 if (TARGET_UPPER_REGS_SF)
2851 reg_addr[SFmode].scalar_in_vmx_p = true;
2853 if (TARGET_VSX_TIMODE)
2855 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
2856 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
2861 /* Precalculate HARD_REGNO_NREGS. */
2862 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2863 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2864 rs6000_hard_regno_nregs[m][r]
2865 = rs6000_hard_regno_nregs_internal (r, (enum machine_mode)m);
2867 /* Precalculate HARD_REGNO_MODE_OK. */
2868 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2869 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2870 if (rs6000_hard_regno_mode_ok (r, (enum machine_mode)m))
2871 rs6000_hard_regno_mode_ok_p[m][r] = true;
2873 /* Precalculate CLASS_MAX_NREGS sizes. */
2874 for (c = 0; c < LIM_REG_CLASSES; ++c)
2876 int reg_size;
2878 if (TARGET_VSX && VSX_REG_CLASS_P (c))
2879 reg_size = UNITS_PER_VSX_WORD;
2881 else if (c == ALTIVEC_REGS)
2882 reg_size = UNITS_PER_ALTIVEC_WORD;
2884 else if (c == FLOAT_REGS)
2885 reg_size = UNITS_PER_FP_WORD;
2887 else
2888 reg_size = UNITS_PER_WORD;
2890 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2892 enum machine_mode m2 = (enum machine_mode)m;
2893 int reg_size2 = reg_size;
2895 /* TFmode/TDmode always takes 2 registers, even in VSX. */
2896 if (TARGET_VSX && VSX_REG_CLASS_P (c)
2897 && (m == TDmode || m == TFmode))
2898 reg_size2 = UNITS_PER_FP_WORD;
2900 rs6000_class_max_nregs[m][c]
2901 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
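/* Worked example (illustrative): with VSX, V2DFmode in a VSX register
   class needs (16 + 16 - 1) / 16 = 1 register, but TFmode is forced
   back to UNITS_PER_FP_WORD and needs (16 + 8 - 1) / 8 = 2.  */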
2905 if (TARGET_E500_DOUBLE)
2906 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
2908 /* Calculate which modes to automatically generate code for, using the
2909 reciprocal divide and square root instructions. In the future, possibly
2910 automatically generate the instructions even if the user did not specify
2911 -mrecip. The older machines' double-precision reciprocal square root
2912 estimate is not accurate enough. */
2913 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
2914 if (TARGET_FRES)
2915 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2916 if (TARGET_FRE)
2917 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2918 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2919 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2920 if (VECTOR_UNIT_VSX_P (V2DFmode))
2921 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2923 if (TARGET_FRSQRTES)
2924 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2925 if (TARGET_FRSQRTE)
2926 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2927 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2928 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2929 if (VECTOR_UNIT_VSX_P (V2DFmode))
2930 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2932 if (rs6000_recip_control)
2934 if (!flag_finite_math_only)
2935 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
2936 if (flag_trapping_math)
2937 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
2938 if (!flag_reciprocal_math)
2939 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
2940 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
2942 if (RS6000_RECIP_HAVE_RE_P (SFmode)
2943 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
2944 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2946 if (RS6000_RECIP_HAVE_RE_P (DFmode)
2947 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
2948 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2950 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
2951 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
2952 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2954 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
2955 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
2956 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2958 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
2959 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
2960 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
2962 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
2963 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
2964 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
2966 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
2967 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
2968 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
2970 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
2971 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
2972 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
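/* Usage note (illustrative): something like -mrecip=div,rsqrt combined
   with -ffast-math turns the HAVE bits above into AUTO bits, so the
   expanders will use the estimate instructions with refinement instead
   of full-precision divide/sqrt where the mode supports it.  */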
2976 /* Update the addr mask bits in reg_addr to help secondary reload and
2977 legitimate address support figure out the appropriate addressing to
2978 use. */
2979 rs6000_setup_reg_addr_masks ();
2981 if (global_init_p || TARGET_DEBUG_TARGET)
2983 if (TARGET_DEBUG_REG)
2984 rs6000_debug_reg_global ();
2986 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
2987 fprintf (stderr,
2988 "SImode variable mult cost = %d\n"
2989 "SImode constant mult cost = %d\n"
2990 "SImode short constant mult cost = %d\n"
2991 "DImode multipliciation cost = %d\n"
2992 "SImode division cost = %d\n"
2993 "DImode division cost = %d\n"
2994 "Simple fp operation cost = %d\n"
2995 "DFmode multiplication cost = %d\n"
2996 "SFmode division cost = %d\n"
2997 "DFmode division cost = %d\n"
2998 "cache line size = %d\n"
2999 "l1 cache size = %d\n"
3000 "l2 cache size = %d\n"
3001 "simultaneous prefetches = %d\n"
3002 "\n",
3003 rs6000_cost->mulsi,
3004 rs6000_cost->mulsi_const,
3005 rs6000_cost->mulsi_const9,
3006 rs6000_cost->muldi,
3007 rs6000_cost->divsi,
3008 rs6000_cost->divdi,
3009 rs6000_cost->fp,
3010 rs6000_cost->dmul,
3011 rs6000_cost->sdiv,
3012 rs6000_cost->ddiv,
3013 rs6000_cost->cache_line_size,
3014 rs6000_cost->l1_cache_size,
3015 rs6000_cost->l2_cache_size,
3016 rs6000_cost->simultaneous_prefetches);
3020 #if TARGET_MACHO
3021 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3023 static void
3024 darwin_rs6000_override_options (void)
3026 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3027 off. */
3028 rs6000_altivec_abi = 1;
3029 TARGET_ALTIVEC_VRSAVE = 1;
3030 rs6000_current_abi = ABI_DARWIN;
3032 if (DEFAULT_ABI == ABI_DARWIN
3033 && TARGET_64BIT)
3034 darwin_one_byte_bool = 1;
3036 if (TARGET_64BIT && ! TARGET_POWERPC64)
3038 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3039 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3041 if (flag_mkernel)
3043 rs6000_default_long_calls = 1;
3044 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3047 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3048 Altivec. */
3049 if (!flag_mkernel && !flag_apple_kext
3050 && TARGET_64BIT
3051 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3052 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3054 /* Unless the user (not the configurer) has explicitly overridden
3055 it with -mcpu=G3 or -mno-altivec, 10.5+ targets default to
3056 G4 unless targeting the kernel. */
3057 if (!flag_mkernel
3058 && !flag_apple_kext
3059 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3060 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3061 && ! global_options_set.x_rs6000_cpu_index)
3063 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3066 #endif
3068 /* If not otherwise specified by a target, make 'long double' equivalent to
3069 'double'. */
3071 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3072 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3073 #endif
3075 /* Return the builtin mask of the various options used that could affect which
3076 builtins were used. In the past we used target_flags, but we've run out of
3077 bits, and some options like SPE and PAIRED are no longer in
3078 target_flags. */
3080 HOST_WIDE_INT
3081 rs6000_builtin_mask_calculate (void)
3083 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3084 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3085 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3086 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3087 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3088 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3089 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3090 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3091 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3092 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3093 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3094 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3095 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3096 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3097 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3098 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
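/* Reading the mask (a sketch): compiling with -maltivec -mvsx sets at
   least RS6000_BTM_ALTIVEC | RS6000_BTM_VSX in the returned value, and
   the builtin machinery can then test individual RS6000_BTM_* bits to
   decide which builtins to enable.  */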
3101 /* Override command line options. Mostly we process the processor type and
3102 sometimes adjust other TARGET_ options. */
3104 static bool
3105 rs6000_option_override_internal (bool global_init_p)
3107 bool ret = true;
3108 bool have_cpu = false;
3110 /* The default cpu requested at configure time, if any. */
3111 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3113 HOST_WIDE_INT set_masks;
3114 int cpu_index;
3115 int tune_index;
3116 struct cl_target_option *main_target_opt
3117 = ((global_init_p || target_option_default_node == NULL)
3118 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3120 /* Remember the explicit arguments. */
3121 if (global_init_p)
3122 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3124 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3125 library functions, so warn about it. The flag may be useful for
3126 performance studies from time to time though, so don't disable it
3127 entirely. */
3128 if (global_options_set.x_rs6000_alignment_flags
3129 && rs6000_alignment_flags == MASK_ALIGN_POWER
3130 && DEFAULT_ABI == ABI_DARWIN
3131 && TARGET_64BIT)
3132 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3133 " it is incompatible with the installed C and C++ libraries");
3135 /* Numerous experiments show that IRA-based loop pressure
3136 calculation works better for RTL loop invariant motion on targets
3137 with enough (>= 32) registers. It is an expensive optimization,
3138 so it is enabled only when optimizing for peak performance. */
3139 if (optimize >= 3 && global_init_p
3140 && !global_options_set.x_flag_ira_loop_pressure)
3141 flag_ira_loop_pressure = 1;
3143 /* Set the pointer size. */
3144 if (TARGET_64BIT)
3146 rs6000_pmode = (int)DImode;
3147 rs6000_pointer_size = 64;
3149 else
3151 rs6000_pmode = (int)SImode;
3152 rs6000_pointer_size = 32;
3155 /* Some OSs don't support saving the high part of 64-bit registers on context
3156 switch. Other OSs don't support saving Altivec registers. On those OSs,
3157 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3158 if the user wants either, the user must explicitly specify them and we
3159 won't interfere with the user's specification. */
3161 set_masks = POWERPC_MASKS;
3162 #ifdef OS_MISSING_POWERPC64
3163 if (OS_MISSING_POWERPC64)
3164 set_masks &= ~OPTION_MASK_POWERPC64;
3165 #endif
3166 #ifdef OS_MISSING_ALTIVEC
3167 if (OS_MISSING_ALTIVEC)
3168 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3169 #endif
3171 /* Don't let the processor default override flags given explicitly. */
3172 set_masks &= ~rs6000_isa_flags_explicit;
3174 /* Process the -mcpu=<xxx> and -mtune=<xxx> argument. If the user changed
3175 the cpu in a target attribute or pragma, but did not specify a tuning
3176 option, use the cpu for the tuning option rather than the option specified
3177 with -mtune on the command line. Process a '--with-cpu' configuration
3178 request as an implicit --cpu. */
3179 if (rs6000_cpu_index >= 0)
3181 cpu_index = rs6000_cpu_index;
3182 have_cpu = true;
3184 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3186 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3187 have_cpu = true;
3189 else if (implicit_cpu)
3191 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3192 have_cpu = true;
3194 else
3196 const char *default_cpu = (TARGET_POWERPC64 ? "powerpc64" : "powerpc");
3197 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3198 have_cpu = false;
3201 gcc_assert (cpu_index >= 0);
3203 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3204 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3205 with those from the cpu, except for options that were explicitly set. If
3206 we don't have a cpu, do not override the target bits set in
3207 TARGET_DEFAULT. */
3208 if (have_cpu)
3210 rs6000_isa_flags &= ~set_masks;
3211 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3212 & set_masks);
3214 else
3215 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3216 & ~rs6000_isa_flags_explicit);
3218 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3219 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3220 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3221 to using rs6000_isa_flags, we need to do the initialization here. */
3222 if (!have_cpu)
3223 rs6000_isa_flags |= (TARGET_DEFAULT & ~rs6000_isa_flags_explicit);
3225 if (rs6000_tune_index >= 0)
3226 tune_index = rs6000_tune_index;
3227 else if (have_cpu)
3228 rs6000_tune_index = tune_index = cpu_index;
3229 else
3231 size_t i;
3232 enum processor_type tune_proc
3233 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3235 tune_index = -1;
3236 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3237 if (processor_target_table[i].processor == tune_proc)
3239 rs6000_tune_index = tune_index = i;
3240 break;
3244 gcc_assert (tune_index >= 0);
3245 rs6000_cpu = processor_target_table[tune_index].processor;
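/* Summary of the precedence just applied (comment added for clarity,
   not in the original source): an explicit -mtune= wins; otherwise the
   cpu chosen via -mcpu=, a target attribute/pragma, or --with-cpu=
   supplies the tuning; failing all of those, PROCESSOR_DEFAULT or
   PROCESSOR_DEFAULT64 is used.  */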
3247 /* Pick defaults for SPE-related control flags. Do this early to make sure
3248 that the TARGET_ macros are representative as soon as possible. */
3250 int spe_capable_cpu =
3251 (rs6000_cpu == PROCESSOR_PPC8540
3252 || rs6000_cpu == PROCESSOR_PPC8548);
3254 if (!global_options_set.x_rs6000_spe_abi)
3255 rs6000_spe_abi = spe_capable_cpu;
3257 if (!global_options_set.x_rs6000_spe)
3258 rs6000_spe = spe_capable_cpu;
3260 if (!global_options_set.x_rs6000_float_gprs)
3261 rs6000_float_gprs =
3262 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3263 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3264 : 0);
3267 if (global_options_set.x_rs6000_spe_abi
3268 && rs6000_spe_abi
3269 && !TARGET_SPE_ABI)
3270 error ("not configured for SPE ABI");
3272 if (global_options_set.x_rs6000_spe
3273 && rs6000_spe
3274 && !TARGET_SPE)
3275 error ("not configured for SPE instruction set");
3277 if (main_target_opt != NULL
3278 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3279 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3280 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3281 error ("target attribute or pragma changes SPE ABI");
3283 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3284 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3285 || rs6000_cpu == PROCESSOR_PPCE5500)
3287 if (TARGET_ALTIVEC)
3288 error ("AltiVec not supported in this target");
3289 if (TARGET_SPE)
3290 error ("SPE not supported in this target");
3292 if (rs6000_cpu == PROCESSOR_PPCE6500)
3294 if (TARGET_SPE)
3295 error ("SPE not supported in this target");
3298 /* Disable Cell microcode if we are optimizing for the Cell
3299 and not optimizing for size. */
3300 if (rs6000_gen_cell_microcode == -1)
3301 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3302 && !optimize_size);
3304 /* If we are optimizing big endian systems for space and it's OK to
3305 use instructions that would be microcoded on the Cell, use the
3306 load/store multiple and string instructions. */
3307 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3308 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3309 | OPTION_MASK_STRING);
3311 /* Don't allow -mmultiple or -mstring on little endian systems
3312 unless the cpu is a 750, because the hardware doesn't support the
3313 instructions used in little endian mode and they cause an alignment
3314 trap. The 750 does not cause an alignment trap (except when the
3315 target address is unaligned). */
3317 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3319 if (TARGET_MULTIPLE)
3321 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3322 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3323 warning (0, "-mmultiple is not supported on little endian systems");
3326 if (TARGET_STRING)
3328 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3329 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3330 warning (0, "-mstring is not supported on little endian systems");
3334 /* If little-endian, default to -mstrict-align on older processors.
3335 Testing for HTM matches power8 and later. */
3336 if (!BYTES_BIG_ENDIAN
3337 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3338 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3340 /* -maltivec={le,be} implies -maltivec. */
3341 if (rs6000_altivec_element_order != 0)
3342 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3344 /* Disallow -maltivec=le in big endian mode for now. This is not
3345 known to be useful for anyone. */
3346 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3348 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3349 rs6000_altivec_element_order = 0;
3352 /* Add some warnings for VSX. */
3353 if (TARGET_VSX)
3355 const char *msg = NULL;
3356 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3357 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3359 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3360 msg = N_("-mvsx requires hardware floating point");
3361 else
3363 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3364 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3367 else if (TARGET_PAIRED_FLOAT)
3368 msg = N_("-mvsx and -mpaired are incompatible");
3369 else if (TARGET_AVOID_XFORM > 0)
3370 msg = N_("-mvsx needs indexed addressing");
3371 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3372 & OPTION_MASK_ALTIVEC))
3374 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3375 msg = N_("-mvsx and -mno-altivec are incompatible");
3376 else
3377 msg = N_("-mno-altivec disables vsx");
3380 if (msg)
3382 warning (0, msg);
3383 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3384 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3388 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3389 the -mcpu setting to enable options that conflict. */
3390 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3391 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3392 | OPTION_MASK_ALTIVEC
3393 | OPTION_MASK_VSX)) != 0)
3394 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3395 | OPTION_MASK_DIRECT_MOVE)
3396 & ~rs6000_isa_flags_explicit);
3398 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3399 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3401 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3402 unless the user explicitly used the -mno-<option> to disable the code. */
3403 if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3404 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3405 else if (TARGET_VSX)
3406 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3407 else if (TARGET_POPCNTD)
3408 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3409 else if (TARGET_DFP)
3410 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3411 else if (TARGET_CMPB)
3412 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3413 else if (TARGET_FPRND)
3414 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3415 else if (TARGET_POPCNTB)
3416 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3417 else if (TARGET_ALTIVEC)
3418 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
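/* For instance (illustrative, not exhaustive): -mcpu=power7 turns on VSX,
   so ISA_2_6_MASKS_SERVER above also pulls in -maltivec, -mpopcntd and the
   other 2.06 options -- unless the user explicitly passed e.g. -mno-altivec,
   in which case the bit in rs6000_isa_flags_explicit blocks the inheritance
   (and the -mvsx/-mno-altivec conflict is diagnosed above).  */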
3420 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3422 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3423 error ("-mcrypto requires -maltivec");
3424 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3427 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3429 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3430 error ("-mdirect-move requires -mvsx");
3431 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3434 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3436 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3437 error ("-mpower8-vector requires -maltivec");
3438 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3441 if (TARGET_P8_VECTOR && !TARGET_VSX)
3443 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3444 error ("-mpower8-vector requires -mvsx");
3445 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3448 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3450 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3451 error ("-mvsx-timode requires -mvsx");
3452 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3455 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3457 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3458 error ("-mhard-dfp requires -mhard-float");
3459 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3462 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3463 silently turn off quad memory mode. */
3464 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3466 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3467 warning (0, N_("-mquad-memory requires 64-bit mode"));
3469 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3470 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3472 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3473 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3476 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3477 the words are reversed, but atomic operations can still be done by
3478 swapping the words. */
3479 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3481 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3482 warning (0, N_("-mquad-memory is not available in little endian mode"));
3484 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3487 /* Assume that if the user asked for normal quad memory instructions, they want
3488 the atomic versions as well, unless they explicitly told us not to use quad
3489 word atomic instructions. */
3490 if (TARGET_QUAD_MEMORY
3491 && !TARGET_QUAD_MEMORY_ATOMIC
3492 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3493 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3495 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3496 generating power8 instructions. */
3497 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3498 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3499 & OPTION_MASK_P8_FUSION);
3501 /* Power8 does not fuse sign-extended loads with the addis. If we are
3502 optimizing at high levels for speed, convert a sign-extended load into a
3503 zero-extending load plus an explicit sign extension. */
3504 if (TARGET_P8_FUSION
3505 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3506 && optimize_function_for_speed_p (cfun)
3507 && optimize >= 3)
3508 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
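/* Illustrative assembly sketch (hand-written, not compiler output): the
   idea is to turn a non-fusible sign-extending load pair such as
       addis r9,r2,hi ; lwa r9,lo(r9)
   into
       addis r9,r2,hi ; lwz r9,lo(r9) ; extsw r9,r9
   so that the addis/lwz pair can fuse on power8.  */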
3510 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3511 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
3513 /* E500mc does "better" if we inline more aggressively. Respect the
3514 user's opinion, though. */
3515 if (rs6000_block_move_inline_limit == 0
3516 && (rs6000_cpu == PROCESSOR_PPCE500MC
3517 || rs6000_cpu == PROCESSOR_PPCE500MC64
3518 || rs6000_cpu == PROCESSOR_PPCE5500
3519 || rs6000_cpu == PROCESSOR_PPCE6500))
3520 rs6000_block_move_inline_limit = 128;
3522 /* store_one_arg depends on expand_block_move to handle at least the
3523 size of reg_parm_stack_space. */
3524 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
3525 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
3527 if (global_init_p)
3529 /* If the appropriate debug option is enabled, replace the target hooks
3530 with debug versions that call the real version and then print
3531 debugging information. */
3532 if (TARGET_DEBUG_COST)
3534 targetm.rtx_costs = rs6000_debug_rtx_costs;
3535 targetm.address_cost = rs6000_debug_address_cost;
3536 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
3539 if (TARGET_DEBUG_ADDR)
3541 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
3542 targetm.legitimize_address = rs6000_debug_legitimize_address;
3543 rs6000_secondary_reload_class_ptr
3544 = rs6000_debug_secondary_reload_class;
3545 rs6000_secondary_memory_needed_ptr
3546 = rs6000_debug_secondary_memory_needed;
3547 rs6000_cannot_change_mode_class_ptr
3548 = rs6000_debug_cannot_change_mode_class;
3549 rs6000_preferred_reload_class_ptr
3550 = rs6000_debug_preferred_reload_class;
3551 rs6000_legitimize_reload_address_ptr
3552 = rs6000_debug_legitimize_reload_address;
3553 rs6000_mode_dependent_address_ptr
3554 = rs6000_debug_mode_dependent_address;
3557 if (rs6000_veclibabi_name)
3559 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
3560 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
3561 else
3563 error ("unknown vectorization library ABI type (%s) for "
3564 "-mveclibabi= switch", rs6000_veclibabi_name);
3565 ret = false;
3570 if (!global_options_set.x_rs6000_long_double_type_size)
3572 if (main_target_opt != NULL
3573 && (main_target_opt->x_rs6000_long_double_type_size
3574 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
3575 error ("target attribute or pragma changes long double size");
3576 else
3577 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
3580 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
3581 if (!global_options_set.x_rs6000_ieeequad)
3582 rs6000_ieeequad = 1;
3583 #endif
3585 /* Disable VSX and AltiVec silently if the user switched cpus to power7 in a
3586 target attribute or pragma, which automatically enables both options,
3587 unless the AltiVec ABI was set. That ABI is on by default for 64-bit, but
3588 not for 32-bit. */
3589 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3590 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3591 & ~rs6000_isa_flags_explicit);
3593 /* Enable Altivec ABI for AIX -maltivec. */
3594 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
3596 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3597 error ("target attribute or pragma changes AltiVec ABI");
3598 else
3599 rs6000_altivec_abi = 1;
3602 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
3603 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
3604 be explicitly overridden in either case. */
3605 if (TARGET_ELF)
3607 if (!global_options_set.x_rs6000_altivec_abi
3608 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
3610 if (main_target_opt != NULL &&
3611 !main_target_opt->x_rs6000_altivec_abi)
3612 error ("target attribute or pragma changes AltiVec ABI");
3613 else
3614 rs6000_altivec_abi = 1;
3618 /* Set the Darwin64 ABI as default for 64-bit Darwin.
3619 So far, the only darwin64 targets are also MACH-O. */
3620 if (TARGET_MACHO
3621 && DEFAULT_ABI == ABI_DARWIN
3622 && TARGET_64BIT)
3624 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
3625 error ("target attribute or pragma changes darwin64 ABI");
3626 else
3628 rs6000_darwin64_abi = 1;
3629 /* Default to natural alignment, for better performance. */
3630 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3634 /* Place FP constants in the constant pool instead of the TOC
3635 if section anchors are enabled. */
3636 if (flag_section_anchors
3637 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
3638 TARGET_NO_FP_IN_TOC = 1;
3640 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3641 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
3643 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3644 SUBTARGET_OVERRIDE_OPTIONS;
3645 #endif
3646 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3647 SUBSUBTARGET_OVERRIDE_OPTIONS;
3648 #endif
3649 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
3650 SUB3TARGET_OVERRIDE_OPTIONS;
3651 #endif
3653 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3654 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
3656 /* For the E500 family of cores, reset the single/double FP flags to let us
3657 check that they remain constant across attributes or pragmas. Also,
3658 clear a possible request for string instructions, which are not supported
3659 on these cores and which we might have silently enabled above for -Os.
3661 For other families, clear ISEL in case it was set implicitly.
3664 switch (rs6000_cpu)
3666 case PROCESSOR_PPC8540:
3667 case PROCESSOR_PPC8548:
3668 case PROCESSOR_PPCE500MC:
3669 case PROCESSOR_PPCE500MC64:
3670 case PROCESSOR_PPCE5500:
3671 case PROCESSOR_PPCE6500:
3673 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
3674 rs6000_double_float = TARGET_E500_DOUBLE;
3676 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3678 break;
3680 default:
3682 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
3683 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
3685 break;
3688 if (main_target_opt)
3690 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
3691 error ("target attribute or pragma changes single precision floating "
3692 "point");
3693 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
3694 error ("target attribute or pragma changes double precision floating "
3695 "point");
3698 /* Detect invalid option combinations with E500. */
3699 CHECK_E500_OPTIONS;
3701 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
3702 && rs6000_cpu != PROCESSOR_POWER5
3703 && rs6000_cpu != PROCESSOR_POWER6
3704 && rs6000_cpu != PROCESSOR_POWER7
3705 && rs6000_cpu != PROCESSOR_POWER8
3706 && rs6000_cpu != PROCESSOR_PPCA2
3707 && rs6000_cpu != PROCESSOR_CELL
3708 && rs6000_cpu != PROCESSOR_PPC476);
3709 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
3710 || rs6000_cpu == PROCESSOR_POWER5
3711 || rs6000_cpu == PROCESSOR_POWER7
3712 || rs6000_cpu == PROCESSOR_POWER8);
3713 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
3714 || rs6000_cpu == PROCESSOR_POWER5
3715 || rs6000_cpu == PROCESSOR_POWER6
3716 || rs6000_cpu == PROCESSOR_POWER7
3717 || rs6000_cpu == PROCESSOR_POWER8
3718 || rs6000_cpu == PROCESSOR_PPCE500MC
3719 || rs6000_cpu == PROCESSOR_PPCE500MC64
3720 || rs6000_cpu == PROCESSOR_PPCE5500
3721 || rs6000_cpu == PROCESSOR_PPCE6500);
3723 /* Allow debug switches to override the above settings. These are set to -1
3724 in rs6000.opt to indicate the user hasn't directly set the switch. */
3725 if (TARGET_ALWAYS_HINT >= 0)
3726 rs6000_always_hint = TARGET_ALWAYS_HINT;
3728 if (TARGET_SCHED_GROUPS >= 0)
3729 rs6000_sched_groups = TARGET_SCHED_GROUPS;
3731 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
3732 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
3734 rs6000_sched_restricted_insns_priority
3735 = (rs6000_sched_groups ? 1 : 0);
3737 /* Handle -msched-costly-dep option. */
3738 rs6000_sched_costly_dep
3739 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
3741 if (rs6000_sched_costly_dep_str)
3743 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
3744 rs6000_sched_costly_dep = no_dep_costly;
3745 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
3746 rs6000_sched_costly_dep = all_deps_costly;
3747 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
3748 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
3749 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
3750 rs6000_sched_costly_dep = store_to_load_dep_costly;
3751 else
3752 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
3753 atoi (rs6000_sched_costly_dep_str));
3756 /* Handle -minsert-sched-nops option. */
3757 rs6000_sched_insert_nops
3758 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
3760 if (rs6000_sched_insert_nops_str)
3762 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
3763 rs6000_sched_insert_nops = sched_finish_none;
3764 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
3765 rs6000_sched_insert_nops = sched_finish_pad_groups;
3766 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
3767 rs6000_sched_insert_nops = sched_finish_regroup_exact;
3768 else
3769 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
3770 atoi (rs6000_sched_insert_nops_str));
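/* Usage illustration (hypothetical command lines): both switches accept a
   keyword or a bare integer, e.g.
       -msched-costly-dep=true_store_to_load   -minsert-sched-nops=pad
       -msched-costly-dep=5                    -minsert-sched-nops=3
   the integer spellings fall through to atoi and are cast directly to the
   enum values above.  */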
3773 if (global_init_p)
3775 #ifdef TARGET_REGNAMES
3776 /* If the user desires alternate register names, copy in the
3777 alternate names now. */
3778 if (TARGET_REGNAMES)
3779 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
3780 #endif
3782 /* Set aix_struct_return last, after the ABI is determined.
3783 If -maix-struct-return or -msvr4-struct-return was explicitly
3784 used, don't override with the ABI default. */
3785 if (!global_options_set.x_aix_struct_return)
3786 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
3788 #if 0
3789 /* IBM XL compiler defaults to unsigned bitfields. */
3790 if (TARGET_XL_COMPAT)
3791 flag_signed_bitfields = 0;
3792 #endif
3794 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3795 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
3797 if (TARGET_TOC)
3798 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
3800 /* We can only guarantee the availability of DI pseudo-ops when
3801 assembling for 64-bit targets. */
3802 if (!TARGET_64BIT)
3804 targetm.asm_out.aligned_op.di = NULL;
3805 targetm.asm_out.unaligned_op.di = NULL;
3809 /* Set branch target alignment, if not optimizing for size. */
3810 if (!optimize_size)
3812 /* Cell wants to be aligned 8-byte for dual issue. Titan wants to be
3813 aligned 8-byte to avoid misprediction by the branch predictor. */
3814 if (rs6000_cpu == PROCESSOR_TITAN
3815 || rs6000_cpu == PROCESSOR_CELL)
3817 if (align_functions <= 0)
3818 align_functions = 8;
3819 if (align_jumps <= 0)
3820 align_jumps = 8;
3821 if (align_loops <= 0)
3822 align_loops = 8;
3824 if (rs6000_align_branch_targets)
3826 if (align_functions <= 0)
3827 align_functions = 16;
3828 if (align_jumps <= 0)
3829 align_jumps = 16;
3830 if (align_loops <= 0)
3832 can_override_loop_align = 1;
3833 align_loops = 16;
3836 if (align_jumps_max_skip <= 0)
3837 align_jumps_max_skip = 15;
3838 if (align_loops_max_skip <= 0)
3839 align_loops_max_skip = 15;
3842 /* Arrange to save and restore machine status around nested functions. */
3843 init_machine_status = rs6000_init_machine_status;
3845 /* We should always be splitting complex arguments, but we can't break
3846 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
3847 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
3848 targetm.calls.split_complex_arg = NULL;
3851 /* Initialize rs6000_cost with the appropriate target costs. */
3852 if (optimize_size)
3853 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
3854 else
3855 switch (rs6000_cpu)
3857 case PROCESSOR_RS64A:
3858 rs6000_cost = &rs64a_cost;
3859 break;
3861 case PROCESSOR_MPCCORE:
3862 rs6000_cost = &mpccore_cost;
3863 break;
3865 case PROCESSOR_PPC403:
3866 rs6000_cost = &ppc403_cost;
3867 break;
3869 case PROCESSOR_PPC405:
3870 rs6000_cost = &ppc405_cost;
3871 break;
3873 case PROCESSOR_PPC440:
3874 rs6000_cost = &ppc440_cost;
3875 break;
3877 case PROCESSOR_PPC476:
3878 rs6000_cost = &ppc476_cost;
3879 break;
3881 case PROCESSOR_PPC601:
3882 rs6000_cost = &ppc601_cost;
3883 break;
3885 case PROCESSOR_PPC603:
3886 rs6000_cost = &ppc603_cost;
3887 break;
3889 case PROCESSOR_PPC604:
3890 rs6000_cost = &ppc604_cost;
3891 break;
3893 case PROCESSOR_PPC604e:
3894 rs6000_cost = &ppc604e_cost;
3895 break;
3897 case PROCESSOR_PPC620:
3898 rs6000_cost = &ppc620_cost;
3899 break;
3901 case PROCESSOR_PPC630:
3902 rs6000_cost = &ppc630_cost;
3903 break;
3905 case PROCESSOR_CELL:
3906 rs6000_cost = &ppccell_cost;
3907 break;
3909 case PROCESSOR_PPC750:
3910 case PROCESSOR_PPC7400:
3911 rs6000_cost = &ppc750_cost;
3912 break;
3914 case PROCESSOR_PPC7450:
3915 rs6000_cost = &ppc7450_cost;
3916 break;
3918 case PROCESSOR_PPC8540:
3919 case PROCESSOR_PPC8548:
3920 rs6000_cost = &ppc8540_cost;
3921 break;
3923 case PROCESSOR_PPCE300C2:
3924 case PROCESSOR_PPCE300C3:
3925 rs6000_cost = &ppce300c2c3_cost;
3926 break;
3928 case PROCESSOR_PPCE500MC:
3929 rs6000_cost = &ppce500mc_cost;
3930 break;
3932 case PROCESSOR_PPCE500MC64:
3933 rs6000_cost = &ppce500mc64_cost;
3934 break;
3936 case PROCESSOR_PPCE5500:
3937 rs6000_cost = &ppce5500_cost;
3938 break;
3940 case PROCESSOR_PPCE6500:
3941 rs6000_cost = &ppce6500_cost;
3942 break;
3944 case PROCESSOR_TITAN:
3945 rs6000_cost = &titan_cost;
3946 break;
3948 case PROCESSOR_POWER4:
3949 case PROCESSOR_POWER5:
3950 rs6000_cost = &power4_cost;
3951 break;
3953 case PROCESSOR_POWER6:
3954 rs6000_cost = &power6_cost;
3955 break;
3957 case PROCESSOR_POWER7:
3958 rs6000_cost = &power7_cost;
3959 break;
3961 case PROCESSOR_POWER8:
3962 rs6000_cost = &power8_cost;
3963 break;
3965 case PROCESSOR_PPCA2:
3966 rs6000_cost = &ppca2_cost;
3967 break;
3969 default:
3970 gcc_unreachable ();
3973 if (global_init_p)
3975 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3976 rs6000_cost->simultaneous_prefetches,
3977 global_options.x_param_values,
3978 global_options_set.x_param_values);
3979 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
3980 global_options.x_param_values,
3981 global_options_set.x_param_values);
3982 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3983 rs6000_cost->cache_line_size,
3984 global_options.x_param_values,
3985 global_options_set.x_param_values);
3986 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
3987 global_options.x_param_values,
3988 global_options_set.x_param_values);
3990 /* Increase loop peeling limits based on performance analysis. */
3991 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
3992 global_options.x_param_values,
3993 global_options_set.x_param_values);
3994 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
3995 global_options.x_param_values,
3996 global_options_set.x_param_values);
3998 /* If using typedef char *va_list, signal that
3999 __builtin_va_start (&ap, 0) can be optimized to
4000 ap = __builtin_next_arg (0). */
4001 if (DEFAULT_ABI != ABI_V4)
4002 targetm.expand_builtin_va_start = NULL;
4005 /* Set up single/double float flags.
4006 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4007 then set both flags. */
4008 if (TARGET_HARD_FLOAT && TARGET_FPRS
4009 && rs6000_single_float == 0 && rs6000_double_float == 0)
4010 rs6000_single_float = rs6000_double_float = 1;
4012 /* If not explicitly specified via option, decide whether to generate indexed
4013 load/store instructions. */
4014 if (TARGET_AVOID_XFORM == -1)
4015 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4016 DERAT mispredict penalty. However, the LVE and STVE AltiVec instructions
4017 need indexed accesses, and the type used is the scalar type of the element
4018 being loaded or stored. */
4019 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4020 && !TARGET_ALTIVEC);
4022 /* Set the -mrecip options. */
4023 if (rs6000_recip_name)
4025 char *p = ASTRDUP (rs6000_recip_name);
4026 char *q;
4027 unsigned int mask, i;
4028 bool invert;
4030 while ((q = strtok (p, ",")) != NULL)
4032 p = NULL;
4033 if (*q == '!')
4035 invert = true;
4036 q++;
4038 else
4039 invert = false;
4041 if (!strcmp (q, "default"))
4042 mask = ((TARGET_RECIP_PRECISION)
4043 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4044 else
4046 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4047 if (!strcmp (q, recip_options[i].string))
4049 mask = recip_options[i].mask;
4050 break;
4053 if (i == ARRAY_SIZE (recip_options))
4055 error ("unknown option for -mrecip=%s", q);
4056 invert = false;
4057 mask = 0;
4058 ret = false;
4062 if (invert)
4063 rs6000_recip_control &= ~mask;
4064 else
4065 rs6000_recip_control |= mask;
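/* Usage illustration (hypothetical values): the string is parsed as a
   comma-separated list where a leading '!' inverts the entry, so
       -mrecip=default,!rsqrtd
   first ors in the default mask chosen above and then clears the bits the
   "rsqrtd" entry names via the invert path.  */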
4069 /* Set the builtin mask for the various options that could affect which
4070 builtins are available. In the past we used target_flags, but we've run out
4071 of bits, and some options like SPE and PAIRED are no longer in
4072 target_flags. */
4073 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4074 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4076 fprintf (stderr,
4077 "new builtin mask = " HOST_WIDE_INT_PRINT_HEX ", ",
4078 rs6000_builtin_mask);
4079 rs6000_print_builtin_options (stderr, 0, NULL, rs6000_builtin_mask);
4082 /* Initialize all of the registers. */
4083 rs6000_init_hard_regno_mode_ok (global_init_p);
4085 /* Save the initial options in case the user uses function-specific options. */
4086 if (global_init_p)
4087 target_option_default_node = target_option_current_node
4088 = build_target_option_node (&global_options);
4090 /* If not explicitly specified via option, decide whether to generate the
4091 extra blr's required to preserve the link stack on some cpus (e.g., 476). */
4092 if (TARGET_LINK_STACK == -1)
4093 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4095 return ret;
4098 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4099 define the target cpu type. */
4101 static void
4102 rs6000_option_override (void)
4104 (void) rs6000_option_override_internal (true);
4106 /* Register machine-specific passes. This needs to be done at start-up.
4107 It's convenient to do it here (like i386 does). */
4108 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4110 static struct register_pass_info analyze_swaps_info
4111 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4113 register_pass (&analyze_swaps_info);
4117 /* Implement targetm.vectorize.builtin_mask_for_load. */
4118 static tree
4119 rs6000_builtin_mask_for_load (void)
4121 if (TARGET_ALTIVEC || TARGET_VSX)
4122 return altivec_builtin_mask_for_load;
4123 else
4124 return 0;
4127 /* Implement LOOP_ALIGN. */
4129 rs6000_loop_align (rtx label)
4131 basic_block bb;
4132 int ninsns;
4134 /* Don't override loop alignment if -falign-loops was specified. */
4135 if (!can_override_loop_align)
4136 return align_loops_log;
4138 bb = BLOCK_FOR_INSN (label);
4139 ninsns = num_loop_insns(bb->loop_father);
4141 /* Align small loops to 32 bytes to fit in an icache sector; otherwise return the default. */
4142 if (ninsns > 4 && ninsns <= 8
4143 && (rs6000_cpu == PROCESSOR_POWER4
4144 || rs6000_cpu == PROCESSOR_POWER5
4145 || rs6000_cpu == PROCESSOR_POWER6
4146 || rs6000_cpu == PROCESSOR_POWER7
4147 || rs6000_cpu == PROCESSOR_POWER8))
4148 return 5;
4149 else
4150 return align_loops_log;
4153 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4154 static int
4155 rs6000_loop_align_max_skip (rtx_insn *label)
4157 return (1 << rs6000_loop_align (label)) - 1;
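/* E.g. when rs6000_loop_align returned 5 above (32-byte alignment), this
   allows at most (1 << 5) - 1 == 31 bytes of padding to be inserted to
   reach that alignment.  */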
4160 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4161 after applying N iterations. This routine does not determine
4162 how many iterations are required to reach the desired alignment. */
4164 static bool
4165 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4167 if (is_packed)
4168 return false;
4170 if (TARGET_32BIT)
4172 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4173 return true;
4175 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4176 return true;
4178 return false;
4180 else
4182 if (TARGET_MACHO)
4183 return false;
4185 /* Assume that all other types are naturally aligned. CHECKME! */
4186 return true;
4190 /* Return true if the vector misalignment factor is supported by the
4191 target. */
4192 static bool
4193 rs6000_builtin_support_vector_misalignment (enum machine_mode mode,
4194 const_tree type,
4195 int misalignment,
4196 bool is_packed)
4198 if (TARGET_VSX)
4200 /* Return false if the movmisalign pattern is not supported for this mode. */
4201 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4202 return false;
4204 if (misalignment == -1)
4206 /* Misalignment factor is unknown at compile time but we know
4207 it's word aligned. */
4208 if (rs6000_vector_alignment_reachable (type, is_packed))
4210 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4212 if (element_size == 64 || element_size == 32)
4213 return true;
4216 return false;
4219 /* VSX supports word-aligned vectors. */
4220 if (misalignment % 4 == 0)
4221 return true;
4223 return false;
4226 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4227 static int
4228 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4229 tree vectype, int misalign)
4231 unsigned elements;
4232 tree elem_type;
4234 switch (type_of_cost)
4236 case scalar_stmt:
4237 case scalar_load:
4238 case scalar_store:
4239 case vector_stmt:
4240 case vector_load:
4241 case vector_store:
4242 case vec_to_scalar:
4243 case scalar_to_vec:
4244 case cond_branch_not_taken:
4245 return 1;
4247 case vec_perm:
4248 if (TARGET_VSX)
4249 return 3;
4250 else
4251 return 1;
4253 case vec_promote_demote:
4254 if (TARGET_VSX)
4255 return 4;
4256 else
4257 return 1;
4259 case cond_branch_taken:
4260 return 3;
4262 case unaligned_load:
4263 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4265 elements = TYPE_VECTOR_SUBPARTS (vectype);
4266 if (elements == 2)
4267 /* Double word aligned. */
4268 return 2;
4270 if (elements == 4)
4272 switch (misalign)
4274 case 8:
4275 /* Double word aligned. */
4276 return 2;
4278 case -1:
4279 /* Unknown misalignment. */
4280 case 4:
4281 case 12:
4282 /* Word aligned. */
4283 return 22;
4285 default:
4286 gcc_unreachable ();
4291 if (TARGET_ALTIVEC)
4292 /* Misaligned loads are not supported. */
4293 gcc_unreachable ();
4295 return 2;
4297 case unaligned_store:
4298 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4300 elements = TYPE_VECTOR_SUBPARTS (vectype);
4301 if (elements == 2)
4302 /* Double word aligned. */
4303 return 2;
4305 if (elements == 4)
4307 switch (misalign)
4309 case 8:
4310 /* Double word aligned. */
4311 return 2;
4313 case -1:
4314 /* Unknown misalignment. */
4315 case 4:
4316 case 12:
4317 /* Word aligned. */
4318 return 23;
4320 default:
4321 gcc_unreachable ();
4326 if (TARGET_ALTIVEC)
4327 /* Misaligned stores are not supported. */
4328 gcc_unreachable ();
4330 return 2;
4332 case vec_construct:
4333 elements = TYPE_VECTOR_SUBPARTS (vectype);
4334 elem_type = TREE_TYPE (vectype);
4335 /* 32-bit floats loaded into registers are stored as double
4336 precision, so we need n/2 converts in addition to the usual
4337 n/2 merges to construct a vector of short floats from them. */
4338 if (SCALAR_FLOAT_TYPE_P (elem_type)
4339 && TYPE_PRECISION (elem_type) == 32)
4340 return elements + 1;
4341 else
4342 return elements / 2 + 1;
4344 default:
4345 gcc_unreachable ();
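/* Worked example of the table above (costs are abstract units, not cycles):
   for V4SF with VSX and misaligned moves allowed, an unaligned_load with
   misalign == 8 is doubleword aligned and costs 2, while misalign == 4 or
   12 (word aligned) costs 22; a vec_construct of four 32-bit floats costs
   4 + 1 == 5 because of the extra converts, whereas four ints cost
   4 / 2 + 1 == 3.  */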
4349 /* Implement targetm.vectorize.preferred_simd_mode. */
4351 static enum machine_mode
4352 rs6000_preferred_simd_mode (enum machine_mode mode)
4354 if (TARGET_VSX)
4355 switch (mode)
4357 case DFmode:
4358 return V2DFmode;
4359 default:;
4361 if (TARGET_ALTIVEC || TARGET_VSX)
4362 switch (mode)
4364 case SFmode:
4365 return V4SFmode;
4366 case TImode:
4367 return V1TImode;
4368 case DImode:
4369 return V2DImode;
4370 case SImode:
4371 return V4SImode;
4372 case HImode:
4373 return V8HImode;
4374 case QImode:
4375 return V16QImode;
4376 default:;
4378 if (TARGET_SPE)
4379 switch (mode)
4381 case SFmode:
4382 return V2SFmode;
4383 case SImode:
4384 return V2SImode;
4385 default:;
4387 if (TARGET_PAIRED_FLOAT
4388 && mode == SFmode)
4389 return V2SFmode;
4390 return word_mode;
4393 typedef struct _rs6000_cost_data
4395 struct loop *loop_info;
4396 unsigned cost[3];
4397 } rs6000_cost_data;
4399 /* Test for likely overcommitment of vector hardware resources. If a
4400 loop iteration is relatively large, and too large a percentage of
4401 instructions in the loop are vectorized, the cost model may not
4402 adequately reflect delays from unavailable vector resources.
4403 Penalize the loop body cost for this case. */
4405 static void
4406 rs6000_density_test (rs6000_cost_data *data)
4408 const int DENSITY_PCT_THRESHOLD = 85;
4409 const int DENSITY_SIZE_THRESHOLD = 70;
4410 const int DENSITY_PENALTY = 10;
4411 struct loop *loop = data->loop_info;
4412 basic_block *bbs = get_loop_body (loop);
4413 int nbbs = loop->num_nodes;
4414 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4415 int i, density_pct;
4417 for (i = 0; i < nbbs; i++)
4419 basic_block bb = bbs[i];
4420 gimple_stmt_iterator gsi;
4422 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4424 gimple stmt = gsi_stmt (gsi);
4425 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4427 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4428 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4429 not_vec_cost++;
4433 free (bbs);
4434 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4436 if (density_pct > DENSITY_PCT_THRESHOLD
4437 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4439 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4440 if (dump_enabled_p ())
4441 dump_printf_loc (MSG_NOTE, vect_location,
4442 "density %d%%, cost %d exceeds threshold, penalizing "
4443 "loop body cost by %d%%", density_pct,
4444 vec_cost + not_vec_cost, DENSITY_PENALTY);
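/* Worked example (made-up numbers): a standalone model of the computation
   above, runnable outside the compiler.  */
#if 0
#include <stdio.h>

int
main (void)
{
  int vec_cost = 90, not_vec_cost = 10;
  int density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);

  /* Thresholds as above: 85% density, total size 70.  */
  if (density_pct > 85 && vec_cost + not_vec_cost > 70)
    vec_cost = vec_cost * (100 + 10) / 100;   /* 10% penalty */

  printf ("density %d%%, penalized body cost %d\n", density_pct, vec_cost);
  /* prints: density 90%, penalized body cost 99 */
  return 0;
}
#endif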
4448 /* Implement targetm.vectorize.init_cost. */
4450 static void *
4451 rs6000_init_cost (struct loop *loop_info)
4453 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4454 data->loop_info = loop_info;
4455 data->cost[vect_prologue] = 0;
4456 data->cost[vect_body] = 0;
4457 data->cost[vect_epilogue] = 0;
4458 return data;
4461 /* Implement targetm.vectorize.add_stmt_cost. */
4463 static unsigned
4464 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4465 struct _stmt_vec_info *stmt_info, int misalign,
4466 enum vect_cost_model_location where)
4468 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4469 unsigned retval = 0;
4471 if (flag_vect_cost_model)
4473 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4474 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
4475 misalign);
4476 /* Statements in an inner loop relative to the loop being
4477 vectorized are weighted more heavily. The value here is
4478 arbitrary and could potentially be improved with analysis. */
4479 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4480 count *= 50; /* FIXME. */
4482 retval = (unsigned) (count * stmt_cost);
4483 cost_data->cost[where] += retval;
4486 return retval;
4489 /* Implement targetm.vectorize.finish_cost. */
4491 static void
4492 rs6000_finish_cost (void *data, unsigned *prologue_cost,
4493 unsigned *body_cost, unsigned *epilogue_cost)
4495 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4497 if (cost_data->loop_info)
4498 rs6000_density_test (cost_data);
4500 *prologue_cost = cost_data->cost[vect_prologue];
4501 *body_cost = cost_data->cost[vect_body];
4502 *epilogue_cost = cost_data->cost[vect_epilogue];
4505 /* Implement targetm.vectorize.destroy_cost_data. */
4507 static void
4508 rs6000_destroy_cost_data (void *data)
4510 free (data);
4513 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
4514 library with vectorized intrinsics. */
4516 static tree
4517 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
4519 char name[32];
4520 const char *suffix = NULL;
4521 tree fntype, new_fndecl, bdecl = NULL_TREE;
4522 int n_args = 1;
4523 const char *bname;
4524 enum machine_mode el_mode, in_mode;
4525 int n, in_n;
4527 /* Libmass is suitable for unsafe math only, as it does not correctly support
4528 parts of IEEE arithmetic, such as denormals, with the required precision.
4529 Only support it if we have VSX, so we can use the simd d2 or f4 functions.
4530 XXX: Add variable length support. */
4531 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
4532 return NULL_TREE;
4534 el_mode = TYPE_MODE (TREE_TYPE (type_out));
4535 n = TYPE_VECTOR_SUBPARTS (type_out);
4536 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4537 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4538 if (el_mode != in_mode
4539 || n != in_n)
4540 return NULL_TREE;
4542 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4544 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4545 switch (fn)
4547 case BUILT_IN_ATAN2:
4548 case BUILT_IN_HYPOT:
4549 case BUILT_IN_POW:
4550 n_args = 2;
4551 /* fall through */
4553 case BUILT_IN_ACOS:
4554 case BUILT_IN_ACOSH:
4555 case BUILT_IN_ASIN:
4556 case BUILT_IN_ASINH:
4557 case BUILT_IN_ATAN:
4558 case BUILT_IN_ATANH:
4559 case BUILT_IN_CBRT:
4560 case BUILT_IN_COS:
4561 case BUILT_IN_COSH:
4562 case BUILT_IN_ERF:
4563 case BUILT_IN_ERFC:
4564 case BUILT_IN_EXP2:
4565 case BUILT_IN_EXP:
4566 case BUILT_IN_EXPM1:
4567 case BUILT_IN_LGAMMA:
4568 case BUILT_IN_LOG10:
4569 case BUILT_IN_LOG1P:
4570 case BUILT_IN_LOG2:
4571 case BUILT_IN_LOG:
4572 case BUILT_IN_SIN:
4573 case BUILT_IN_SINH:
4574 case BUILT_IN_SQRT:
4575 case BUILT_IN_TAN:
4576 case BUILT_IN_TANH:
4577 bdecl = builtin_decl_implicit (fn);
4578 suffix = "d2"; /* pow -> powd2 */
4579 if (el_mode != DFmode
4580 || n != 2
4581 || !bdecl)
4582 return NULL_TREE;
4583 break;
4585 case BUILT_IN_ATAN2F:
4586 case BUILT_IN_HYPOTF:
4587 case BUILT_IN_POWF:
4588 n_args = 2;
4589 /* fall through */
4591 case BUILT_IN_ACOSF:
4592 case BUILT_IN_ACOSHF:
4593 case BUILT_IN_ASINF:
4594 case BUILT_IN_ASINHF:
4595 case BUILT_IN_ATANF:
4596 case BUILT_IN_ATANHF:
4597 case BUILT_IN_CBRTF:
4598 case BUILT_IN_COSF:
4599 case BUILT_IN_COSHF:
4600 case BUILT_IN_ERFF:
4601 case BUILT_IN_ERFCF:
4602 case BUILT_IN_EXP2F:
4603 case BUILT_IN_EXPF:
4604 case BUILT_IN_EXPM1F:
4605 case BUILT_IN_LGAMMAF:
4606 case BUILT_IN_LOG10F:
4607 case BUILT_IN_LOG1PF:
4608 case BUILT_IN_LOG2F:
4609 case BUILT_IN_LOGF:
4610 case BUILT_IN_SINF:
4611 case BUILT_IN_SINHF:
4612 case BUILT_IN_SQRTF:
4613 case BUILT_IN_TANF:
4614 case BUILT_IN_TANHF:
4615 bdecl = builtin_decl_implicit (fn);
4616 suffix = "4"; /* powf -> powf4 */
4617 if (el_mode != SFmode
4618 || n != 4
4619 || !bdecl)
4620 return NULL_TREE;
4621 break;
4623 default:
4624 return NULL_TREE;
4627 else
4628 return NULL_TREE;
4630 gcc_assert (suffix != NULL);
4631 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
4632 if (!bname)
4633 return NULL_TREE;
4635 strcpy (name, bname + sizeof ("__builtin_") - 1);
4636 strcat (name, suffix);
4638 if (n_args == 1)
4639 fntype = build_function_type_list (type_out, type_in, NULL);
4640 else if (n_args == 2)
4641 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
4642 else
4643 gcc_unreachable ();
4645 /* Build a function declaration for the vectorized function. */
4646 new_fndecl = build_decl (BUILTINS_LOCATION,
4647 FUNCTION_DECL, get_identifier (name), fntype);
4648 TREE_PUBLIC (new_fndecl) = 1;
4649 DECL_EXTERNAL (new_fndecl) = 1;
4650 DECL_IS_NOVOPS (new_fndecl) = 1;
4651 TREE_READONLY (new_fndecl) = 1;
4653 return new_fndecl;
4656 /* Returns a function decl for a vectorized version of the builtin function
4657 FNDECL with input vector type TYPE_IN and result vector type TYPE_OUT, or
4658 NULL_TREE if it is not available. */
4660 static tree
4661 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
4662 tree type_in)
4664 enum machine_mode in_mode, out_mode;
4665 int in_n, out_n;
4667 if (TARGET_DEBUG_BUILTIN)
4668 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
4669 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
4670 GET_MODE_NAME (TYPE_MODE (type_out)),
4671 GET_MODE_NAME (TYPE_MODE (type_in)));
4673 if (TREE_CODE (type_out) != VECTOR_TYPE
4674 || TREE_CODE (type_in) != VECTOR_TYPE
4675 || !TARGET_VECTORIZE_BUILTINS)
4676 return NULL_TREE;
4678 out_mode = TYPE_MODE (TREE_TYPE (type_out));
4679 out_n = TYPE_VECTOR_SUBPARTS (type_out);
4680 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4681 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4683 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4685 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4686 switch (fn)
4688 case BUILT_IN_CLZIMAX:
4689 case BUILT_IN_CLZLL:
4690 case BUILT_IN_CLZL:
4691 case BUILT_IN_CLZ:
4692 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4694 if (out_mode == QImode && out_n == 16)
4695 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
4696 else if (out_mode == HImode && out_n == 8)
4697 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
4698 else if (out_mode == SImode && out_n == 4)
4699 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
4700 else if (out_mode == DImode && out_n == 2)
4701 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
4703 break;
4704 case BUILT_IN_COPYSIGN:
4705 if (VECTOR_UNIT_VSX_P (V2DFmode)
4706 && out_mode == DFmode && out_n == 2
4707 && in_mode == DFmode && in_n == 2)
4708 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
4709 break;
4710 case BUILT_IN_COPYSIGNF:
4711 if (out_mode != SFmode || out_n != 4
4712 || in_mode != SFmode || in_n != 4)
4713 break;
4714 if (VECTOR_UNIT_VSX_P (V4SFmode))
4715 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
4716 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4717 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
4718 break;
4719 case BUILT_IN_POPCOUNTIMAX:
4720 case BUILT_IN_POPCOUNTLL:
4721 case BUILT_IN_POPCOUNTL:
4722 case BUILT_IN_POPCOUNT:
4723 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4725 if (out_mode == QImode && out_n == 16)
4726 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
4727 else if (out_mode == HImode && out_n == 8)
4728 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
4729 else if (out_mode == SImode && out_n == 4)
4730 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
4731 else if (out_mode == DImode && out_n == 2)
4732 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
4734 break;
4735 case BUILT_IN_SQRT:
4736 if (VECTOR_UNIT_VSX_P (V2DFmode)
4737 && out_mode == DFmode && out_n == 2
4738 && in_mode == DFmode && in_n == 2)
4739 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
4740 break;
4741 case BUILT_IN_SQRTF:
4742 if (VECTOR_UNIT_VSX_P (V4SFmode)
4743 && out_mode == SFmode && out_n == 4
4744 && in_mode == SFmode && in_n == 4)
4745 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
4746 break;
4747 case BUILT_IN_CEIL:
4748 if (VECTOR_UNIT_VSX_P (V2DFmode)
4749 && out_mode == DFmode && out_n == 2
4750 && in_mode == DFmode && in_n == 2)
4751 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
4752 break;
4753 case BUILT_IN_CEILF:
4754 if (out_mode != SFmode || out_n != 4
4755 || in_mode != SFmode || in_n != 4)
4756 break;
4757 if (VECTOR_UNIT_VSX_P (V4SFmode))
4758 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
4759 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4760 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
4761 break;
4762 case BUILT_IN_FLOOR:
4763 if (VECTOR_UNIT_VSX_P (V2DFmode)
4764 && out_mode == DFmode && out_n == 2
4765 && in_mode == DFmode && in_n == 2)
4766 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
4767 break;
4768 case BUILT_IN_FLOORF:
4769 if (out_mode != SFmode || out_n != 4
4770 || in_mode != SFmode || in_n != 4)
4771 break;
4772 if (VECTOR_UNIT_VSX_P (V4SFmode))
4773 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
4774 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4775 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
4776 break;
4777 case BUILT_IN_FMA:
4778 if (VECTOR_UNIT_VSX_P (V2DFmode)
4779 && out_mode == DFmode && out_n == 2
4780 && in_mode == DFmode && in_n == 2)
4781 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
4782 break;
4783 case BUILT_IN_FMAF:
4784 if (VECTOR_UNIT_VSX_P (V4SFmode)
4785 && out_mode == SFmode && out_n == 4
4786 && in_mode == SFmode && in_n == 4)
4787 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
4788 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
4789 && out_mode == SFmode && out_n == 4
4790 && in_mode == SFmode && in_n == 4)
4791 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
4792 break;
4793 case BUILT_IN_TRUNC:
4794 if (VECTOR_UNIT_VSX_P (V2DFmode)
4795 && out_mode == DFmode && out_n == 2
4796 && in_mode == DFmode && in_n == 2)
4797 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
4798 break;
4799 case BUILT_IN_TRUNCF:
4800 if (out_mode != SFmode || out_n != 4
4801 || in_mode != SFmode || in_n != 4)
4802 break;
4803 if (VECTOR_UNIT_VSX_P (V4SFmode))
4804 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
4805 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4806 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
4807 break;
4808 case BUILT_IN_NEARBYINT:
4809 if (VECTOR_UNIT_VSX_P (V2DFmode)
4810 && flag_unsafe_math_optimizations
4811 && out_mode == DFmode && out_n == 2
4812 && in_mode == DFmode && in_n == 2)
4813 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
4814 break;
4815 case BUILT_IN_NEARBYINTF:
4816 if (VECTOR_UNIT_VSX_P (V4SFmode)
4817 && flag_unsafe_math_optimizations
4818 && out_mode == SFmode && out_n == 4
4819 && in_mode == SFmode && in_n == 4)
4820 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
4821 break;
4822 case BUILT_IN_RINT:
4823 if (VECTOR_UNIT_VSX_P (V2DFmode)
4824 && !flag_trapping_math
4825 && out_mode == DFmode && out_n == 2
4826 && in_mode == DFmode && in_n == 2)
4827 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
4828 break;
4829 case BUILT_IN_RINTF:
4830 if (VECTOR_UNIT_VSX_P (V4SFmode)
4831 && !flag_trapping_math
4832 && out_mode == SFmode && out_n == 4
4833 && in_mode == SFmode && in_n == 4)
4834 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
4835 break;
4836 default:
4837 break;
4841 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
4843 enum rs6000_builtins fn
4844 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
4845 switch (fn)
4847 case RS6000_BUILTIN_RSQRTF:
4848 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4849 && out_mode == SFmode && out_n == 4
4850 && in_mode == SFmode && in_n == 4)
4851 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
4852 break;
4853 case RS6000_BUILTIN_RSQRT:
4854 if (VECTOR_UNIT_VSX_P (V2DFmode)
4855 && out_mode == DFmode && out_n == 2
4856 && in_mode == DFmode && in_n == 2)
4857 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
4858 break;
4859 case RS6000_BUILTIN_RECIPF:
4860 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4861 && out_mode == SFmode && out_n == 4
4862 && in_mode == SFmode && in_n == 4)
4863 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
4864 break;
4865 case RS6000_BUILTIN_RECIP:
4866 if (VECTOR_UNIT_VSX_P (V2DFmode)
4867 && out_mode == DFmode && out_n == 2
4868 && in_mode == DFmode && in_n == 2)
4869 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
4870 break;
4871 default:
4872 break;
4876 /* Generate calls to libmass if appropriate. */
4877 if (rs6000_veclib_handler)
4878 return rs6000_veclib_handler (fndecl, type_out, type_in);
4880 return NULL_TREE;
4883 /* Default CPU string for rs6000*_file_start functions. */
4884 static const char *rs6000_default_cpu;
4886 /* Do anything needed at the start of the asm file. */
4888 static void
4889 rs6000_file_start (void)
4891 char buffer[80];
4892 const char *start = buffer;
4893 FILE *file = asm_out_file;
4895 rs6000_default_cpu = TARGET_CPU_DEFAULT;
4897 default_file_start ();
4899 if (flag_verbose_asm)
4901 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
4903 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
4905 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
4906 start = "";
4909 if (global_options_set.x_rs6000_cpu_index)
4911 fprintf (file, "%s -mcpu=%s", start,
4912 processor_target_table[rs6000_cpu_index].name);
4913 start = "";
4916 if (global_options_set.x_rs6000_tune_index)
4918 fprintf (file, "%s -mtune=%s", start,
4919 processor_target_table[rs6000_tune_index].name);
4920 start = "";
4923 if (PPC405_ERRATUM77)
4925 fprintf (file, "%s PPC405CR_ERRATUM77", start);
4926 start = "";
4929 #ifdef USING_ELFOS_H
4930 switch (rs6000_sdata)
4932 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
4933 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
4934 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
4935 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
4938 if (rs6000_sdata && g_switch_value)
4940 fprintf (file, "%s -G %d", start,
4941 g_switch_value);
4942 start = "";
4944 #endif
4946 if (*start == '\0')
4947 putc ('\n', file);
4950 if (DEFAULT_ABI == ABI_ELFv2)
4951 fprintf (file, "\t.abiversion 2\n");
4953 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
4954 || (TARGET_ELF && flag_pic == 2))
4956 switch_to_section (toc_section);
4957 switch_to_section (text_section);
4962 /* Return nonzero if this function is known to have a null epilogue. */
4965 direct_return (void)
4967 if (reload_completed)
4969 rs6000_stack_t *info = rs6000_stack_info ();
4971 if (info->first_gp_reg_save == 32
4972 && info->first_fp_reg_save == 64
4973 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
4974 && ! info->lr_save_p
4975 && ! info->cr_save_p
4976 && info->vrsave_mask == 0
4977 && ! info->push_p)
4978 return 1;
4981 return 0;
4984 /* Return the number of instructions it takes to form a constant in an
4985 integer register. */
4988 num_insns_constant_wide (HOST_WIDE_INT value)
4990 /* signed constant loadable with addi */
4991 if ((unsigned HOST_WIDE_INT) (value + 0x8000) < 0x10000)
4992 return 1;
4994 /* constant loadable with addis */
4995 else if ((value & 0xffff) == 0
4996 && (value >> 31 == -1 || value >> 31 == 0))
4997 return 1;
4999 else if (TARGET_POWERPC64)
5001 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5002 HOST_WIDE_INT high = value >> 31;
5004 if (high == 0 || high == -1)
5005 return 2;
5007 high >>= 1;
5009 if (low == 0)
5010 return num_insns_constant_wide (high) + 1;
5011 else if (high == 0)
5012 return num_insns_constant_wide (low) + 1;
5013 else
5014 return (num_insns_constant_wide (high)
5015 + num_insns_constant_wide (low) + 1);
5018 else
5019 return 2;
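/* Worked examples (illustrative constants): 0x7fff fits the addi test and
   costs 1 insn; 0x12340000 has a zero low half and uniform sign bits, so a
   single lis/addis suffices; 0x12345678 needs lis + ori and costs 2; wider
   64-bit values recurse on the two 32-bit halves and pay one extra insn
   for the shift that joins them.  */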
5023 num_insns_constant (rtx op, enum machine_mode mode)
5025 HOST_WIDE_INT low, high;
5027 switch (GET_CODE (op))
5029 case CONST_INT:
5030 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5031 && mask64_operand (op, mode))
5032 return 2;
5033 else
5034 return num_insns_constant_wide (INTVAL (op));
5036 case CONST_WIDE_INT:
5038 int i;
5039 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5040 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5041 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5042 return ins;
5045 case CONST_DOUBLE:
5046 if (mode == SFmode || mode == SDmode)
5048 long l;
5049 REAL_VALUE_TYPE rv;
5051 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5052 if (DECIMAL_FLOAT_MODE_P (mode))
5053 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
5054 else
5055 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
5056 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5059 long l[2];
5060 REAL_VALUE_TYPE rv;
5062 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5063 if (DECIMAL_FLOAT_MODE_P (mode))
5064 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
5065 else
5066 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
5067 high = l[WORDS_BIG_ENDIAN == 0];
5068 low = l[WORDS_BIG_ENDIAN != 0];
5070 if (TARGET_32BIT)
5071 return (num_insns_constant_wide (low)
5072 + num_insns_constant_wide (high));
5073 else
5075 if ((high == 0 && low >= 0)
5076 || (high == -1 && low < 0))
5077 return num_insns_constant_wide (low);
5079 else if (mask64_operand (op, mode))
5080 return 2;
5082 else if (low == 0)
5083 return num_insns_constant_wide (high) + 1;
5085 else
5086 return (num_insns_constant_wide (high)
5087 + num_insns_constant_wide (low) + 1);
5090 default:
5091 gcc_unreachable ();
5095 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5096 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5097 corresponding element of the vector, but for V4SFmode and V2SFmode,
5098 the corresponding "float" is interpreted as an SImode integer. */
5100 HOST_WIDE_INT
5101 const_vector_elt_as_int (rtx op, unsigned int elt)
5103 rtx tmp;
5105 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5106 gcc_assert (GET_MODE (op) != V2DImode
5107 && GET_MODE (op) != V2DFmode);
5109 tmp = CONST_VECTOR_ELT (op, elt);
5110 if (GET_MODE (op) == V4SFmode
5111 || GET_MODE (op) == V2SFmode)
5112 tmp = gen_lowpart (SImode, tmp);
5113 return INTVAL (tmp);
5116 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5117 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5118 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5119 all items are set to the same value and contain COPIES replicas of the
5120 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5121 operand and the others are set to the value of the operand's msb. */
5123 static bool
5124 vspltis_constant (rtx op, unsigned step, unsigned copies)
5126 enum machine_mode mode = GET_MODE (op);
5127 enum machine_mode inner = GET_MODE_INNER (mode);
5129 unsigned i;
5130 unsigned nunits;
5131 unsigned bitsize;
5132 unsigned mask;
5134 HOST_WIDE_INT val;
5135 HOST_WIDE_INT splat_val;
5136 HOST_WIDE_INT msb_val;
5138 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5139 return false;
5141 nunits = GET_MODE_NUNITS (mode);
5142 bitsize = GET_MODE_BITSIZE (inner);
5143 mask = GET_MODE_MASK (inner);
5145 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5146 splat_val = val;
5147 msb_val = val >= 0 ? 0 : -1;
5149 /* Construct the value to be splatted, if possible. If not, return false. */
5150 for (i = 2; i <= copies; i *= 2)
5152 HOST_WIDE_INT small_val;
5153 bitsize /= 2;
5154 small_val = splat_val >> bitsize;
5155 mask >>= bitsize;
5156 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5157 return false;
5158 splat_val = small_val;
5161 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5162 if (EASY_VECTOR_15 (splat_val))
5165 /* Also check if we can splat, and then add the result to itself. Do so if
5166 the value is positive, or if the splat instruction is using OP's mode;
5167 for splat_val < 0, the splat and the add should use the same mode. */
5168 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5169 && (splat_val >= 0 || (step == 1 && copies == 1)))
5172 /* Also check if we are loading up the most significant bit, which can be done
5173 by loading up -1 and shifting the value left by -1. */
5174 else if (EASY_VECTOR_MSB (splat_val, inner))
5177 else
5178 return false;
5180 /* Check if VAL is present in every STEP-th element, and the
5181 other elements are filled with its most significant bit. */
5182 for (i = 1; i < nunits; ++i)
5184 HOST_WIDE_INT desired_val;
5185 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5186 if ((i & (step - 1)) == 0)
5187 desired_val = val;
5188 else
5189 desired_val = msb_val;
5191 if (desired_val != const_vector_elt_as_int (op, elt))
5192 return false;
5195 return true;
5199 /* Return true if OP is of the given MODE and can be synthesized
5200 with a vspltisb, vspltish or vspltisw. */
5202 bool
5203 easy_altivec_constant (rtx op, enum machine_mode mode)
5205 unsigned step, copies;
5207 if (mode == VOIDmode)
5208 mode = GET_MODE (op);
5209 else if (mode != GET_MODE (op))
5210 return false;
5212 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5213 constants. */
5214 if (mode == V2DFmode)
5215 return zero_constant (op, mode);
5217 else if (mode == V2DImode)
5219 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5220 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5221 return false;
5223 if (zero_constant (op, mode))
5224 return true;
5226 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5227 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5228 return true;
5230 return false;
5233 /* V1TImode is a special container for TImode. Ignore for now. */
5234 else if (mode == V1TImode)
5235 return false;
5237 /* Start with a vspltisw. */
5238 step = GET_MODE_NUNITS (mode) / 4;
5239 copies = 1;
5241 if (vspltis_constant (op, step, copies))
5242 return true;
5244 /* Then try with a vspltish. */
5245 if (step == 1)
5246 copies <<= 1;
5247 else
5248 step >>= 1;
5250 if (vspltis_constant (op, step, copies))
5251 return true;
5253 /* And finally a vspltisb. */
5254 if (step == 1)
5255 copies <<= 1;
5256 else
5257 step >>= 1;
5259 if (vspltis_constant (op, step, copies))
5260 return true;
5262 return false;
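/* For reference, the STEP/COPIES pairs tried above are (4,1), (2,1),
   (1,1) for a V16QImode constant and (1,1), (1,2), (1,4) for a
   V4SImode one, i.e. vspltisw first, then vspltish, then vspltisb.  */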
5265 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5266 result is OP. Abort if it is not possible. */
5268 rtx
5269 gen_easy_altivec_constant (rtx op)
5271 enum machine_mode mode = GET_MODE (op);
5272 int nunits = GET_MODE_NUNITS (mode);
5273 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5274 unsigned step = nunits / 4;
5275 unsigned copies = 1;
5277 /* Start with a vspltisw. */
5278 if (vspltis_constant (op, step, copies))
5279 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5281 /* Then try with a vspltish. */
5282 if (step == 1)
5283 copies <<= 1;
5284 else
5285 step >>= 1;
5287 if (vspltis_constant (op, step, copies))
5288 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5290 /* And finally a vspltisb. */
5291 if (step == 1)
5292 copies <<= 1;
5293 else
5294 step >>= 1;
5296 if (vspltis_constant (op, step, copies))
5297 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
5299 gcc_unreachable ();
5302 const char *
5303 output_vec_const_move (rtx *operands)
5305 int cst, cst2;
5306 enum machine_mode mode;
5307 rtx dest, vec;
5309 dest = operands[0];
5310 vec = operands[1];
5311 mode = GET_MODE (dest);
5313 if (TARGET_VSX)
5315 if (zero_constant (vec, mode))
5316 return "xxlxor %x0,%x0,%x0";
5318 if ((mode == V2DImode || mode == V1TImode)
5319 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
5320 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
5321 return "vspltisw %0,-1";
5324 if (TARGET_ALTIVEC)
5326 rtx splat_vec;
5327 if (zero_constant (vec, mode))
5328 return "vxor %0,%0,%0";
5330 splat_vec = gen_easy_altivec_constant (vec);
5331 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
5332 operands[1] = XEXP (splat_vec, 0);
5333 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
5334 return "#";
5336 switch (GET_MODE (splat_vec))
5338 case V4SImode:
5339 return "vspltisw %0,%1";
5341 case V8HImode:
5342 return "vspltish %0,%1";
5344 case V16QImode:
5345 return "vspltisb %0,%1";
5347 default:
5348 gcc_unreachable ();
5352 gcc_assert (TARGET_SPE);
5354 /* Vector constant 0 is handled as a splitter of V2SI, and in the
5355 pattern of V1DI, V4HI, and V2SF.
5357 FIXME: We should probably return # and add post reload
5358 splitters for these, but this way is so easy ;-). */
5359 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
5360 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
5361 operands[1] = CONST_VECTOR_ELT (vec, 0);
5362 operands[2] = CONST_VECTOR_ELT (vec, 1);
5363 if (cst == cst2)
5364 return "li %0,%1\n\tevmergelo %0,%0,%0";
5365 else if (WORDS_BIG_ENDIAN)
5366 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
5367 else
5368 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
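/* As an illustration (register numbers hypothetical), the V2SImode
   constant { 5, 7 } on a big-endian SPE target emits
       li %0,5 ; evmergelo %0,%0,%0 ; li %0,7
   i.e. load 5, copy it into the high half of the 64-bit register,
   then overwrite the low (GPR) half with 7.  */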
5371 /* Initialize the paired-single vector TARGET to VALS. */
5373 void
5374 paired_expand_vector_init (rtx target, rtx vals)
5376 enum machine_mode mode = GET_MODE (target);
5377 int n_elts = GET_MODE_NUNITS (mode);
5378 int n_var = 0;
5379 rtx x, new_rtx, tmp, constant_op, op1, op2;
5380 int i;
5382 for (i = 0; i < n_elts; ++i)
5384 x = XVECEXP (vals, 0, i);
5385 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5386 ++n_var;
5388 if (n_var == 0)
5390 /* Load from constant pool. */
5391 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5392 return;
5395 if (n_var == 2)
5397 /* The vector is initialized only with non-constants. */
5398 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
5399 XVECEXP (vals, 0, 1));
5401 emit_move_insn (target, new_rtx);
5402 return;
5405 /* One field is non-constant and the other one is a constant. Load the
5406 constant from the constant pool and use ps_merge instruction to
5407 construct the whole vector. */
5408 op1 = XVECEXP (vals, 0, 0);
5409 op2 = XVECEXP (vals, 0, 1);
5411 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
5413 tmp = gen_reg_rtx (GET_MODE (constant_op));
5414 emit_move_insn (tmp, constant_op);
5416 if (CONSTANT_P (op1))
5417 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
5418 else
5419 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
5421 emit_move_insn (target, new_rtx);
5424 void
5425 paired_expand_vector_move (rtx operands[])
5427 rtx op0 = operands[0], op1 = operands[1];
5429 emit_move_insn (op0, op1);
5432 /* Emit vector compare for code RCODE. DEST is destination, OP0 and
5433 OP1 are the two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
5434 operands for the relation operation RCODE. This is a recursive
5435 function. */
5437 static void
5438 paired_emit_vector_compare (enum rtx_code rcode,
5439 rtx dest, rtx op0, rtx op1,
5440 rtx cc_op0, rtx cc_op1)
5442 rtx tmp = gen_reg_rtx (V2SFmode);
5443 rtx tmp1, max, min;
5445 gcc_assert (TARGET_PAIRED_FLOAT);
5446 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5448 switch (rcode)
5450 case LT:
5451 case LTU:
5452 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5453 return;
5454 case GE:
5455 case GEU:
5456 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5457 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
5458 return;
5459 case LE:
5460 case LEU:
5461 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
5462 return;
5463 case GT:
5464 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5465 return;
5466 case EQ:
5467 tmp1 = gen_reg_rtx (V2SFmode);
5468 max = gen_reg_rtx (V2SFmode);
5469 min = gen_reg_rtx (V2SFmode);
5472 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5473 emit_insn (gen_selv2sf4
5474 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5475 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
5476 emit_insn (gen_selv2sf4
5477 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5478 emit_insn (gen_subv2sf3 (tmp1, min, max));
5479 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
5480 return;
5481 case NE:
5482 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
5483 return;
5484 case UNLE:
5485 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5486 return;
5487 case UNLT:
5488 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
5489 return;
5490 case UNGE:
5491 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5492 return;
5493 case UNGT:
5494 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
5495 return;
5496 default:
5497 gcc_unreachable ();
5500 return;
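/* For example, the LT case above rewrites "cc_op0 < cc_op1 ? op0 : op1"
   as "cc_op0 >= cc_op1 ? op1 : op0", and the GE case then materializes
   that as tmp = cc_op0 - cc_op1 followed by a ps_sel on the sign of
   tmp.  */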
5503 /* Emit vector conditional expression.
5504 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5505 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5507 int
5508 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5509 rtx cond, rtx cc_op0, rtx cc_op1)
5511 enum rtx_code rcode = GET_CODE (cond);
5513 if (!TARGET_PAIRED_FLOAT)
5514 return 0;
5516 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
5518 return 1;
5521 /* Initialize vector TARGET to VALS. */
5523 void
5524 rs6000_expand_vector_init (rtx target, rtx vals)
5526 enum machine_mode mode = GET_MODE (target);
5527 enum machine_mode inner_mode = GET_MODE_INNER (mode);
5528 int n_elts = GET_MODE_NUNITS (mode);
5529 int n_var = 0, one_var = -1;
5530 bool all_same = true, all_const_zero = true;
5531 rtx x, mem;
5532 int i;
5534 for (i = 0; i < n_elts; ++i)
5536 x = XVECEXP (vals, 0, i);
5537 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5538 ++n_var, one_var = i;
5539 else if (x != CONST0_RTX (inner_mode))
5540 all_const_zero = false;
5542 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
5543 all_same = false;
5546 if (n_var == 0)
5548 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
5549 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
5550 if ((int_vector_p || TARGET_VSX) && all_const_zero)
5552 /* Zero register. */
5553 emit_insn (gen_rtx_SET (VOIDmode, target,
5554 gen_rtx_XOR (mode, target, target)));
5555 return;
5557 else if (int_vector_p && easy_vector_constant (const_vec, mode))
5559 /* Splat immediate. */
5560 emit_insn (gen_rtx_SET (VOIDmode, target, const_vec));
5561 return;
5563 else
5565 /* Load from constant pool. */
5566 emit_move_insn (target, const_vec);
5567 return;
5571 /* Double word values on VSX can use xxpermdi or lxvdsx. */
5572 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5574 rtx op0 = XVECEXP (vals, 0, 0);
5575 rtx op1 = XVECEXP (vals, 0, 1);
5576 if (all_same)
5578 if (!MEM_P (op0) && !REG_P (op0))
5579 op0 = force_reg (inner_mode, op0);
5580 if (mode == V2DFmode)
5581 emit_insn (gen_vsx_splat_v2df (target, op0));
5582 else
5583 emit_insn (gen_vsx_splat_v2di (target, op0));
5585 else
5587 op0 = force_reg (inner_mode, op0);
5588 op1 = force_reg (inner_mode, op1);
5589 if (mode == V2DFmode)
5590 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
5591 else
5592 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
5594 return;
5597 /* With single precision floating point on VSX, note that internally single
5598 precision is actually represented as a double. Either make 2 V2DF
5599 vectors and convert those vectors to single precision, or do one
5600 conversion and splat the result to the other elements. */
5601 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
5603 if (all_same)
5605 rtx freg = gen_reg_rtx (V4SFmode);
5606 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
5607 rtx cvt = ((TARGET_XSCVDPSPN)
5608 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
5609 : gen_vsx_xscvdpsp_scalar (freg, sreg));
5611 emit_insn (cvt);
5612 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
5614 else
5616 rtx dbl_even = gen_reg_rtx (V2DFmode);
5617 rtx dbl_odd = gen_reg_rtx (V2DFmode);
5618 rtx flt_even = gen_reg_rtx (V4SFmode);
5619 rtx flt_odd = gen_reg_rtx (V4SFmode);
5620 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
5621 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
5622 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
5623 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
5625 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
5626 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
5627 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
5628 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
5629 rs6000_expand_extract_even (target, flt_even, flt_odd);
5631 return;
5634 /* Store value to stack temp. Load vector element. Splat. However, splat
5635 of 64-bit items is not supported on AltiVec. */
5636 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
5638 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5639 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
5640 XVECEXP (vals, 0, 0));
5641 x = gen_rtx_UNSPEC (VOIDmode,
5642 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5643 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5644 gen_rtvec (2,
5645 gen_rtx_SET (VOIDmode,
5646 target, mem),
5647 x)));
5648 x = gen_rtx_VEC_SELECT (inner_mode, target,
5649 gen_rtx_PARALLEL (VOIDmode,
5650 gen_rtvec (1, const0_rtx)));
5651 emit_insn (gen_rtx_SET (VOIDmode, target,
5652 gen_rtx_VEC_DUPLICATE (mode, x)));
5653 return;
5656 /* One field is non-constant. Load constant then overwrite
5657 varying field. */
5658 if (n_var == 1)
5660 rtx copy = copy_rtx (vals);
5662 /* Load constant part of vector, substitute neighboring value for
5663 varying element. */
5664 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
5665 rs6000_expand_vector_init (target, copy);
5667 /* Insert variable. */
5668 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
5669 return;
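/* As an illustration (element values hypothetical): for { x, 1, 2, 3 }
   this first loads the all-constant vector { 1, 1, 2, 3 } -- element 0
   borrowing its neighbour's value -- and then inserts the variable x
   into element 0.  */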
5672 /* Construct the vector in memory one field at a time
5673 and load the whole vector. */
5674 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5675 for (i = 0; i < n_elts; i++)
5676 emit_move_insn (adjust_address_nv (mem, inner_mode,
5677 i * GET_MODE_SIZE (inner_mode)),
5678 XVECEXP (vals, 0, i));
5679 emit_move_insn (target, mem);
5682 /* Set field ELT of TARGET to VAL. */
5684 void
5685 rs6000_expand_vector_set (rtx target, rtx val, int elt)
5687 enum machine_mode mode = GET_MODE (target);
5688 enum machine_mode inner_mode = GET_MODE_INNER (mode);
5689 rtx reg = gen_reg_rtx (mode);
5690 rtx mask, mem, x;
5691 int width = GET_MODE_SIZE (inner_mode);
5692 int i;
5694 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5696 rtx (*set_func) (rtx, rtx, rtx, rtx)
5697 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
5698 emit_insn (set_func (target, target, val, GEN_INT (elt)));
5699 return;
5702 /* Simplify setting single element vectors like V1TImode. */
5703 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
5705 emit_move_insn (target, gen_lowpart (mode, val));
5706 return;
5709 /* Load single variable value. */
5710 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5711 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
5712 x = gen_rtx_UNSPEC (VOIDmode,
5713 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5714 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5715 gen_rtvec (2,
5716 gen_rtx_SET (VOIDmode,
5717 reg, mem),
5718 x)));
5720 /* Linear sequence. */
5721 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
5722 for (i = 0; i < 16; ++i)
5723 XVECEXP (mask, 0, i) = GEN_INT (i);
5725 /* Set permute mask to insert element into target. */
5726 for (i = 0; i < width; ++i)
5727 XVECEXP (mask, 0, elt*width + i)
5728 = GEN_INT (i + 0x10);
5729 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
5731 if (BYTES_BIG_ENDIAN)
5732 x = gen_rtx_UNSPEC (mode,
5733 gen_rtvec (3, target, reg,
5734 force_reg (V16QImode, x)),
5735 UNSPEC_VPERM);
5736 else
5738 /* Invert selector. We prefer to generate VNAND on P8 so
5739 that future fusion opportunities can kick in, but must
5740 generate VNOR elsewhere. */
5741 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
5742 rtx iorx = (TARGET_P8_VECTOR
5743 ? gen_rtx_IOR (V16QImode, notx, notx)
5744 : gen_rtx_AND (V16QImode, notx, notx));
5745 rtx tmp = gen_reg_rtx (V16QImode);
5746 emit_insn (gen_rtx_SET (VOIDmode, tmp, iorx));
5748 /* Permute with operands reversed and adjusted selector. */
5749 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
5750 UNSPEC_VPERM);
5753 emit_insn (gen_rtx_SET (VOIDmode, target, x));
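/* As an illustration, setting element 1 of a V4SImode vector on a
   big-endian target uses the permute selector
   { 0,1,2,3, 16,17,18,19, 8,9,10,11, 12,13,14,15 }: bytes 4..7 of the
   result are taken from the vector holding the new value, the rest
   from the original TARGET.  */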
5756 /* Extract field ELT from VEC into TARGET. */
5758 void
5759 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
5761 enum machine_mode mode = GET_MODE (vec);
5762 enum machine_mode inner_mode = GET_MODE_INNER (mode);
5763 rtx mem;
5765 if (VECTOR_MEM_VSX_P (mode))
5767 switch (mode)
5769 default:
5770 break;
5771 case V1TImode:
5772 gcc_assert (elt == 0 && inner_mode == TImode);
5773 emit_move_insn (target, gen_lowpart (TImode, vec));
5774 return;
5775 case V2DFmode:
5776 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
5777 return;
5778 case V2DImode:
5779 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
5780 return;
5781 case V4SFmode:
5782 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
5783 return;
5787 /* Allocate mode-sized buffer. */
5788 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5790 emit_move_insn (mem, vec);
5792 /* Add offset to field within buffer matching vector element. */
5793 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
5795 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
5798 /* Generates shifts and masks for a pair of rldicl or rldicr insns to
5799 implement ANDing by the mask IN. */
5800 void
5801 build_mask64_2_operands (rtx in, rtx *out)
5803 unsigned HOST_WIDE_INT c, lsb, m1, m2;
5804 int shift;
5806 gcc_assert (GET_CODE (in) == CONST_INT);
5808 c = INTVAL (in);
5809 if (c & 1)
5811 /* Assume c initially something like 0x00fff000000fffff. The idea
5812 is to rotate the word so that the middle ^^^^^^ group of zeros
5813 is at the MS end and can be cleared with an rldicl mask. We then
5814 rotate back and clear off the MS ^^ group of zeros with a
5815 second rldicl. */
5816 c = ~c; /* c == 0xff000ffffff00000 */
5817 lsb = c & -c; /* lsb == 0x0000000000100000 */
5818 m1 = -lsb; /* m1 == 0xfffffffffff00000 */
5819 c = ~c; /* c == 0x00fff000000fffff */
5820 c &= -lsb; /* c == 0x00fff00000000000 */
5821 lsb = c & -c; /* lsb == 0x0000100000000000 */
5822 c = ~c; /* c == 0xff000fffffffffff */
5823 c &= -lsb; /* c == 0xff00000000000000 */
5824 shift = 0;
5825 while ((lsb >>= 1) != 0)
5826 shift++; /* shift == 44 on exit from loop */
5827 m1 <<= 64 - shift; /* m1 == 0xffffff0000000000 */
5828 m1 = ~m1; /* m1 == 0x000000ffffffffff */
5829 m2 = ~c; /* m2 == 0x00ffffffffffffff */
5831 else
5833 /* Assume c initially something like 0xff000f0000000000. The idea
5834 is to rotate the word so that the ^^^ middle group of zeros
5835 is at the LS end and can be cleared with an rldicr mask. We then
5836 rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
5837 a second rldicr. */
5838 lsb = c & -c; /* lsb == 0x0000010000000000 */
5839 m2 = -lsb; /* m2 == 0xffffff0000000000 */
5840 c = ~c; /* c == 0x00fff0ffffffffff */
5841 c &= -lsb; /* c == 0x00fff00000000000 */
5842 lsb = c & -c; /* lsb == 0x0000100000000000 */
5843 c = ~c; /* c == 0xff000fffffffffff */
5844 c &= -lsb; /* c == 0xff00000000000000 */
5845 shift = 0;
5846 while ((lsb >>= 1) != 0)
5847 shift++; /* shift == 44 on exit from loop */
5848 m1 = ~c; /* m1 == 0x00ffffffffffffff */
5849 m1 >>= shift; /* m1 == 0x0000000000000fff */
5850 m1 = ~m1; /* m1 == 0xfffffffffffff000 */
5853 /* Note that when we only have two 0->1 and 1->0 transitions, one of the
5854 masks will be all 1's. We are guaranteed more than one transition. */
5855 out[0] = GEN_INT (64 - shift);
5856 out[1] = GEN_INT (m1);
5857 out[2] = GEN_INT (shift);
5858 out[3] = GEN_INT (m2);
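/* For the first (c & 1) example above the result is
   OUT = { 20, 0x000000ffffffffff, 44, 0x00ffffffffffffff }: rotate
   left by 64 - 44 = 20 and AND with m1, then rotate left by 44
   (undoing the first rotate) and AND with m2.  */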
5861 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
5863 bool
5864 invalid_e500_subreg (rtx op, enum machine_mode mode)
5866 if (TARGET_E500_DOUBLE)
5868 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
5869 subreg:TI and reg:TF. Decimal float modes are like integer
5870 modes (only low part of each register used) for this
5871 purpose. */
5872 if (GET_CODE (op) == SUBREG
5873 && (mode == SImode || mode == DImode || mode == TImode
5874 || mode == DDmode || mode == TDmode || mode == PTImode)
5875 && REG_P (SUBREG_REG (op))
5876 && (GET_MODE (SUBREG_REG (op)) == DFmode
5877 || GET_MODE (SUBREG_REG (op)) == TFmode))
5878 return true;
5880 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
5881 reg:TI. */
5882 if (GET_CODE (op) == SUBREG
5883 && (mode == DFmode || mode == TFmode)
5884 && REG_P (SUBREG_REG (op))
5885 && (GET_MODE (SUBREG_REG (op)) == DImode
5886 || GET_MODE (SUBREG_REG (op)) == TImode
5887 || GET_MODE (SUBREG_REG (op)) == PTImode
5888 || GET_MODE (SUBREG_REG (op)) == DDmode
5889 || GET_MODE (SUBREG_REG (op)) == TDmode))
5890 return true;
5893 if (TARGET_SPE
5894 && GET_CODE (op) == SUBREG
5895 && mode == SImode
5896 && REG_P (SUBREG_REG (op))
5897 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
5898 return true;
5900 return false;
5903 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
5904 selects whether the alignment is ABI-mandated, optional, or
5905 both ABI-mandated and optional alignment. */
5907 unsigned int
5908 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
5910 if (how != align_opt)
5912 if (TREE_CODE (type) == VECTOR_TYPE)
5914 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
5915 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
5917 if (align < 64)
5918 align = 64;
5920 else if (align < 128)
5921 align = 128;
5923 else if (TARGET_E500_DOUBLE
5924 && TREE_CODE (type) == REAL_TYPE
5925 && TYPE_MODE (type) == DFmode)
5927 if (align < 64)
5928 align = 64;
5932 if (how != align_abi)
5934 if (TREE_CODE (type) == ARRAY_TYPE
5935 && TYPE_MODE (TREE_TYPE (type)) == QImode)
5937 if (align < BITS_PER_WORD)
5938 align = BITS_PER_WORD;
5942 return align;
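/* So, for example (illustrative): an SPE V2SImode vector type gets at
   least 64-bit alignment, an AltiVec V4SImode type 128-bit, and a
   "char buf[32]" array is raised to BITS_PER_WORD when optional
   alignment is requested.  */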
5945 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
5947 bool
5948 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
5950 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
5952 if (computed != 128)
5954 static bool warned;
5955 if (!warned && warn_psabi)
5957 warned = true;
5958 inform (input_location,
5959 "the layout of aggregates containing vectors with"
5960 " %d-byte alignment has changed in GCC 5",
5961 computed / BITS_PER_UNIT);
5964 /* In current GCC there is no special case. */
5965 return false;
5968 return false;
5971 /* AIX increases natural record alignment to doubleword if the first
5972 field is an FP double while the FP fields remain word aligned. */
5974 unsigned int
5975 rs6000_special_round_type_align (tree type, unsigned int computed,
5976 unsigned int specified)
5978 unsigned int align = MAX (computed, specified);
5979 tree field = TYPE_FIELDS (type);
5981 /* Skip all non-field decls. */
5982 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
5983 field = DECL_CHAIN (field);
5985 if (field != NULL && field != type)
5987 type = TREE_TYPE (field);
5988 while (TREE_CODE (type) == ARRAY_TYPE)
5989 type = TREE_TYPE (type);
5991 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
5992 align = MAX (align, 64);
5995 return align;
5998 /* Darwin increases record alignment to the natural alignment of
5999 the first field. */
6001 unsigned int
6002 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6003 unsigned int specified)
6005 unsigned int align = MAX (computed, specified);
6007 if (TYPE_PACKED (type))
6008 return align;
6010 /* Find the first field, looking down into aggregates. */
6011 do {
6012 tree field = TYPE_FIELDS (type);
6013 /* Skip all non-field decls. */
6014 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6015 field = DECL_CHAIN (field);
6016 if (! field)
6017 break;
6018 /* A packed field does not contribute any extra alignment. */
6019 if (DECL_PACKED (field))
6020 return align;
6021 type = TREE_TYPE (field);
6022 while (TREE_CODE (type) == ARRAY_TYPE)
6023 type = TREE_TYPE (type);
6024 } while (AGGREGATE_TYPE_P (type));
6026 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6027 align = MAX (align, TYPE_ALIGN (type));
6029 return align;
6032 /* Return 1 for an operand in small memory on V.4/eabi. */
6034 int
6035 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6036 enum machine_mode mode ATTRIBUTE_UNUSED)
6038 #if TARGET_ELF
6039 rtx sym_ref;
6041 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6042 return 0;
6044 if (DEFAULT_ABI != ABI_V4)
6045 return 0;
6047 /* Vector and float memory instructions have a limited offset on the
6048 SPE, so using a vector or float variable directly as an operand is
6049 not useful. */
6050 if (TARGET_SPE
6051 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6052 return 0;
6054 if (GET_CODE (op) == SYMBOL_REF)
6055 sym_ref = op;
6057 else if (GET_CODE (op) != CONST
6058 || GET_CODE (XEXP (op, 0)) != PLUS
6059 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6060 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6061 return 0;
6063 else
6065 rtx sum = XEXP (op, 0);
6066 HOST_WIDE_INT summand;
6068 /* We have to be careful here, because it is the referenced address
6069 that must be 32k from _SDA_BASE_, not just the symbol. */
6070 summand = INTVAL (XEXP (sum, 1));
6071 if (summand < 0 || summand > g_switch_value)
6072 return 0;
6074 sym_ref = XEXP (sum, 0);
6077 return SYMBOL_REF_SMALL_P (sym_ref);
6078 #else
6079 return 0;
6080 #endif
6083 /* Return true if either operand is a general purpose register. */
6085 bool
6086 gpr_or_gpr_p (rtx op0, rtx op1)
6088 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6089 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6092 /* Return true if this is a move direct operation between GPR registers and
6093 floating point/VSX registers. */
6095 bool
6096 direct_move_p (rtx op0, rtx op1)
6098 int regno0, regno1;
6100 if (!REG_P (op0) || !REG_P (op1))
6101 return false;
6103 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6104 return false;
6106 regno0 = REGNO (op0);
6107 regno1 = REGNO (op1);
6108 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6109 return false;
6111 if (INT_REGNO_P (regno0))
6112 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6114 else if (INT_REGNO_P (regno1))
6116 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6117 return true;
6119 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6120 return true;
6123 return false;
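/* For example (illustrative), with TARGET_DIRECT_MOVE (power8) a
   DImode move between a GPR and a VSX register can use mtvsrd/mfvsrd
   instead of bouncing through memory; TARGET_MFPGPR covers only the
   older power6x mffgpr/mftgpr forms between GPRs and FPRs.  */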
6126 /* Return true if this is a load or store quad operation. This function does
6127 not handle the atomic quad memory instructions. */
6129 bool
6130 quad_load_store_p (rtx op0, rtx op1)
6132 bool ret;
6134 if (!TARGET_QUAD_MEMORY)
6135 ret = false;
6137 else if (REG_P (op0) && MEM_P (op1))
6138 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6139 && quad_memory_operand (op1, GET_MODE (op1))
6140 && !reg_overlap_mentioned_p (op0, op1));
6142 else if (MEM_P (op0) && REG_P (op1))
6143 ret = (quad_memory_operand (op0, GET_MODE (op0))
6144 && quad_int_reg_operand (op1, GET_MODE (op1)));
6146 else
6147 ret = false;
6149 if (TARGET_DEBUG_ADDR)
6151 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6152 ret ? "true" : "false");
6153 debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
6156 return ret;
6159 /* Given an address, return a constant offset term if one exists. */
6161 static rtx
6162 address_offset (rtx op)
6164 if (GET_CODE (op) == PRE_INC
6165 || GET_CODE (op) == PRE_DEC)
6166 op = XEXP (op, 0);
6167 else if (GET_CODE (op) == PRE_MODIFY
6168 || GET_CODE (op) == LO_SUM)
6169 op = XEXP (op, 1);
6171 if (GET_CODE (op) == CONST)
6172 op = XEXP (op, 0);
6174 if (GET_CODE (op) == PLUS)
6175 op = XEXP (op, 1);
6177 if (CONST_INT_P (op))
6178 return op;
6180 return NULL_RTX;
6183 /* Return true if the MEM operand is a memory operand suitable for use
6184 with a (full width, possibly multiple) gpr load/store. On
6185 powerpc64 this means the offset must be divisible by 4.
6186 Implements 'Y' constraint.
6188 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6189 a constraint function we know the operand has satisfied a suitable
6190 memory predicate. Also accept some odd rtl generated by reload
6191 (see rs6000_legitimize_reload_address for various forms). It is
6192 important that reload rtl be accepted by appropriate constraints
6193 but not by the operand predicate.
6195 Offsetting a lo_sum should not be allowed, except where we know by
6196 alignment that a 32k boundary is not crossed, but see the ???
6197 comment in rs6000_legitimize_reload_address. Note that by
6198 "offsetting" here we mean a further offset to access parts of the
6199 MEM. It's fine to have a lo_sum where the inner address is offset
6200 from a sym, since the same sym+offset will appear in the high part
6201 of the address calculation. */
6203 bool
6204 mem_operand_gpr (rtx op, enum machine_mode mode)
6206 unsigned HOST_WIDE_INT offset;
6207 int extra;
6208 rtx addr = XEXP (op, 0);
6210 op = address_offset (addr);
6211 if (op == NULL_RTX)
6212 return true;
6214 offset = INTVAL (op);
6215 if (TARGET_POWERPC64 && (offset & 3) != 0)
6216 return false;
6218 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
6219 if (extra < 0)
6220 extra = 0;
6222 if (GET_CODE (addr) == LO_SUM)
6223 /* For lo_sum addresses, we must allow any offset except one that
6224 causes a wrap, so test only the low 16 bits. */
6225 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
6227 return offset + 0x8000 < 0x10000u - extra;
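/* As a worked example (offsets hypothetical): on powerpc64 a TImode
   access at offset 32760 is rejected.  EXTRA is 16 - 8 = 8, and
   32760 + 0x8000 is not below 0x10000 - 8; concretely, the load of
   the second doubleword, at offset 32768, would overflow the 16-bit
   displacement field.  */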
6230 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
6232 static bool
6233 reg_offset_addressing_ok_p (enum machine_mode mode)
6235 switch (mode)
6237 case V16QImode:
6238 case V8HImode:
6239 case V4SFmode:
6240 case V4SImode:
6241 case V2DFmode:
6242 case V2DImode:
6243 case V1TImode:
6244 case TImode:
6245 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6246 TImode is not a vector mode, if we want to use the VSX registers to
6247 move it around, we need to restrict ourselves to reg+reg
6248 addressing. */
6249 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6250 return false;
6251 break;
6253 case V4HImode:
6254 case V2SImode:
6255 case V1DImode:
6256 case V2SFmode:
6257 /* Paired vector modes. Only reg+reg addressing is valid. */
6258 if (TARGET_PAIRED_FLOAT)
6259 return false;
6260 break;
6262 case SDmode:
6263 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6264 addressing for the LFIWZX and STFIWX instructions. */
6265 if (TARGET_NO_SDMODE_STACK)
6266 return false;
6267 break;
6269 default:
6270 break;
6273 return true;
6276 static bool
6277 virtual_stack_registers_memory_p (rtx op)
6279 int regnum;
6281 if (GET_CODE (op) == REG)
6282 regnum = REGNO (op);
6284 else if (GET_CODE (op) == PLUS
6285 && GET_CODE (XEXP (op, 0)) == REG
6286 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6287 regnum = REGNO (XEXP (op, 0));
6289 else
6290 return false;
6292 return (regnum >= FIRST_VIRTUAL_REGISTER
6293 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
6296 /* Return true if a MODE sized memory accesses to OP plus OFFSET
6297 is known to not straddle a 32k boundary. */
6299 static bool
6300 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
6301 enum machine_mode mode)
6303 tree decl, type;
6304 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
6306 if (GET_CODE (op) != SYMBOL_REF)
6307 return false;
6309 dsize = GET_MODE_SIZE (mode);
6310 decl = SYMBOL_REF_DECL (op);
6311 if (!decl)
6313 if (dsize == 0)
6314 return false;
6316 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
6317 replacing memory addresses with an anchor plus offset. We
6318 could find the decl by rummaging around in the block->objects
6319 VEC for the given offset but that seems like too much work. */
6320 dalign = BITS_PER_UNIT;
6321 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
6322 && SYMBOL_REF_ANCHOR_P (op)
6323 && SYMBOL_REF_BLOCK (op) != NULL)
6325 struct object_block *block = SYMBOL_REF_BLOCK (op);
6327 dalign = block->alignment;
6328 offset += SYMBOL_REF_BLOCK_OFFSET (op);
6330 else if (CONSTANT_POOL_ADDRESS_P (op))
6332 /* It would be nice to have get_pool_align().. */
6333 enum machine_mode cmode = get_pool_mode (op);
6335 dalign = GET_MODE_ALIGNMENT (cmode);
6338 else if (DECL_P (decl))
6340 dalign = DECL_ALIGN (decl);
6342 if (dsize == 0)
6344 /* Allow BLKmode when the entire object is known to not
6345 cross a 32k boundary. */
6346 if (!DECL_SIZE_UNIT (decl))
6347 return false;
6349 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
6350 return false;
6352 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
6353 if (dsize > 32768)
6354 return false;
6356 return dalign / BITS_PER_UNIT >= dsize;
6359 else
6361 type = TREE_TYPE (decl);
6363 dalign = TYPE_ALIGN (type);
6364 if (CONSTANT_CLASS_P (decl))
6365 dalign = CONSTANT_ALIGNMENT (decl, dalign);
6366 else
6367 dalign = DATA_ALIGNMENT (decl, dalign);
6369 if (dsize == 0)
6371 /* BLKmode, check the entire object. */
6372 if (TREE_CODE (decl) == STRING_CST)
6373 dsize = TREE_STRING_LENGTH (decl);
6374 else if (TYPE_SIZE_UNIT (type)
6375 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
6376 dsize = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6377 else
6378 return false;
6379 if (dsize > 32768)
6380 return false;
6382 return dalign / BITS_PER_UNIT >= dsize;
6386 /* Find how many bits of the alignment we know for this access. */
6387 mask = dalign / BITS_PER_UNIT - 1;
6388 lsb = offset & -offset;
6389 mask &= lsb - 1;
6390 dalign = mask + 1;
6392 return dalign >= dsize;
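/* As a worked example (values hypothetical): with DALIGN = 128 bits
   (16 bytes) and OFFSET = 24, lsb = 8, so only 8 bytes of alignment
   are known at the access point; an 8-byte access passes, a 16-byte
   one does not.  */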
6395 static bool
6396 constant_pool_expr_p (rtx op)
6398 rtx base, offset;
6400 split_const (op, &base, &offset);
6401 return (GET_CODE (base) == SYMBOL_REF
6402 && CONSTANT_POOL_ADDRESS_P (base)
6403 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
6406 static const_rtx tocrel_base, tocrel_offset;
6408 /* Return true if OP is a toc pointer relative address (the output
6409 of create_TOC_reference). If STRICT, do not match high part or
6410 non-split -mcmodel=large/medium toc pointer relative addresses. */
6412 bool
6413 toc_relative_expr_p (const_rtx op, bool strict)
6415 if (!TARGET_TOC)
6416 return false;
6418 if (TARGET_CMODEL != CMODEL_SMALL)
6420 /* Only match the low part. */
6421 if (GET_CODE (op) == LO_SUM
6422 && REG_P (XEXP (op, 0))
6423 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
6424 op = XEXP (op, 1);
6425 else if (strict)
6426 return false;
6429 tocrel_base = op;
6430 tocrel_offset = const0_rtx;
6431 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
6433 tocrel_base = XEXP (op, 0);
6434 tocrel_offset = XEXP (op, 1);
6437 return (GET_CODE (tocrel_base) == UNSPEC
6438 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
6441 /* Return true if X is a constant pool address, and also for cmodel=medium
6442 if X is a toc-relative address known to be offsettable within MODE. */
6444 bool
6445 legitimate_constant_pool_address_p (const_rtx x, enum machine_mode mode,
6446 bool strict)
6448 return (toc_relative_expr_p (x, strict)
6449 && (TARGET_CMODEL != CMODEL_MEDIUM
6450 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
6451 || mode == QImode
6452 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
6453 INTVAL (tocrel_offset), mode)));
6456 static bool
6457 legitimate_small_data_p (enum machine_mode mode, rtx x)
6459 return (DEFAULT_ABI == ABI_V4
6460 && !flag_pic && !TARGET_TOC
6461 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
6462 && small_data_operand (x, mode));
6465 /* SPE offset addressing is limited to 5 bits' worth of doublewords. */
6466 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
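/* That is, the representable offsets are 0, 8, 16, ..., 248: 0xf8
   keeps bits 3-7, so e.g. SPE_CONST_OFFSET_OK (250) is false because
   250 has bit 1 set, and SPE_CONST_OFFSET_OK (256) is false because
   256 is beyond bit 7.  */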
6468 bool
6469 rs6000_legitimate_offset_address_p (enum machine_mode mode, rtx x,
6470 bool strict, bool worst_case)
6472 unsigned HOST_WIDE_INT offset;
6473 unsigned int extra;
6475 if (GET_CODE (x) != PLUS)
6476 return false;
6477 if (!REG_P (XEXP (x, 0)))
6478 return false;
6479 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6480 return false;
6481 if (!reg_offset_addressing_ok_p (mode))
6482 return virtual_stack_registers_memory_p (x);
6483 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
6484 return true;
6485 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6486 return false;
6488 offset = INTVAL (XEXP (x, 1));
6489 extra = 0;
6490 switch (mode)
6492 case V4HImode:
6493 case V2SImode:
6494 case V1DImode:
6495 case V2SFmode:
6496 /* SPE vector modes. */
6497 return SPE_CONST_OFFSET_OK (offset);
6499 case DFmode:
6500 case DDmode:
6501 case DImode:
6502 /* On e500v2, we may have:
6504 (subreg:DF (mem:DI (plus (reg) (const_int))) 0),
6506 which gets addressed with evldd instructions. */
6507 if (TARGET_E500_DOUBLE)
6508 return SPE_CONST_OFFSET_OK (offset);
6510 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
6511 addressing. */
6512 if (VECTOR_MEM_VSX_P (mode))
6513 return false;
6515 if (!worst_case)
6516 break;
6517 if (!TARGET_POWERPC64)
6518 extra = 4;
6519 else if (offset & 3)
6520 return false;
6521 break;
6523 case TFmode:
6524 if (TARGET_E500_DOUBLE)
6525 return (SPE_CONST_OFFSET_OK (offset)
6526 && SPE_CONST_OFFSET_OK (offset + 8));
6527 /* fall through */
6529 case TDmode:
6530 case TImode:
6531 case PTImode:
6532 extra = 8;
6533 if (!worst_case)
6534 break;
6535 if (!TARGET_POWERPC64)
6536 extra = 12;
6537 else if (offset & 3)
6538 return false;
6539 break;
6541 default:
6542 break;
6545 offset += 0x8000;
6546 return offset < 0x10000 - extra;
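/* As a worked example (offsets hypothetical): for TImode with
   WORST_CASE on 32-bit, EXTRA is 12, so reg+32752 is the largest
   word-aligned offset accepted -- the last of the four word loads
   then addresses reg+32764, still within the signed 16-bit
   displacement.  */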
6549 bool
6550 legitimate_indexed_address_p (rtx x, int strict)
6552 rtx op0, op1;
6554 if (GET_CODE (x) != PLUS)
6555 return false;
6557 op0 = XEXP (x, 0);
6558 op1 = XEXP (x, 1);
6560 /* Recognize the rtl generated by reload which we know will later be
6561 replaced with proper base and index regs. */
6562 if (!strict
6563 && reload_in_progress
6564 && (REG_P (op0) || GET_CODE (op0) == PLUS)
6565 && REG_P (op1))
6566 return true;
6568 return (REG_P (op0) && REG_P (op1)
6569 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
6570 && INT_REG_OK_FOR_INDEX_P (op1, strict))
6571 || (INT_REG_OK_FOR_BASE_P (op1, strict)
6572 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
6575 bool
6576 avoiding_indexed_address_p (enum machine_mode mode)
6578 /* Avoid indexed addressing for modes that have non-indexed
6579 load/store instruction forms. */
6580 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
6583 bool
6584 legitimate_indirect_address_p (rtx x, int strict)
6586 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
6589 bool
6590 macho_lo_sum_memory_operand (rtx x, enum machine_mode mode)
6592 if (!TARGET_MACHO || !flag_pic
6593 || mode != SImode || GET_CODE (x) != MEM)
6594 return false;
6595 x = XEXP (x, 0);
6597 if (GET_CODE (x) != LO_SUM)
6598 return false;
6599 if (GET_CODE (XEXP (x, 0)) != REG)
6600 return false;
6601 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
6602 return false;
6603 x = XEXP (x, 1);
6605 return CONSTANT_P (x);
6608 static bool
6609 legitimate_lo_sum_address_p (enum machine_mode mode, rtx x, int strict)
6611 if (GET_CODE (x) != LO_SUM)
6612 return false;
6613 if (GET_CODE (XEXP (x, 0)) != REG)
6614 return false;
6615 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6616 return false;
6617 /* Restrict addressing for DI because of our SUBREG hackery. */
6618 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
6619 return false;
6620 x = XEXP (x, 1);
6622 if (TARGET_ELF || TARGET_MACHO)
6624 bool large_toc_ok;
6626 if (DEFAULT_ABI == ABI_V4 && flag_pic)
6627 return false;
6628 /* LRA doesn't use LEGITIMIZE_RELOAD_ADDRESS, as that usually calls
6629 push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
6630 recognizes some LO_SUM addresses as valid although this
6631 function says the opposite. In most cases LRA can generate
6632 correct code for address reloads via its own transformations,
6633 but it cannot manage some LO_SUM cases. So we need to add
6634 code analogous to that in rs6000_legitimize_reload_address for
6635 LO_SUM here, saying that some addresses are still valid. */
6636 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
6637 && small_toc_ref (x, VOIDmode));
6638 if (TARGET_TOC && ! large_toc_ok)
6639 return false;
6640 if (GET_MODE_NUNITS (mode) != 1)
6641 return false;
6642 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6643 && !(/* ??? Assume floating point reg based on mode? */
6644 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
6645 && (mode == DFmode || mode == DDmode)))
6646 return false;
6648 return CONSTANT_P (x) || large_toc_ok;
6651 return false;
6655 /* Try machine-dependent ways of modifying an illegitimate address
6656 to be legitimate. If we find one, return the new, valid address.
6657 This is used from only one place: `memory_address' in explow.c.
6659 OLDX is the address as it was before break_out_memory_refs was
6660 called. In some cases it is useful to look at this to decide what
6661 needs to be done.
6663 It is always safe for this function to do nothing. It exists to
6664 recognize opportunities to optimize the output.
6666 On RS/6000, first check for the sum of a register with a constant
6667 integer that is out of range. If so, generate code to add the
6668 constant with the low-order 16 bits masked to the register and force
6669 this result into another register (this can be done with `cau').
6670 Then generate an address of REG+(CONST&0xffff), allowing for the
6671 possibility of bit 16 being a one.
6673 Then check for the sum of a register and something not constant, try to
6674 load the non-constant part into a register and return the sum. */
6676 static rtx
6677 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
6678 enum machine_mode mode)
6680 unsigned int extra;
6682 if (!reg_offset_addressing_ok_p (mode))
6684 if (virtual_stack_registers_memory_p (x))
6685 return x;
6687 /* In theory we should not be seeing addresses of the form reg+0,
6688 but just in case it is generated, optimize it away. */
6689 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
6690 return force_reg (Pmode, XEXP (x, 0));
6692 /* For TImode with load/store quad, restrict addresses to just a single
6693 pointer, so it works with both GPRs and VSX registers. */
6694 /* Make sure both operands are registers. */
6695 else if (GET_CODE (x) == PLUS
6696 && (mode != TImode || !TARGET_QUAD_MEMORY))
6697 return gen_rtx_PLUS (Pmode,
6698 force_reg (Pmode, XEXP (x, 0)),
6699 force_reg (Pmode, XEXP (x, 1)));
6700 else
6701 return force_reg (Pmode, x);
6703 if (GET_CODE (x) == SYMBOL_REF)
6705 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
6706 if (model != 0)
6707 return rs6000_legitimize_tls_address (x, model);
6710 extra = 0;
6711 switch (mode)
6713 case TFmode:
6714 case TDmode:
6715 case TImode:
6716 case PTImode:
6717 /* As in legitimate_offset_address_p we do not assume
6718 worst-case. The mode here is just a hint as to the registers
6719 used. A TImode is usually in gprs, but may actually be in
6720 fprs. Leave worst-case scenario for reload to handle via
6721 insn constraints. PTImode is only GPRs. */
6722 extra = 8;
6723 break;
6724 default:
6725 break;
6728 if (GET_CODE (x) == PLUS
6729 && GET_CODE (XEXP (x, 0)) == REG
6730 && GET_CODE (XEXP (x, 1)) == CONST_INT
6731 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
6732 >= 0x10000 - extra)
6733 && !(SPE_VECTOR_MODE (mode)
6734 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
6736 HOST_WIDE_INT high_int, low_int;
6737 rtx sum;
6738 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
6739 if (low_int >= 0x8000 - extra)
6740 low_int = 0;
6741 high_int = INTVAL (XEXP (x, 1)) - low_int;
6742 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
6743 GEN_INT (high_int)), 0);
6744 return plus_constant (Pmode, sum, low_int);
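/* As a worked example (values hypothetical): for reg + 70000,
   low_int = ((70000 & 0xffff) ^ 0x8000) - 0x8000 = 0x1170 and
   high_int = 0x10000, so we emit one addis to form reg + 0x10000 and
   leave the 0x1170 as the displacement of the memory access.  */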
6746 else if (GET_CODE (x) == PLUS
6747 && GET_CODE (XEXP (x, 0)) == REG
6748 && GET_CODE (XEXP (x, 1)) != CONST_INT
6749 && GET_MODE_NUNITS (mode) == 1
6750 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6751 || (/* ??? Assume floating point reg based on mode? */
6752 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6753 && (mode == DFmode || mode == DDmode)))
6754 && !avoiding_indexed_address_p (mode))
6756 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
6757 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
6759 else if (SPE_VECTOR_MODE (mode)
6760 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
6762 if (mode == DImode)
6763 return x;
6764 /* We accept [reg + reg] and [reg + OFFSET]. */
6766 if (GET_CODE (x) == PLUS)
6768 rtx op1 = XEXP (x, 0);
6769 rtx op2 = XEXP (x, 1);
6770 rtx y;
6772 op1 = force_reg (Pmode, op1);
6774 if (GET_CODE (op2) != REG
6775 && (GET_CODE (op2) != CONST_INT
6776 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
6777 || (GET_MODE_SIZE (mode) > 8
6778 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
6779 op2 = force_reg (Pmode, op2);
6781 /* We can't always do [reg + reg] for these, because [reg +
6782 reg + offset] is not a legitimate addressing mode. */
6783 y = gen_rtx_PLUS (Pmode, op1, op2);
6785 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
6786 return force_reg (Pmode, y);
6787 else
6788 return y;
6791 return force_reg (Pmode, x);
6793 else if ((TARGET_ELF
6794 #if TARGET_MACHO
6795 || !MACHO_DYNAMIC_NO_PIC_P
6796 #endif
6798 && TARGET_32BIT
6799 && TARGET_NO_TOC
6800 && ! flag_pic
6801 && GET_CODE (x) != CONST_INT
6802 && GET_CODE (x) != CONST_WIDE_INT
6803 && GET_CODE (x) != CONST_DOUBLE
6804 && CONSTANT_P (x)
6805 && GET_MODE_NUNITS (mode) == 1
6806 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6807 || (/* ??? Assume floating point reg based on mode? */
6808 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6809 && (mode == DFmode || mode == DDmode))))
6811 rtx reg = gen_reg_rtx (Pmode);
6812 if (TARGET_ELF)
6813 emit_insn (gen_elf_high (reg, x));
6814 else
6815 emit_insn (gen_macho_high (reg, x));
6816 return gen_rtx_LO_SUM (Pmode, reg, x);
6818 else if (TARGET_TOC
6819 && GET_CODE (x) == SYMBOL_REF
6820 && constant_pool_expr_p (x)
6821 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
6822 return create_TOC_reference (x, NULL_RTX);
6823 else
6824 return x;
6827 /* Debug version of rs6000_legitimize_address. */
6828 static rtx
6829 rs6000_debug_legitimize_address (rtx x, rtx oldx, enum machine_mode mode)
6831 rtx ret;
6832 rtx_insn *insns;
6834 start_sequence ();
6835 ret = rs6000_legitimize_address (x, oldx, mode);
6836 insns = get_insns ();
6837 end_sequence ();
6839 if (ret != x)
6841 fprintf (stderr,
6842 "\nrs6000_legitimize_address: mode %s, old code %s, "
6843 "new code %s, modified\n",
6844 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
6845 GET_RTX_NAME (GET_CODE (ret)));
6847 fprintf (stderr, "Original address:\n");
6848 debug_rtx (x);
6850 fprintf (stderr, "oldx:\n");
6851 debug_rtx (oldx);
6853 fprintf (stderr, "New address:\n");
6854 debug_rtx (ret);
6856 if (insns)
6858 fprintf (stderr, "Insns added:\n");
6859 debug_rtx_list (insns, 20);
6862 else
6864 fprintf (stderr,
6865 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
6866 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
6868 debug_rtx (x);
6871 if (insns)
6872 emit_insn (insns);
6874 return ret;
6877 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6878 We need to emit DTP-relative relocations. */
6880 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6881 static void
6882 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
6884 switch (size)
6886 case 4:
6887 fputs ("\t.long\t", file);
6888 break;
6889 case 8:
6890 fputs (DOUBLE_INT_ASM_OP, file);
6891 break;
6892 default:
6893 gcc_unreachable ();
6895 output_addr_const (file, x);
6896 fputs ("@dtprel+0x8000", file);
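/* For example (symbol name hypothetical), for SIZE == 4 and a symbol
   "x" this prints
       .long   x@dtprel+0x8000
   the 0x8000 being the usual PowerPC bias of DTP-relative offsets.  */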
6899 /* Return true if X is a symbol that refers to real (rather than emulated)
6900 TLS. */
6902 static bool
6903 rs6000_real_tls_symbol_ref_p (rtx x)
6905 return (GET_CODE (x) == SYMBOL_REF
6906 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
6909 /* In the name of slightly smaller debug output, and to cater to
6910 general assembler lossage, recognize various UNSPEC sequences
6911 and turn them back into a direct symbol reference. */
6913 static rtx
6914 rs6000_delegitimize_address (rtx orig_x)
6916 rtx x, y, offset;
6918 orig_x = delegitimize_mem_from_attrs (orig_x);
6919 x = orig_x;
6920 if (MEM_P (x))
6921 x = XEXP (x, 0);
6923 y = x;
6924 if (TARGET_CMODEL != CMODEL_SMALL
6925 && GET_CODE (y) == LO_SUM)
6926 y = XEXP (y, 1);
6928 offset = NULL_RTX;
6929 if (GET_CODE (y) == PLUS
6930 && GET_MODE (y) == Pmode
6931 && CONST_INT_P (XEXP (y, 1)))
6933 offset = XEXP (y, 1);
6934 y = XEXP (y, 0);
6937 if (GET_CODE (y) == UNSPEC
6938 && XINT (y, 1) == UNSPEC_TOCREL)
6940 #ifdef ENABLE_CHECKING
6941 if (REG_P (XVECEXP (y, 0, 1))
6942 && REGNO (XVECEXP (y, 0, 1)) == TOC_REGISTER)
6944 /* All good. */
6946 else if (GET_CODE (XVECEXP (y, 0, 1)) == DEBUG_EXPR)
6948 /* Weirdness alert. df_note_compute can replace r2 with a
6949 debug_expr when this unspec is in a debug_insn.
6950 Seen in gcc.dg/pr51957-1.c */
6952 else
6954 debug_rtx (orig_x);
6955 abort ();
6957 #endif
6958 y = XVECEXP (y, 0, 0);
6960 #ifdef HAVE_AS_TLS
6961 /* Do not associate thread-local symbols with the original
6962 constant pool symbol. */
6963 if (TARGET_XCOFF
6964 && GET_CODE (y) == SYMBOL_REF
6965 && CONSTANT_POOL_ADDRESS_P (y)
6966 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
6967 return orig_x;
6968 #endif
6970 if (offset != NULL_RTX)
6971 y = gen_rtx_PLUS (Pmode, y, offset);
6972 if (!MEM_P (orig_x))
6973 return y;
6974 else
6975 return replace_equiv_address_nv (orig_x, y);
6978 if (TARGET_MACHO
6979 && GET_CODE (orig_x) == LO_SUM
6980 && GET_CODE (XEXP (orig_x, 1)) == CONST)
6982 y = XEXP (XEXP (orig_x, 1), 0);
6983 if (GET_CODE (y) == UNSPEC
6984 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
6985 return XVECEXP (y, 0, 0);
6988 return orig_x;
6991 /* Return true if X shouldn't be emitted into the debug info.
6992 The linker doesn't like .toc section references from
6993 .debug_* sections, so reject .toc section symbols. */
6995 static bool
6996 rs6000_const_not_ok_for_debug_p (rtx x)
6998 if (GET_CODE (x) == SYMBOL_REF
6999 && CONSTANT_POOL_ADDRESS_P (x))
7001 rtx c = get_pool_constant (x);
7002 enum machine_mode cmode = get_pool_mode (x);
7003 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7004 return true;
7007 return false;
7010 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7012 static GTY(()) rtx rs6000_tls_symbol;
7013 static rtx
7014 rs6000_tls_get_addr (void)
7016 if (!rs6000_tls_symbol)
7017 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7019 return rs6000_tls_symbol;
7022 /* Construct the SYMBOL_REF for TLS GOT references. */
7024 static GTY(()) rtx rs6000_got_symbol;
7025 static rtx
7026 rs6000_got_sym (void)
7028 if (!rs6000_got_symbol)
7030 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7031 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7032 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7035 return rs6000_got_symbol;
7038 /* AIX Thread-Local Address support. */
7040 static rtx
7041 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7043 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7044 const char *name;
7045 char *tlsname;
7047 name = XSTR (addr, 0);
7048 /* Append TLS CSECT qualifier, unless the symbol already is qualified
7049 or the symbol will be in the TLS private data section. */
7050 if (name[strlen (name) - 1] != ']'
7051 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7052 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7054 tlsname = XALLOCAVEC (char, strlen (name) + 4);
7055 strcpy (tlsname, name);
7056 strcat (tlsname,
7057 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7058 tlsaddr = copy_rtx (addr);
7059 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7061 else
7062 tlsaddr = addr;
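/* For example (names hypothetical), a public symbol "foo" becomes
   "foo[TL]" here and a BSS-initialized one becomes "foo[UL]", while a
   name already ending in ']' is left untouched.  */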
7064 /* Place addr into TOC constant pool. */
7065 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7067 /* Output the TOC entry and create the MEM referencing the value. */
7068 if (constant_pool_expr_p (XEXP (sym, 0))
7069 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7071 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7072 mem = gen_const_mem (Pmode, tocref);
7073 set_mem_alias_set (mem, get_TOC_alias_set ());
7075 else
7076 return sym;
7078 /* Use global-dynamic for local-dynamic. */
7079 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7080 || model == TLS_MODEL_LOCAL_DYNAMIC)
7082 /* Create new TOC reference for @m symbol. */
7083 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7084 tlsname = XALLOCAVEC (char, strlen (name) + 1);
7085 strcpy (tlsname, "*LCM");
7086 strcat (tlsname, name + 3);
7087 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7088 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7089 tocref = create_TOC_reference (modaddr, NULL_RTX);
7090 rtx modmem = gen_const_mem (Pmode, tocref);
7091 set_mem_alias_set (modmem, get_TOC_alias_set ());
7093 rtx modreg = gen_reg_rtx (Pmode);
7094 emit_insn (gen_rtx_SET (VOIDmode, modreg, modmem));
7096 tmpreg = gen_reg_rtx (Pmode);
7097 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7099 dest = gen_reg_rtx (Pmode);
7100 if (TARGET_32BIT)
7101 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7102 else
7103 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7104 return dest;
7106 /* Obtain TLS pointer: 32 bit call or 64 bit GPR 13. */
7107 else if (TARGET_32BIT)
7109 tlsreg = gen_reg_rtx (SImode);
7110 emit_insn (gen_tls_get_tpointer (tlsreg));
7112 else
7113 tlsreg = gen_rtx_REG (DImode, 13);
7115 /* Load the TOC value into temporary register. */
7116 tmpreg = gen_reg_rtx (Pmode);
7117 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7118 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7119 gen_rtx_MINUS (Pmode, addr, tlsreg));
7121 /* Add TOC symbol value to TLS pointer. */
7122 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7124 return dest;
7127 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7128 this (thread-local) address. */
7130 static rtx
7131 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7133 rtx dest, insn;
7135 if (TARGET_XCOFF)
7136 return rs6000_legitimize_tls_address_aix (addr, model);
7138 dest = gen_reg_rtx (Pmode);
7139 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7141 rtx tlsreg;
7143 if (TARGET_64BIT)
7145 tlsreg = gen_rtx_REG (Pmode, 13);
7146 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7148 else
7150 tlsreg = gen_rtx_REG (Pmode, 2);
7151 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7153 emit_insn (insn);
7155 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7157 rtx tlsreg, tmp;
7159 tmp = gen_reg_rtx (Pmode);
7160 if (TARGET_64BIT)
7162 tlsreg = gen_rtx_REG (Pmode, 13);
7163 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7165 else
7167 tlsreg = gen_rtx_REG (Pmode, 2);
7168 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7170 emit_insn (insn);
7171 if (TARGET_64BIT)
7172 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7173 else
7174 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7175 emit_insn (insn);
7177 else
7179 rtx r3, got, tga, tmp1, tmp2, call_insn;
7181 /* We currently use relocations like @got@tlsgd for tls, which
7182 means the linker will handle allocation of tls entries, placing
7183 them in the .got section. So use a pointer to the .got section,
7184 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7185 or to secondary GOT sections used by 32-bit -fPIC. */
7186 if (TARGET_64BIT)
7187 got = gen_rtx_REG (Pmode, 2);
7188 else
7190 if (flag_pic == 1)
7191 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7192 else
7194 rtx gsym = rs6000_got_sym ();
7195 got = gen_reg_rtx (Pmode);
7196 if (flag_pic == 0)
7197 rs6000_emit_move (got, gsym, Pmode);
7198 else
7200 rtx mem, lab, last;
7202 tmp1 = gen_reg_rtx (Pmode);
7203 tmp2 = gen_reg_rtx (Pmode);
7204 mem = gen_const_mem (Pmode, tmp1);
7205 lab = gen_label_rtx ();
7206 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7207 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7208 if (TARGET_LINK_STACK)
7209 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7210 emit_move_insn (tmp2, mem);
7211 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7212 set_unique_reg_note (last, REG_EQUAL, gsym);
7217 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7219 tga = rs6000_tls_get_addr ();
7220 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7221 1, const0_rtx, Pmode);
7223 r3 = gen_rtx_REG (Pmode, 3);
7224 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7226 if (TARGET_64BIT)
7227 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7228 else
7229 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7231 else if (DEFAULT_ABI == ABI_V4)
7232 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7233 else
7234 gcc_unreachable ();
7235 call_insn = last_call_insn ();
7236 PATTERN (call_insn) = insn;
7237 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7238 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7239 pic_offset_table_rtx);
7241 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7243 tga = rs6000_tls_get_addr ();
7244 tmp1 = gen_reg_rtx (Pmode);
7245 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7246 1, const0_rtx, Pmode);
7248 r3 = gen_rtx_REG (Pmode, 3);
7249 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7251 if (TARGET_64BIT)
7252 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7253 else
7254 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7256 else if (DEFAULT_ABI == ABI_V4)
7257 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7258 else
7259 gcc_unreachable ();
7260 call_insn = last_call_insn ();
7261 PATTERN (call_insn) = insn;
7262 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7263 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7264 pic_offset_table_rtx);
7266 if (rs6000_tls_size == 16)
7268 if (TARGET_64BIT)
7269 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7270 else
7271 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7273 else if (rs6000_tls_size == 32)
7275 tmp2 = gen_reg_rtx (Pmode);
7276 if (TARGET_64BIT)
7277 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7278 else
7279 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7280 emit_insn (insn);
7281 if (TARGET_64BIT)
7282 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7283 else
7284 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7286 else
7288 tmp2 = gen_reg_rtx (Pmode);
7289 if (TARGET_64BIT)
7290 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7291 else
7292 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7293 emit_insn (insn);
7294 insn = gen_rtx_SET (Pmode, dest,
7295 gen_rtx_PLUS (Pmode, tmp2, tmp1));
7297 emit_insn (insn);
7299 else
7301 /* Initial-exec (IE), or local-exec (LE) with a 64-bit offset. */
7302 tmp2 = gen_reg_rtx (Pmode);
7303 if (TARGET_64BIT)
7304 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7305 else
7306 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7307 emit_insn (insn);
7308 if (TARGET_64BIT)
7309 insn = gen_tls_tls_64 (dest, tmp2, addr);
7310 else
7311 insn = gen_tls_tls_32 (dest, tmp2, addr);
7312 emit_insn (insn);
7316 return dest;
7319 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7321 static bool
7322 rs6000_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7324 if (GET_CODE (x) == HIGH
7325 && GET_CODE (XEXP (x, 0)) == UNSPEC)
7326 return true;
7328 /* A TLS symbol in the TOC cannot contain a sum. */
7329 if (GET_CODE (x) == CONST
7330 && GET_CODE (XEXP (x, 0)) == PLUS
7331 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7332 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
7333 return true;
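  /* For instance, (const:DI (plus:DI (symbol_ref:DI ("x"))
     (const_int 8))) where the SYMBOL_REF has a nonzero TLS model
     is rejected by the test above.  */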
7335 /* Do not place an ELF TLS symbol in the constant pool. */
7336 return TARGET_ELF && tls_referenced_p (x);
7339 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
7340 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
7341 can be addressed relative to the toc pointer. */
7343 static bool
7344 use_toc_relative_ref (rtx sym)
7346 return ((constant_pool_expr_p (sym)
7347 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
7348 get_pool_mode (sym)))
7349 || (TARGET_CMODEL == CMODEL_MEDIUM
7350 && SYMBOL_REF_LOCAL_P (sym)));
7353 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
7354 replace the input X, or the original X if no replacement is called for.
7355 The output parameter *WIN is 1 if the calling macro should goto WIN,
7356 0 if it should not.
7358 For RS/6000, we wish to handle large displacements off a base
7359 register by splitting the addend across an addi/addis pair and the mem insn.
7360 This cuts the number of extra insns needed from 3 to 1.
7362 On Darwin, we use this to generate code for floating point constants.
7363 A movsf_low is generated so we wind up with 2 instructions rather than 3.
7364 The Darwin code is inside #if TARGET_MACHO because only then are the
7365 machopic_* functions defined. */
7366 static rtx
7367 rs6000_legitimize_reload_address (rtx x, enum machine_mode mode,
7368 int opnum, int type,
7369 int ind_levels ATTRIBUTE_UNUSED, int *win)
7371 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7373 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
7374 DFmode/DImode MEM. */
7375 if (reg_offset_p
7376 && opnum == 1
7377 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
7378 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
7379 reg_offset_p = false;
7381 /* We must recognize output that we have already generated ourselves. */
7382 if (GET_CODE (x) == PLUS
7383 && GET_CODE (XEXP (x, 0)) == PLUS
7384 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7385 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7386 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7388 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7389 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7390 opnum, (enum reload_type) type);
7391 *win = 1;
7392 return x;
7395 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
7396 if (GET_CODE (x) == LO_SUM
7397 && GET_CODE (XEXP (x, 0)) == HIGH)
7399 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7400 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7401 opnum, (enum reload_type) type);
7402 *win = 1;
7403 return x;
7406 #if TARGET_MACHO
7407 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
7408 && GET_CODE (x) == LO_SUM
7409 && GET_CODE (XEXP (x, 0)) == PLUS
7410 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
7411 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
7412 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
7413 && machopic_operand_p (XEXP (x, 1)))
7415 /* Result of previous invocation of this function on Darwin
7416 floating point constant. */
7417 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7418 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7419 opnum, (enum reload_type) type);
7420 *win = 1;
7421 return x;
7423 #endif
7425 if (TARGET_CMODEL != CMODEL_SMALL
7426 && reg_offset_p
7427 && small_toc_ref (x, VOIDmode))
7429 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
7430 x = gen_rtx_LO_SUM (Pmode, hi, x);
7431 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7432 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7433 opnum, (enum reload_type) type);
7434 *win = 1;
7435 return x;
7438 if (GET_CODE (x) == PLUS
7439 && GET_CODE (XEXP (x, 0)) == REG
7440 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
7441 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
7442 && GET_CODE (XEXP (x, 1)) == CONST_INT
7443 && reg_offset_p
7444 && !SPE_VECTOR_MODE (mode)
7445 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7446 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
7448 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
7449 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
7450 HOST_WIDE_INT high
7451 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
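  /* E.g., val = 0x12348000 gives low = -0x8000 (the low 16 bits
     sign-extended) and high = 0x12350000; high + low reconstructs
     val, and each half fits in a 16-bit immediate field.  */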
7453 /* Check for 32-bit overflow. */
7454 if (high + low != val)
7456 *win = 0;
7457 return x;
7460 /* Reload the high part into a base reg; leave the low part
7461 in the mem directly. */
7463 x = gen_rtx_PLUS (GET_MODE (x),
7464 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
7465 GEN_INT (high)),
7466 GEN_INT (low));
7468 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7469 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7470 opnum, (enum reload_type) type);
7471 *win = 1;
7472 return x;
7475 if (GET_CODE (x) == SYMBOL_REF
7476 && reg_offset_p
7477 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
7478 && !SPE_VECTOR_MODE (mode)
7479 #if TARGET_MACHO
7480 && DEFAULT_ABI == ABI_DARWIN
7481 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
7482 && machopic_symbol_defined_p (x)
7483 #else
7484 && DEFAULT_ABI == ABI_V4
7485 && !flag_pic
7486 #endif
7487 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
7488 The same goes for DImode without 64-bit gprs and DFmode and DDmode
7489 without fprs.
7490 ??? Assume floating point reg based on mode? This assumption is
7491 violated by eg. powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
7492 where reload ends up doing a DFmode load of a constant from
7493 mem using two gprs. Unfortunately, at this point reload
7494 hasn't yet selected regs so poking around in reload data
7495 won't help and even if we could figure out the regs reliably,
7496 we'd still want to allow this transformation when the mem is
7497 naturally aligned. Since we say the address is good here, we
7498 can't disable offsets from LO_SUMs in mem_operand_gpr.
7499 FIXME: Allow offset from lo_sum for other modes too, when
7500 mem is sufficiently aligned. */
7501 && mode != TFmode
7502 && mode != TDmode
7503 && (mode != TImode || !TARGET_VSX_TIMODE)
7504 && mode != PTImode
7505 && (mode != DImode || TARGET_POWERPC64)
7506 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
7507 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
7509 #if TARGET_MACHO
7510 if (flag_pic)
7512 rtx offset = machopic_gen_offset (x);
7513 x = gen_rtx_LO_SUM (GET_MODE (x),
7514 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
7515 gen_rtx_HIGH (Pmode, offset)), offset);
7517 else
7518 #endif
7519 x = gen_rtx_LO_SUM (GET_MODE (x),
7520 gen_rtx_HIGH (Pmode, x), x);
7522 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7523 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7524 opnum, (enum reload_type) type);
7525 *win = 1;
7526 return x;
7529 /* Reload an offset address wrapped by an AND that represents the
7530 masking of the lower bits. Strip the outer AND and let reload
7531 convert the offset address into an indirect address. For VSX,
7532 force reload to create the address with an AND in a separate
7533 register, because we can't guarantee an altivec register will
7534 be used. */
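  /* For instance, (and:SI (plus:SI (reg:SI 3) (const_int 16))
     (const_int -16)) is rewritten below to the inner PLUS; lvx/stvx
     ignore the low four bits of the effective address anyway.  */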
7535 if (VECTOR_MEM_ALTIVEC_P (mode)
7536 && GET_CODE (x) == AND
7537 && GET_CODE (XEXP (x, 0)) == PLUS
7538 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7539 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7540 && GET_CODE (XEXP (x, 1)) == CONST_INT
7541 && INTVAL (XEXP (x, 1)) == -16)
7543 x = XEXP (x, 0);
7544 *win = 1;
7545 return x;
7548 if (TARGET_TOC
7549 && reg_offset_p
7550 && GET_CODE (x) == SYMBOL_REF
7551 && use_toc_relative_ref (x))
7553 x = create_TOC_reference (x, NULL_RTX);
7554 if (TARGET_CMODEL != CMODEL_SMALL)
7555 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7556 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7557 opnum, (enum reload_type) type);
7558 *win = 1;
7559 return x;
7561 *win = 0;
7562 return x;
7565 /* Debug version of rs6000_legitimize_reload_address. */
7566 static rtx
7567 rs6000_debug_legitimize_reload_address (rtx x, enum machine_mode mode,
7568 int opnum, int type,
7569 int ind_levels, int *win)
7571 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
7572 ind_levels, win);
7573 fprintf (stderr,
7574 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
7575 "type = %d, ind_levels = %d, win = %d, original addr:\n",
7576 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
7577 debug_rtx (x);
7579 if (x == ret)
7580 fprintf (stderr, "Same address returned\n");
7581 else if (!ret)
7582 fprintf (stderr, "NULL returned\n");
7583 else
7585 fprintf (stderr, "New address:\n");
7586 debug_rtx (ret);
7589 return ret;
7592 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
7593 that is a valid memory address for an instruction.
7594 The MODE argument is the machine mode for the MEM expression
7595 that wants to use this address.
7597 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
7598 refers to a constant pool entry of an address (or the sum of it
7599 plus a constant), a short (16-bit signed) constant plus a register,
7600 the sum of two registers, or a register indirect, possibly with an
7601 auto-increment. For DFmode, DDmode and DImode with a constant plus
7602 register, we must ensure that both words are addressable, or on
7603 PowerPC64 that the offset is word-aligned.
7605 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
7606 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
7607 because adjacent memory cells are accessed by adding word-sized offsets
7608 during assembly output. */
7609 static bool
7610 rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
7612 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7614 /* If this is an unaligned stvx/ldvx type address, discard the outer AND. */
7615 if (VECTOR_MEM_ALTIVEC_P (mode)
7616 && GET_CODE (x) == AND
7617 && GET_CODE (XEXP (x, 1)) == CONST_INT
7618 && INTVAL (XEXP (x, 1)) == -16)
7619 x = XEXP (x, 0);
7621 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
7622 return 0;
7623 if (legitimate_indirect_address_p (x, reg_ok_strict))
7624 return 1;
7625 if (TARGET_UPDATE
7626 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
7627 && mode_supports_pre_incdec_p (mode)
7628 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
7629 return 1;
7630 if (virtual_stack_registers_memory_p (x))
7631 return 1;
7632 if (reg_offset_p && legitimate_small_data_p (mode, x))
7633 return 1;
7634 if (reg_offset_p
7635 && legitimate_constant_pool_address_p (x, mode,
7636 reg_ok_strict || lra_in_progress))
7637 return 1;
7638 /* For TImode, if we have load/store quad and TImode in VSX registers, only
7639 allow register indirect addresses. This will allow the values to go in
7640 either GPRs or VSX registers without reloading. The vector types would
7641 tend to go into VSX registers, so we allow REG+REG, while TImode seems
7642 somewhat split, in that some uses are GPR based, and some VSX based. */
7643 if (mode == TImode && TARGET_QUAD_MEMORY && TARGET_VSX_TIMODE)
7644 return 0;
7645 /* If not REG_OK_STRICT (before reload), let any stack offset pass. */
7646 if (! reg_ok_strict
7647 && reg_offset_p
7648 && GET_CODE (x) == PLUS
7649 && GET_CODE (XEXP (x, 0)) == REG
7650 && (XEXP (x, 0) == virtual_stack_vars_rtx
7651 || XEXP (x, 0) == arg_pointer_rtx)
7652 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7653 return 1;
7654 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
7655 return 1;
7656 if (mode != TFmode
7657 && mode != TDmode
7658 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7659 || TARGET_POWERPC64
7660 || (mode != DFmode && mode != DDmode)
7661 || (TARGET_E500_DOUBLE && mode != DDmode))
7662 && (TARGET_POWERPC64 || mode != DImode)
7663 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
7664 && mode != PTImode
7665 && !avoiding_indexed_address_p (mode)
7666 && legitimate_indexed_address_p (x, reg_ok_strict))
7667 return 1;
7668 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
7669 && mode_supports_pre_modify_p (mode)
7670 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
7671 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
7672 reg_ok_strict, false)
7673 || (!avoiding_indexed_address_p (mode)
7674 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
7675 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7676 return 1;
7677 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
7678 return 1;
7679 return 0;
7682 /* Debug version of rs6000_legitimate_address_p. */
7683 static bool
7684 rs6000_debug_legitimate_address_p (enum machine_mode mode, rtx x,
7685 bool reg_ok_strict)
7687 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
7688 fprintf (stderr,
7689 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
7690 "strict = %d, reload = %s, code = %s\n",
7691 ret ? "true" : "false",
7692 GET_MODE_NAME (mode),
7693 reg_ok_strict,
7694 (reload_completed
7695 ? "after"
7696 : (reload_in_progress ? "progress" : "before")),
7697 GET_RTX_NAME (GET_CODE (x)));
7698 debug_rtx (x);
7700 return ret;
7703 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
7705 static bool
7706 rs6000_mode_dependent_address_p (const_rtx addr,
7707 addr_space_t as ATTRIBUTE_UNUSED)
7709 return rs6000_mode_dependent_address_ptr (addr);
7712 /* Return true if ADDR (a legitimate address expression)
7713 has an effect that depends on the machine mode it is used for.
7715 On the RS/6000 this is true of all integral offsets (since AltiVec
7716 and VSX modes don't allow them) and of pre-increment or decrement addressing.
7718 ??? Except that due to conceptual problems in offsettable_address_p
7719 we can't really report the problems of integral offsets. So leave
7720 this assuming that the adjustable offset must be valid for the
7721 sub-words of a TFmode operand, which is what we had before. */
7723 static bool
7724 rs6000_mode_dependent_address (const_rtx addr)
7726 switch (GET_CODE (addr))
7728 case PLUS:
7729 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
7730 is considered a legitimate address before reload, so there
7731 are no offset restrictions in that case. Note that this
7732 condition is safe in strict mode because any address involving
7733 virtual_stack_vars_rtx or arg_pointer_rtx would already have
7734 been rejected as illegitimate. */
7735 if (XEXP (addr, 0) != virtual_stack_vars_rtx
7736 && XEXP (addr, 0) != arg_pointer_rtx
7737 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
7739 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
7740 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
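	  /* E.g., with TARGET_POWERPC64 an offset of 0x7ff8 is mode
	     dependent: adding the 8-byte adjustment used to reach the
	     second doubleword of a 16-byte value would overflow the 16-bit
	     displacement field, whereas 0x7ff7 is still safe.  */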
7742 break;
7744 case LO_SUM:
7745 /* Anything in the constant pool is sufficiently aligned that
7746 all bytes have the same high part address. */
7747 return !legitimate_constant_pool_address_p (addr, QImode, false);
7749 /* Auto-increment cases are now treated generically in recog.c. */
7750 case PRE_MODIFY:
7751 return TARGET_UPDATE;
7753 /* AND is only allowed in Altivec loads. */
7754 case AND:
7755 return true;
7757 default:
7758 break;
7761 return false;
7764 /* Debug version of rs6000_mode_dependent_address. */
7765 static bool
7766 rs6000_debug_mode_dependent_address (const_rtx addr)
7768 bool ret = rs6000_mode_dependent_address (addr);
7770 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
7771 ret ? "true" : "false");
7772 debug_rtx (addr);
7774 return ret;
7777 /* Implement FIND_BASE_TERM. */
7779 rtx
7780 rs6000_find_base_term (rtx op)
7782 rtx base;
7784 base = op;
7785 if (GET_CODE (base) == CONST)
7786 base = XEXP (base, 0);
7787 if (GET_CODE (base) == PLUS)
7788 base = XEXP (base, 0);
7789 if (GET_CODE (base) == UNSPEC)
7790 switch (XINT (base, 1))
7792 case UNSPEC_TOCREL:
7793 case UNSPEC_MACHOPIC_OFFSET:
7794 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
7795 for aliasing purposes. */
7796 return XVECEXP (base, 0, 0);
7799 return op;
7802 /* More elaborate version of recog's offsettable_memref_p predicate
7803 that works around the ??? note of rs6000_mode_dependent_address.
7804 In particular it accepts
7806 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
7808 in 32-bit mode, which the recog predicate rejects.
7810 static bool
7811 rs6000_offsettable_memref_p (rtx op, enum machine_mode reg_mode)
7813 bool worst_case;
7815 if (!MEM_P (op))
7816 return false;
7818 /* First mimic offsettable_memref_p. */
7819 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
7820 return true;
7822 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
7823 the latter predicate knows nothing about the mode of the memory
7824 reference and, therefore, assumes that it is the largest supported
7825 mode (TFmode). As a consequence, legitimate offsettable memory
7826 references are rejected. rs6000_legitimate_offset_address_p contains
7827 the correct logic for the PLUS case of rs6000_mode_dependent_address,
7828 at least with a little bit of help here given that we know the
7829 actual registers used. */
7830 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
7831 || GET_MODE_SIZE (reg_mode) == 4);
7832 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
7833 true, worst_case);
7836 /* Change register usage conditional on target flags. */
7837 static void
7838 rs6000_conditional_register_usage (void)
7840 int i;
7842 if (TARGET_DEBUG_TARGET)
7843 fprintf (stderr, "rs6000_conditional_register_usage called\n");
7845 /* Set MQ register fixed (already call_used) so that it will not be
7846 allocated. */
7847 fixed_regs[64] = 1;
7849 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
7850 if (TARGET_64BIT)
7851 fixed_regs[13] = call_used_regs[13]
7852 = call_really_used_regs[13] = 1;
7854 /* Conditionally disable FPRs. */
7855 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
7856 for (i = 32; i < 64; i++)
7857 fixed_regs[i] = call_used_regs[i]
7858 = call_really_used_regs[i] = 1;
7860 /* The TOC register is not killed across calls in a way that is
7861 visible to the compiler. */
7862 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7863 call_really_used_regs[2] = 0;
7865 if (DEFAULT_ABI == ABI_V4
7866 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7867 && flag_pic == 2)
7868 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7870 if (DEFAULT_ABI == ABI_V4
7871 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7872 && flag_pic == 1)
7873 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7874 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7875 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7877 if (DEFAULT_ABI == ABI_DARWIN
7878 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
7879 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7880 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7881 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7883 if (TARGET_TOC && TARGET_MINIMAL_TOC)
7884 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7885 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7887 if (TARGET_SPE)
7889 global_regs[SPEFSCR_REGNO] = 1;
7890 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
7891 registers in prologues and epilogues. We no longer use r14
7892 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
7893 pool for link-compatibility with older versions of GCC. Once
7894 "old" code has died out, we can return r14 to the allocation
7895 pool. */
7896 fixed_regs[14]
7897 = call_used_regs[14]
7898 = call_really_used_regs[14] = 1;
7901 if (!TARGET_ALTIVEC && !TARGET_VSX)
7903 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
7904 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
7905 call_really_used_regs[VRSAVE_REGNO] = 1;
7908 if (TARGET_ALTIVEC || TARGET_VSX)
7909 global_regs[VSCR_REGNO] = 1;
7911 if (TARGET_ALTIVEC_ABI)
7913 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
7914 call_used_regs[i] = call_really_used_regs[i] = 1;
7916 /* AIX reserves VR20:31 in non-extended ABI mode. */
7917 if (TARGET_XCOFF)
7918 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
7919 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
7924 /* Output insns to set DEST equal to the constant SOURCE as a series of
7925 lis, ori and shl instructions and return TRUE. */
7927 bool
7928 rs6000_emit_set_const (rtx dest, rtx source)
7930 enum machine_mode mode = GET_MODE (dest);
7931 rtx temp, set;
7932 rtx_insn *insn;
7933 HOST_WIDE_INT c;
7935 gcc_checking_assert (CONST_INT_P (source));
7936 c = INTVAL (source);
7937 switch (mode)
7939 case QImode:
7940 case HImode:
7941 emit_insn (gen_rtx_SET (VOIDmode, dest, source));
7942 return true;
7944 case SImode:
7945 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
7947 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (temp),
7948 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
7949 emit_insn (gen_rtx_SET (VOIDmode, dest,
7950 gen_rtx_IOR (SImode, copy_rtx (temp),
7951 GEN_INT (c & 0xffff))));
7952 break;
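      /* The SImode case above loads, e.g., c = 0x12345678 as 0x12340000
	 (lis) followed by an IOR with 0x5678 (ori).  */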
7954 case DImode:
7955 if (!TARGET_POWERPC64)
7957 rtx hi, lo;
7959 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
7960 DImode);
7961 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
7962 DImode);
7963 emit_move_insn (hi, GEN_INT (c >> 32));
7964 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
7965 emit_move_insn (lo, GEN_INT (c));
7967 else
7968 rs6000_emit_set_long_const (dest, c);
7969 break;
7971 default:
7972 gcc_unreachable ();
7975 insn = get_last_insn ();
7976 set = single_set (insn);
7977 if (! CONSTANT_P (SET_SRC (set)))
7978 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
7980 return true;
7983 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
7984 Output insns to set DEST equal to the constant C as a series of
7985 lis, ori and shl instructions. */
7987 static void
7988 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
7990 rtx temp;
7991 HOST_WIDE_INT ud1, ud2, ud3, ud4;
7993 ud1 = c & 0xffff;
7994 c = c >> 16;
7995 ud2 = c & 0xffff;
7996 c = c >> 16;
7997 ud3 = c & 0xffff;
7998 c = c >> 16;
7999 ud4 = c & 0xffff;
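  /* E.g., c = 0x123456789abcdef0 splits into ud1 = 0xdef0, ud2 = 0x9abc,
     ud3 = 0x5678 and ud4 = 0x1234.  */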
8001 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8002 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8003 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8005 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8006 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8008 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8010 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8011 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8012 if (ud1 != 0)
8013 emit_move_insn (dest,
8014 gen_rtx_IOR (DImode, copy_rtx (temp),
8015 GEN_INT (ud1)));
8017 else if (ud3 == 0 && ud4 == 0)
8019 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8021 gcc_assert (ud2 & 0x8000);
8022 emit_move_insn (copy_rtx (temp),
8023 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8024 if (ud1 != 0)
8025 emit_move_insn (copy_rtx (temp),
8026 gen_rtx_IOR (DImode, copy_rtx (temp),
8027 GEN_INT (ud1)));
8028 emit_move_insn (dest,
8029 gen_rtx_ZERO_EXTEND (DImode,
8030 gen_lowpart (SImode,
8031 copy_rtx (temp))));
8033 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8034 || (ud4 == 0 && ! (ud3 & 0x8000)))
8036 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8038 emit_move_insn (copy_rtx (temp),
8039 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8040 if (ud2 != 0)
8041 emit_move_insn (copy_rtx (temp),
8042 gen_rtx_IOR (DImode, copy_rtx (temp),
8043 GEN_INT (ud2)));
8044 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8045 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8046 GEN_INT (16)));
8047 if (ud1 != 0)
8048 emit_move_insn (dest,
8049 gen_rtx_IOR (DImode, copy_rtx (temp),
8050 GEN_INT (ud1)));
8052 else
8054 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8056 emit_move_insn (copy_rtx (temp),
8057 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8058 if (ud3 != 0)
8059 emit_move_insn (copy_rtx (temp),
8060 gen_rtx_IOR (DImode, copy_rtx (temp),
8061 GEN_INT (ud3)));
8063 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8064 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8065 GEN_INT (32)));
8066 if (ud2 != 0)
8067 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8068 gen_rtx_IOR (DImode, copy_rtx (temp),
8069 GEN_INT (ud2 << 16)));
8070 if (ud1 != 0)
8071 emit_move_insn (dest,
8072 gen_rtx_IOR (DImode, copy_rtx (temp),
8073 GEN_INT (ud1)));
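  /* The final arm above is the worst case and emits the usual five-insn
     sequence, e.g. for 0x123456789abcdef0:
	 lis rD,0x1234; ori rD,rD,0x5678; sldi rD,rD,32;
	 oris rD,rD,0x9abc; ori rD,rD,0xdef0
     where rD stands for whatever register is chosen.  */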
8077 /* Helper for the following. Get rid of [r+r] memory refs
8078 in cases where it won't work (TImode, TFmode, TDmode, PTImode). */
8080 static void
8081 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8083 if (reload_in_progress)
8084 return;
8086 if (GET_CODE (operands[0]) == MEM
8087 && GET_CODE (XEXP (operands[0], 0)) != REG
8088 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8089 GET_MODE (operands[0]), false))
8090 operands[0]
8091 = replace_equiv_address (operands[0],
8092 copy_addr_to_reg (XEXP (operands[0], 0)));
8094 if (GET_CODE (operands[1]) == MEM
8095 && GET_CODE (XEXP (operands[1], 0)) != REG
8096 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8097 GET_MODE (operands[1]), false))
8098 operands[1]
8099 = replace_equiv_address (operands[1],
8100 copy_addr_to_reg (XEXP (operands[1], 0)));
8103 /* Generate a vector of constants to permute MODE for a little-endian
8104 storage operation by swapping the two halves of a vector. */
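/* For V4SImode, for example, the selector built below is { 2, 3, 0, 1 }.  */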
8105 static rtvec
8106 rs6000_const_vec (enum machine_mode mode)
8108 int i, subparts;
8109 rtvec v;
8111 switch (mode)
8113 case V1TImode:
8114 subparts = 1;
8115 break;
8116 case V2DFmode:
8117 case V2DImode:
8118 subparts = 2;
8119 break;
8120 case V4SFmode:
8121 case V4SImode:
8122 subparts = 4;
8123 break;
8124 case V8HImode:
8125 subparts = 8;
8126 break;
8127 case V16QImode:
8128 subparts = 16;
8129 break;
8130 default:
8131 gcc_unreachable();
8134 v = rtvec_alloc (subparts);
8136 for (i = 0; i < subparts / 2; ++i)
8137 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8138 for (i = subparts / 2; i < subparts; ++i)
8139 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8141 return v;
8144 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8145 for a VSX load or store operation. */
8146 rtx
8147 rs6000_gen_le_vsx_permute (rtx source, enum machine_mode mode)
8149 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8150 return gen_rtx_VEC_SELECT (mode, source, par);
8153 /* Emit a little-endian load from vector memory location SOURCE to VSX
8154 register DEST in mode MODE. The load is done with two permuting
8155 insns that represent an lxvd2x and an xxpermdi. */
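/* For V2DFmode, for example, the two SETs correspond to lxvd2x vS,0,rA
   followed by the doubleword swap xxpermdi vD,vS,vS,2.  */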
8156 void
8157 rs6000_emit_le_vsx_load (rtx dest, rtx source, enum machine_mode mode)
8159 rtx tmp, permute_mem, permute_reg;
8161 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8162 V1TImode). */
8163 if (mode == TImode || mode == V1TImode)
8165 mode = V2DImode;
8166 dest = gen_lowpart (V2DImode, dest);
8167 source = adjust_address (source, V2DImode, 0);
8170 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8171 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8172 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8173 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
8174 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
8177 /* Emit a little-endian store to vector memory location DEST from VSX
8178 register SOURCE in mode MODE. The store is done with two permuting
8179 insns that represent an xxpermdi and an stxvd2x. */
8180 void
8181 rs6000_emit_le_vsx_store (rtx dest, rtx source, enum machine_mode mode)
8183 rtx tmp, permute_src, permute_tmp;
8185 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8186 V1TImode). */
8187 if (mode == TImode || mode == V1TImode)
8189 mode = V2DImode;
8190 dest = adjust_address (dest, V2DImode, 0);
8191 source = gen_lowpart (V2DImode, source);
8194 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8195 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8196 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8197 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
8198 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
8201 /* Emit a sequence representing a little-endian VSX load or store,
8202 moving data from SOURCE to DEST in mode MODE. This is done
8203 separately from rs6000_emit_move to ensure it is called only
8204 during expand. LE VSX loads and stores introduced later are
8205 handled with a split. The expand-time RTL generation allows
8206 us to optimize away redundant pairs of register-permutes. */
8207 void
8208 rs6000_emit_le_vsx_move (rtx dest, rtx source, enum machine_mode mode)
8210 gcc_assert (!BYTES_BIG_ENDIAN
8211 && VECTOR_MEM_VSX_P (mode)
8212 && !gpr_or_gpr_p (dest, source)
8213 && (MEM_P (source) ^ MEM_P (dest)));
8215 if (MEM_P (source))
8217 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8218 rs6000_emit_le_vsx_load (dest, source, mode);
8220 else
8222 if (!REG_P (source))
8223 source = force_reg (mode, source);
8224 rs6000_emit_le_vsx_store (dest, source, mode);
8228 /* Emit a move from SOURCE to DEST in mode MODE. */
8229 void
8230 rs6000_emit_move (rtx dest, rtx source, enum machine_mode mode)
8232 rtx operands[2];
8233 operands[0] = dest;
8234 operands[1] = source;
8236 if (TARGET_DEBUG_ADDR)
8238 fprintf (stderr,
8239 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8240 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8241 GET_MODE_NAME (mode),
8242 reload_in_progress,
8243 reload_completed,
8244 can_create_pseudo_p ());
8245 debug_rtx (dest);
8246 fprintf (stderr, "source:\n");
8247 debug_rtx (source);
8250 /* Sanity checks. Check that we get CONST_DOUBLE only when we should. */
8251 if (CONST_WIDE_INT_P (operands[1])
8252 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8254 /* This should be fixed with the introduction of CONST_WIDE_INT. */
8255 gcc_unreachable ();
8258 /* Check if GCC is setting up a block move that will end up using FP
8259 registers as temporaries. We must make sure this is acceptable. */
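  /* Splitting the DImode copy into two SImode copies below keeps the
     block move in GPRs when, e.g., only 4-byte alignment is known.  */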
8260 if (GET_CODE (operands[0]) == MEM
8261 && GET_CODE (operands[1]) == MEM
8262 && mode == DImode
8263 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8264 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8265 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8266 ? 32 : MEM_ALIGN (operands[0])))
8267 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8268 ? 32
8269 : MEM_ALIGN (operands[1]))))
8270 && ! MEM_VOLATILE_P (operands [0])
8271 && ! MEM_VOLATILE_P (operands [1]))
8273 emit_move_insn (adjust_address (operands[0], SImode, 0),
8274 adjust_address (operands[1], SImode, 0));
8275 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8276 adjust_address (copy_rtx (operands[1]), SImode, 4));
8277 return;
8280 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8281 && !gpc_reg_operand (operands[1], mode))
8282 operands[1] = force_reg (mode, operands[1]);
8284 /* Recognize the case where operand[1] is a reference to thread-local
8285 data and load its address to a register. */
8286 if (tls_referenced_p (operands[1]))
8288 enum tls_model model;
8289 rtx tmp = operands[1];
8290 rtx addend = NULL;
8292 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8294 addend = XEXP (XEXP (tmp, 0), 1);
8295 tmp = XEXP (XEXP (tmp, 0), 0);
8298 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8299 model = SYMBOL_REF_TLS_MODEL (tmp);
8300 gcc_assert (model != 0);
8302 tmp = rs6000_legitimize_tls_address (tmp, model);
8303 if (addend)
8305 tmp = gen_rtx_PLUS (mode, tmp, addend);
8306 tmp = force_operand (tmp, operands[0]);
8308 operands[1] = tmp;
8311 /* Handle the case where reload calls us with an invalid address. */
8312 if (reload_in_progress && mode == Pmode
8313 && (! general_operand (operands[1], mode)
8314 || ! nonimmediate_operand (operands[0], mode)))
8315 goto emit_set;
8317 /* 128-bit constant floating-point values on Darwin should really be
8318 loaded as two parts. */
8319 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
8320 && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
8322 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
8323 simplify_gen_subreg (DFmode, operands[1], mode, 0),
8324 DFmode);
8325 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
8326 GET_MODE_SIZE (DFmode)),
8327 simplify_gen_subreg (DFmode, operands[1], mode,
8328 GET_MODE_SIZE (DFmode)),
8329 DFmode);
8330 return;
8333 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
8334 cfun->machine->sdmode_stack_slot =
8335 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
8338 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
8339 p1:SD) if p1 is not of floating point class and p0 is spilled as
8340 we can have no analogous movsd_store for this. */
8341 if (lra_in_progress && mode == DDmode
8342 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8343 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8344 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
8345 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
8347 enum reg_class cl;
8348 int regno = REGNO (SUBREG_REG (operands[1]));
8350 if (regno >= FIRST_PSEUDO_REGISTER)
8352 cl = reg_preferred_class (regno);
8353 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
8355 if (regno >= 0 && ! FP_REGNO_P (regno))
8357 mode = SDmode;
8358 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
8359 operands[1] = SUBREG_REG (operands[1]);
8362 if (lra_in_progress
8363 && mode == SDmode
8364 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8365 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8366 && (REG_P (operands[1])
8367 || (GET_CODE (operands[1]) == SUBREG
8368 && REG_P (SUBREG_REG (operands[1])))))
8370 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
8371 ? SUBREG_REG (operands[1]) : operands[1]);
8372 enum reg_class cl;
8374 if (regno >= FIRST_PSEUDO_REGISTER)
8376 cl = reg_preferred_class (regno);
8377 gcc_assert (cl != NO_REGS);
8378 regno = ira_class_hard_regs[cl][0];
8380 if (FP_REGNO_P (regno))
8382 if (GET_MODE (operands[0]) != DDmode)
8383 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
8384 emit_insn (gen_movsd_store (operands[0], operands[1]));
8386 else if (INT_REGNO_P (regno))
8387 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8388 else
8389 gcc_unreachable();
8390 return;
8392 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
8393 p:DD)) if p0 is not of floating point class and p1 is spilled as
8394 we can have no analogous movsd_load for this. */
8395 if (lra_in_progress && mode == DDmode
8396 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
8397 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
8398 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8399 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8401 enum reg_class cl;
8402 int regno = REGNO (SUBREG_REG (operands[0]));
8404 if (regno >= FIRST_PSEUDO_REGISTER)
8406 cl = reg_preferred_class (regno);
8407 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
8409 if (regno >= 0 && ! FP_REGNO_P (regno))
8411 mode = SDmode;
8412 operands[0] = SUBREG_REG (operands[0]);
8413 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
8416 if (lra_in_progress
8417 && mode == SDmode
8418 && (REG_P (operands[0])
8419 || (GET_CODE (operands[0]) == SUBREG
8420 && REG_P (SUBREG_REG (operands[0]))))
8421 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8422 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8424 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
8425 ? SUBREG_REG (operands[0]) : operands[0]);
8426 enum reg_class cl;
8428 if (regno >= FIRST_PSEUDO_REGISTER)
8430 cl = reg_preferred_class (regno);
8431 gcc_assert (cl != NO_REGS);
8432 regno = ira_class_hard_regs[cl][0];
8434 if (FP_REGNO_P (regno))
8436 if (GET_MODE (operands[1]) != DDmode)
8437 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
8438 emit_insn (gen_movsd_load (operands[0], operands[1]));
8440 else if (INT_REGNO_P (regno))
8441 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8442 else
8443 gcc_unreachable();
8444 return;
8447 if (reload_in_progress
8448 && mode == SDmode
8449 && cfun->machine->sdmode_stack_slot != NULL_RTX
8450 && MEM_P (operands[0])
8451 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
8452 && REG_P (operands[1]))
8454 if (FP_REGNO_P (REGNO (operands[1])))
8456 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
8457 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8458 emit_insn (gen_movsd_store (mem, operands[1]));
8460 else if (INT_REGNO_P (REGNO (operands[1])))
8462 rtx mem = operands[0];
8463 if (BYTES_BIG_ENDIAN)
8464 mem = adjust_address_nv (mem, mode, 4);
8465 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8466 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
8468 else
8469 gcc_unreachable();
8470 return;
8472 if (reload_in_progress
8473 && mode == SDmode
8474 && REG_P (operands[0])
8475 && MEM_P (operands[1])
8476 && cfun->machine->sdmode_stack_slot != NULL_RTX
8477 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
8479 if (FP_REGNO_P (REGNO (operands[0])))
8481 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
8482 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8483 emit_insn (gen_movsd_load (operands[0], mem));
8485 else if (INT_REGNO_P (REGNO (operands[0])))
8487 rtx mem = operands[1];
8488 if (BYTES_BIG_ENDIAN)
8489 mem = adjust_address_nv (mem, mode, 4);
8490 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8491 emit_insn (gen_movsd_hardfloat (operands[0], mem));
8493 else
8494 gcc_unreachable();
8495 return;
8498 /* FIXME: In the long term, this switch statement should go away
8499 and be replaced by a sequence of tests based on things like
8500 mode == Pmode. */
8501 switch (mode)
8503 case HImode:
8504 case QImode:
8505 if (CONSTANT_P (operands[1])
8506 && GET_CODE (operands[1]) != CONST_INT)
8507 operands[1] = force_const_mem (mode, operands[1]);
8508 break;
8510 case TFmode:
8511 case TDmode:
8512 rs6000_eliminate_indexed_memrefs (operands);
8513 /* fall through */
8515 case DFmode:
8516 case DDmode:
8517 case SFmode:
8518 case SDmode:
8519 if (CONSTANT_P (operands[1])
8520 && ! easy_fp_constant (operands[1], mode))
8521 operands[1] = force_const_mem (mode, operands[1]);
8522 break;
8524 case V16QImode:
8525 case V8HImode:
8526 case V4SFmode:
8527 case V4SImode:
8528 case V4HImode:
8529 case V2SFmode:
8530 case V2SImode:
8531 case V1DImode:
8532 case V2DFmode:
8533 case V2DImode:
8534 case V1TImode:
8535 if (CONSTANT_P (operands[1])
8536 && !easy_vector_constant (operands[1], mode))
8537 operands[1] = force_const_mem (mode, operands[1]);
8538 break;
8540 case SImode:
8541 case DImode:
8542 /* Use the default pattern for the address of ELF small data. */
8543 if (TARGET_ELF
8544 && mode == Pmode
8545 && DEFAULT_ABI == ABI_V4
8546 && (GET_CODE (operands[1]) == SYMBOL_REF
8547 || GET_CODE (operands[1]) == CONST)
8548 && small_data_operand (operands[1], mode))
8550 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8551 return;
8554 if (DEFAULT_ABI == ABI_V4
8555 && mode == Pmode && mode == SImode
8556 && flag_pic == 1 && got_operand (operands[1], mode))
8558 emit_insn (gen_movsi_got (operands[0], operands[1]));
8559 return;
8562 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
8563 && TARGET_NO_TOC
8564 && ! flag_pic
8565 && mode == Pmode
8566 && CONSTANT_P (operands[1])
8567 && GET_CODE (operands[1]) != HIGH
8568 && GET_CODE (operands[1]) != CONST_INT)
8570 rtx target = (!can_create_pseudo_p ()
8571 ? operands[0]
8572 : gen_reg_rtx (mode));
8574 /* If this is a function address on -mcall-aixdesc,
8575 convert it to the address of the descriptor. */
8576 if (DEFAULT_ABI == ABI_AIX
8577 && GET_CODE (operands[1]) == SYMBOL_REF
8578 && XSTR (operands[1], 0)[0] == '.')
8580 const char *name = XSTR (operands[1], 0);
8581 rtx new_ref;
8582 while (*name == '.')
8583 name++;
8584 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
8585 CONSTANT_POOL_ADDRESS_P (new_ref)
8586 = CONSTANT_POOL_ADDRESS_P (operands[1]);
8587 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
8588 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
8589 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
8590 operands[1] = new_ref;
8593 if (DEFAULT_ABI == ABI_DARWIN)
8595 #if TARGET_MACHO
8596 if (MACHO_DYNAMIC_NO_PIC_P)
8598 /* Take care of any required data indirection. */
8599 operands[1] = rs6000_machopic_legitimize_pic_address (
8600 operands[1], mode, operands[0]);
8601 if (operands[0] != operands[1])
8602 emit_insn (gen_rtx_SET (VOIDmode,
8603 operands[0], operands[1]));
8604 return;
8606 #endif
8607 emit_insn (gen_macho_high (target, operands[1]));
8608 emit_insn (gen_macho_low (operands[0], target, operands[1]));
8609 return;
8612 emit_insn (gen_elf_high (target, operands[1]));
8613 emit_insn (gen_elf_low (operands[0], target, operands[1]));
8614 return;
8617 /* If this is a SYMBOL_REF that refers to a constant pool entry,
8618 and we have put it in the TOC, we just need to make a TOC-relative
8619 reference to it. */
8620 if (TARGET_TOC
8621 && GET_CODE (operands[1]) == SYMBOL_REF
8622 && use_toc_relative_ref (operands[1]))
8623 operands[1] = create_TOC_reference (operands[1], operands[0]);
8624 else if (mode == Pmode
8625 && CONSTANT_P (operands[1])
8626 && GET_CODE (operands[1]) != HIGH
8627 && ((GET_CODE (operands[1]) != CONST_INT
8628 && ! easy_fp_constant (operands[1], mode))
8629 || (GET_CODE (operands[1]) == CONST_INT
8630 && (num_insns_constant (operands[1], mode)
8631 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
8632 || (GET_CODE (operands[0]) == REG
8633 && FP_REGNO_P (REGNO (operands[0]))))
8634 && !toc_relative_expr_p (operands[1], false)
8635 && (TARGET_CMODEL == CMODEL_SMALL
8636 || can_create_pseudo_p ()
8637 || (REG_P (operands[0])
8638 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
8641 #if TARGET_MACHO
8642 /* Darwin uses a special PIC legitimizer. */
8643 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
8645 operands[1] =
8646 rs6000_machopic_legitimize_pic_address (operands[1], mode,
8647 operands[0]);
8648 if (operands[0] != operands[1])
8649 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8650 return;
8652 #endif
8654 /* If we are to limit the number of things we put in the TOC and
8655 this is a symbol plus a constant we can add in one insn,
8656 just put the symbol in the TOC and add the constant. Don't do
8657 this if reload is in progress. */
8658 if (GET_CODE (operands[1]) == CONST
8659 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
8660 && GET_CODE (XEXP (operands[1], 0)) == PLUS
8661 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
8662 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
8663 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
8664 && ! side_effects_p (operands[0]))
8666 rtx sym =
8667 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
8668 rtx other = XEXP (XEXP (operands[1], 0), 1);
8670 sym = force_reg (mode, sym);
8671 emit_insn (gen_add3_insn (operands[0], sym, other));
8672 return;
8675 operands[1] = force_const_mem (mode, operands[1]);
8677 if (TARGET_TOC
8678 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8679 && constant_pool_expr_p (XEXP (operands[1], 0))
8680 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
8681 get_pool_constant (XEXP (operands[1], 0)),
8682 get_pool_mode (XEXP (operands[1], 0))))
8684 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
8685 operands[0]);
8686 operands[1] = gen_const_mem (mode, tocref);
8687 set_mem_alias_set (operands[1], get_TOC_alias_set ());
8690 break;
8692 case TImode:
8693 if (!VECTOR_MEM_VSX_P (TImode))
8694 rs6000_eliminate_indexed_memrefs (operands);
8695 break;
8697 case PTImode:
8698 rs6000_eliminate_indexed_memrefs (operands);
8699 break;
8701 default:
8702 fatal_insn ("bad move", gen_rtx_SET (VOIDmode, dest, source));
8705 /* Above, we may have called force_const_mem which may have returned
8706 an invalid address. If we can, fix this up; otherwise, reload will
8707 have to deal with it. */
8708 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
8709 operands[1] = validize_mem (operands[1]);
8711 emit_set:
8712 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8715 /* Return true if a structure, union or array containing FIELD should be
8716 accessed using `BLKmode'.
8718 For the SPE, simd types are V2SI, and gcc can be tempted to put the
8719 entire thing in a DI and use subregs to access the internals.
8720 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
8721 back-end. Because a single GPR can hold a V2SI, but not a DI, the
8722 best thing to do is set structs to BLKmode and avoid Severe Tire
8723 Damage.
8725 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
8726 fit into one GPR, whereas DI still needs two.
8728 static bool
8729 rs6000_member_type_forces_blk (const_tree field, enum machine_mode mode)
8731 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
8732 || (TARGET_E500_DOUBLE && mode == DFmode));
8735 /* Nonzero if we can use a floating-point register to pass this arg. */
8736 #define USE_FP_FOR_ARG_P(CUM,MODE) \
8737 (SCALAR_FLOAT_MODE_P (MODE) \
8738 && (CUM)->fregno <= FP_ARG_MAX_REG \
8739 && TARGET_HARD_FLOAT && TARGET_FPRS)
8741 /* Nonzero if we can use an AltiVec register to pass this arg. */
8742 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
8743 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
8744 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
8745 && TARGET_ALTIVEC_ABI \
8746 && (NAMED))
8748 /* Walk down the type tree of TYPE counting consecutive base elements.
8749 If *MODEP is VOIDmode, then set it to the first valid floating point
8750 or vector type. If a non-floating point or vector type is found, or
8751 if a floating point or vector type that doesn't match a non-VOIDmode
8752 *MODEP is found, then return -1, otherwise return the count in the
8753 sub-tree. */
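/* E.g., struct { double a, b; } yields *MODEP = DFmode and a count of 2,
   while struct { double d; int i; } returns -1 because of the non-FP
   field.  */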
8755 static int
8756 rs6000_aggregate_candidate (const_tree type, enum machine_mode *modep)
8758 enum machine_mode mode;
8759 HOST_WIDE_INT size;
8761 switch (TREE_CODE (type))
8763 case REAL_TYPE:
8764 mode = TYPE_MODE (type);
8765 if (!SCALAR_FLOAT_MODE_P (mode))
8766 return -1;
8768 if (*modep == VOIDmode)
8769 *modep = mode;
8771 if (*modep == mode)
8772 return 1;
8774 break;
8776 case COMPLEX_TYPE:
8777 mode = TYPE_MODE (TREE_TYPE (type));
8778 if (!SCALAR_FLOAT_MODE_P (mode))
8779 return -1;
8781 if (*modep == VOIDmode)
8782 *modep = mode;
8784 if (*modep == mode)
8785 return 2;
8787 break;
8789 case VECTOR_TYPE:
8790 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
8791 return -1;
8793 /* Use V4SImode as representative of all 128-bit vector types. */
8794 size = int_size_in_bytes (type);
8795 switch (size)
8797 case 16:
8798 mode = V4SImode;
8799 break;
8800 default:
8801 return -1;
8804 if (*modep == VOIDmode)
8805 *modep = mode;
8807 /* Vector modes are considered to be opaque: two vectors are
8808 equivalent for the purposes of being homogeneous aggregates
8809 if they are the same size. */
8810 if (*modep == mode)
8811 return 1;
8813 break;
8815 case ARRAY_TYPE:
8817 int count;
8818 tree index = TYPE_DOMAIN (type);
8820 /* Can't handle incomplete types nor sizes that are not
8821 fixed. */
8822 if (!COMPLETE_TYPE_P (type)
8823 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8824 return -1;
8826 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
8827 if (count == -1
8828 || !index
8829 || !TYPE_MAX_VALUE (index)
8830 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
8831 || !TYPE_MIN_VALUE (index)
8832 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
8833 || count < 0)
8834 return -1;
8836 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
8837 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
8839 /* There must be no padding. */
8840 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8841 return -1;
8843 return count;
8846 case RECORD_TYPE:
8848 int count = 0;
8849 int sub_count;
8850 tree field;
8852 /* Can't handle incomplete types nor sizes that are not
8853 fixed. */
8854 if (!COMPLETE_TYPE_P (type)
8855 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8856 return -1;
8858 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8860 if (TREE_CODE (field) != FIELD_DECL)
8861 continue;
8863 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
8864 if (sub_count < 0)
8865 return -1;
8866 count += sub_count;
8869 /* There must be no padding. */
8870 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8871 return -1;
8873 return count;
8876 case UNION_TYPE:
8877 case QUAL_UNION_TYPE:
8879 /* These aren't very interesting except in a degenerate case. */
8880 int count = 0;
8881 int sub_count;
8882 tree field;
8884 /* Can't handle incomplete types nor sizes that are not
8885 fixed. */
8886 if (!COMPLETE_TYPE_P (type)
8887 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8888 return -1;
8890 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8892 if (TREE_CODE (field) != FIELD_DECL)
8893 continue;
8895 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
8896 if (sub_count < 0)
8897 return -1;
8898 count = count > sub_count ? count : sub_count;
8901 /* There must be no padding. */
8902 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8903 return -1;
8905 return count;
8908 default:
8909 break;
8912 return -1;
8915 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
8916 float or vector aggregate that shall be passed in FP/vector registers
8917 according to the ELFv2 ABI, return the homogeneous element mode in
8918 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
8920 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
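/* E.g., under the ELFv2 ABI, struct { double x, y, z; } is discovered as
   a homogeneous aggregate with *ELT_MODE = DFmode and *N_ELTS = 3.  */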
8922 static bool
8923 rs6000_discover_homogeneous_aggregate (enum machine_mode mode, const_tree type,
8924 enum machine_mode *elt_mode,
8925 int *n_elts)
8927 /* Note that we do not accept complex types at the top level as
8928 homogeneous aggregates; these types are handled via the
8929 targetm.calls.split_complex_arg mechanism. Complex types
8930 can be elements of homogeneous aggregates, however. */
8931 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
8933 enum machine_mode field_mode = VOIDmode;
8934 int field_count = rs6000_aggregate_candidate (type, &field_mode);
8936 if (field_count > 0)
8938 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode)?
8939 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
8941 /* The ELFv2 ABI allows homogeneous aggregates to occupy
8942 up to AGGR_ARG_NUM_REG registers. */
8943 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
8945 if (elt_mode)
8946 *elt_mode = field_mode;
8947 if (n_elts)
8948 *n_elts = field_count;
8949 return true;
8954 if (elt_mode)
8955 *elt_mode = mode;
8956 if (n_elts)
8957 *n_elts = 1;
8958 return false;
8961 /* Return a nonzero value to say to return the function value in
8962 memory, just as large structures are always returned. TYPE will be
8963 the data type of the value, and FNTYPE will be the type of the
8964 function doing the returning, or @code{NULL} for libcalls.
8966 The AIX ABI for the RS/6000 specifies that all structures are
8967 returned in memory. The Darwin ABI does the same.
8969 For the Darwin 64 Bit ABI, a function result can be returned in
8970 registers or in memory, depending on the size of the return data
8971 type. If it is returned in registers, the value occupies the same
8972 registers as it would if it were the first and only function
8973 argument. Otherwise, the function places its result in memory at
8974 the location pointed to by GPR3.
8976 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
8977 but a draft put them in memory, and GCC used to implement the draft
8978 instead of the final standard. Therefore, aix_struct_return
8979 controls this instead of DEFAULT_ABI; V.4 targets needing backward
8980 compatibility can change DRAFT_V4_STRUCT_RET to override the
8981 default, and -m switches get the final word. See
8982 rs6000_option_override_internal for more details.
8984 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
8985 long double support is enabled. These values are returned in memory.
8987 int_size_in_bytes returns -1 for variable size objects, which go in
8988 memory always. The cast to unsigned makes -1 > 8. */
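/* For example, under the ELFv2 ABI a struct of two doubles is returned
   in FPRs and a 16-byte struct of ints in GPRs, while the AIX rules
   (aix_struct_return) send both to memory.  */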
8990 static bool
8991 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
8993 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
8994 if (TARGET_MACHO
8995 && rs6000_darwin64_abi
8996 && TREE_CODE (type) == RECORD_TYPE
8997 && int_size_in_bytes (type) > 0)
8999 CUMULATIVE_ARGS valcum;
9000 rtx valret;
9002 valcum.words = 0;
9003 valcum.fregno = FP_ARG_MIN_REG;
9004 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9005 /* Do a trial code generation as if this were going to be passed
9006 as an argument; if any part goes in memory, we return NULL. */
9007 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9008 if (valret)
9009 return false;
9010 /* Otherwise fall through to more conventional ABI rules. */
9013 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers */
9014 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9015 NULL, NULL))
9016 return false;
9018 /* The ELFv2 ABI returns aggregates up to 16B in registers */
9019 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9020 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9021 return false;
9023 if (AGGREGATE_TYPE_P (type)
9024 && (aix_struct_return
9025 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9026 return true;
9028 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9029 modes only exist for GCC vector types if -maltivec. */
9030 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9031 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9032 return false;
9034 /* Return synthetic vectors in memory. */
9035 if (TREE_CODE (type) == VECTOR_TYPE
9036 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9038 static bool warned_for_return_big_vectors = false;
9039 if (!warned_for_return_big_vectors)
9041 warning (0, "GCC vector returned by reference: "
9042 "non-standard ABI extension with no compatibility guarantee");
9043 warned_for_return_big_vectors = true;
9045 return true;
9048 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && TYPE_MODE (type) == TFmode)
9049 return true;
9051 return false;
9054 /* Specify whether values returned in registers should be at the most
9055 significant end of a register. We want aggregates returned by
9056 value to match the way aggregates are passed to functions. */
9058 static bool
9059 rs6000_return_in_msb (const_tree valtype)
9061 return (DEFAULT_ABI == ABI_ELFv2
9062 && BYTES_BIG_ENDIAN
9063 && AGGREGATE_TYPE_P (valtype)
9064 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9067 #ifdef HAVE_AS_GNU_ATTRIBUTE
9068 /* Return TRUE if a call to function FNDECL may be one that
9069 potentially affects the function calling ABI of the object file. */
9071 static bool
9072 call_ABI_of_interest (tree fndecl)
9074 if (symtab->state == EXPANSION)
9076 struct cgraph_node *c_node;
9078 /* Libcalls are always interesting. */
9079 if (fndecl == NULL_TREE)
9080 return true;
9082 /* Any call to an external function is interesting. */
9083 if (DECL_EXTERNAL (fndecl))
9084 return true;
9086 /* Interesting functions that we are emitting in this object file. */
9087 c_node = cgraph_node::get (fndecl);
9088 c_node = c_node->ultimate_alias_target ();
9089 return !c_node->only_called_directly_p ();
9091 return false;
9093 #endif
9095 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9096 for a call to a function whose data type is FNTYPE.
9097 For a library call, FNTYPE is 0 and RETURN_MODE is the return value mode.
9099 For incoming args we set the number of arguments in the prototype to a
9100 large value, so we never return a PARALLEL. */
9102 void
9103 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9104 rtx libname ATTRIBUTE_UNUSED, int incoming,
9105 int libcall, int n_named_args,
9106 tree fndecl ATTRIBUTE_UNUSED,
9107 enum machine_mode return_mode ATTRIBUTE_UNUSED)
9109 static CUMULATIVE_ARGS zero_cumulative;
9111 *cum = zero_cumulative;
9112 cum->words = 0;
9113 cum->fregno = FP_ARG_MIN_REG;
9114 cum->vregno = ALTIVEC_ARG_MIN_REG;
9115 cum->prototype = (fntype && prototype_p (fntype));
9116 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9117 ? CALL_LIBCALL : CALL_NORMAL);
9118 cum->sysv_gregno = GP_ARG_MIN_REG;
9119 cum->stdarg = stdarg_p (fntype);
9121 cum->nargs_prototype = 0;
9122 if (incoming || cum->prototype)
9123 cum->nargs_prototype = n_named_args;
9125 /* Check for a longcall attribute. */
9126 if ((!fntype && rs6000_default_long_calls)
9127 || (fntype
9128 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9129 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9130 cum->call_cookie |= CALL_LONG;
9132 if (TARGET_DEBUG_ARG)
9134 fprintf (stderr, "\ninit_cumulative_args:");
9135 if (fntype)
9137 tree ret_type = TREE_TYPE (fntype);
9138 fprintf (stderr, " ret code = %s,",
9139 get_tree_code_name (TREE_CODE (ret_type)));
9142 if (cum->call_cookie & CALL_LONG)
9143 fprintf (stderr, " longcall,");
9145 fprintf (stderr, " proto = %d, nargs = %d\n",
9146 cum->prototype, cum->nargs_prototype);
9149 #ifdef HAVE_AS_GNU_ATTRIBUTE
9150 if (DEFAULT_ABI == ABI_V4)
9152 cum->escapes = call_ABI_of_interest (fndecl);
9153 if (cum->escapes)
9155 tree return_type;
9157 if (fntype)
9159 return_type = TREE_TYPE (fntype);
9160 return_mode = TYPE_MODE (return_type);
9162 else
9163 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9165 if (return_type != NULL)
9167 if (TREE_CODE (return_type) == RECORD_TYPE
9168 && TYPE_TRANSPARENT_AGGR (return_type))
9170 return_type = TREE_TYPE (first_field (return_type));
9171 return_mode = TYPE_MODE (return_type);
9173 if (AGGREGATE_TYPE_P (return_type)
9174 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9175 <= 8))
9176 rs6000_returns_struct = true;
9178 if (SCALAR_FLOAT_MODE_P (return_mode))
9179 rs6000_passes_float = true;
9180 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9181 || SPE_VECTOR_MODE (return_mode))
9182 rs6000_passes_vector = true;
9185 #endif
9187 if (fntype
9188 && !TARGET_ALTIVEC
9189 && TARGET_ALTIVEC_ABI
9190 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9192 error ("cannot return value in vector register because"
9193 " altivec instructions are disabled, use -maltivec"
9194 " to enable them");
9198 /* Return true if TYPE must be passed on the stack and not in registers. */
9200 static bool
9201 rs6000_must_pass_in_stack (enum machine_mode mode, const_tree type)
9203 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9204 return must_pass_in_stack_var_size (mode, type);
9205 else
9206 return must_pass_in_stack_var_size_or_pad (mode, type);
9209 /* If defined, a C expression which determines whether, and in which
9210 direction, to pad out an argument with extra space. The value
9211 should be of type `enum direction': either `upward' to pad above
9212 the argument, `downward' to pad below, or `none' to inhibit
9213 padding.
9215 For the AIX ABI structs are always stored left shifted in their
9216 argument slot. */
9218 enum direction
9219 function_arg_padding (enum machine_mode mode, const_tree type)
9221 #ifndef AGGREGATE_PADDING_FIXED
9222 #define AGGREGATE_PADDING_FIXED 0
9223 #endif
9224 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9225 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9226 #endif
9228 if (!AGGREGATE_PADDING_FIXED)
9230 /* GCC used to pass structures of the same size as integer types as
9231 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
9232 i.e. Structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9233 passed padded downward, except that -mstrict-align further
9234 muddied the water in that multi-component structures of 2 and 4
9235 bytes in size were passed padded upward.
9237 The following arranges for best compatibility with previous
9238 versions of gcc, but removes the -mstrict-align dependency. */
9239 if (BYTES_BIG_ENDIAN)
9241 HOST_WIDE_INT size = 0;
9243 if (mode == BLKmode)
9245 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
9246 size = int_size_in_bytes (type);
9248 else
9249 size = GET_MODE_SIZE (mode);
9251 if (size == 1 || size == 2 || size == 4)
9252 return downward;
9254 return upward;
9257 if (AGGREGATES_PAD_UPWARD_ALWAYS)
9259 if (type != 0 && AGGREGATE_TYPE_P (type))
9260 return upward;
9263 /* Fall back to the default. */
9264 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
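/* Illustrative example (editorial addition, not from the original source):
   on a big-endian target without AGGREGATE_PADDING_FIXED, a 1-, 2- or
   4-byte argument keeps the old integer-like placement and is padded
   downward, while e.g. `struct { char c[3]; }' (size 3, BLKmode) is
   padded upward.  */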
9267 /* If defined, a C expression that gives the alignment boundary, in bits,
9268 of an argument with the specified mode and type. If it is not defined,
9269 PARM_BOUNDARY is used for all arguments.
9271 V.4 wants long longs and doubles to be double word aligned. Just
9272 testing the mode size is a boneheaded way to do this as it means
9273 that other types such as complex int are also double word aligned.
9274 However, we're stuck with this because changing the ABI might break
9275 existing library interfaces.
9277 Doubleword align SPE vectors.
9278 Quadword align Altivec/VSX vectors.
9279 Quadword align large synthetic vector types. */
9281 static unsigned int
9282 rs6000_function_arg_boundary (enum machine_mode mode, const_tree type)
9284 enum machine_mode elt_mode;
9285 int n_elts;
9287 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9289 if (DEFAULT_ABI == ABI_V4
9290 && (GET_MODE_SIZE (mode) == 8
9291 || (TARGET_HARD_FLOAT
9292 && TARGET_FPRS
9293 && (mode == TFmode || mode == TDmode))))
9294 return 64;
9295 else if (SPE_VECTOR_MODE (mode)
9296 || (type && TREE_CODE (type) == VECTOR_TYPE
9297 && int_size_in_bytes (type) >= 8
9298 && int_size_in_bytes (type) < 16))
9299 return 64;
9300 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9301 || (type && TREE_CODE (type) == VECTOR_TYPE
9302 && int_size_in_bytes (type) >= 16))
9303 return 128;
9305 /* Aggregate types that need > 8 byte alignment are quadword-aligned
9306 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
9307 -mcompat-align-parm is used. */
9308 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
9309 || DEFAULT_ABI == ABI_ELFv2)
9310 && type && TYPE_ALIGN (type) > 64)
9312 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
9313 or homogeneous float/vector aggregates here. We already handled
9314 vector aggregates above, but still need to check for float here. */
9315 bool aggregate_p = (AGGREGATE_TYPE_P (type)
9316 && !SCALAR_FLOAT_MODE_P (elt_mode));
9318 /* We used to check for BLKmode instead of the above aggregate type
9319 check. Warn when this results in any difference to the ABI. */
9320 if (aggregate_p != (mode == BLKmode))
9322 static bool warned;
9323 if (!warned && warn_psabi)
9325 warned = true;
9326 inform (input_location,
9327 "the ABI of passing aggregates with %d-byte alignment"
9328 " has changed in GCC 5",
9329 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
9333 if (aggregate_p)
9334 return 128;
9337 /* Similar for the Darwin64 ABI. Note that for historical reasons we
9338 implement the "aggregate type" check as a BLKmode check here; this
9339 means certain aggregate types are in fact not aligned. */
9340 if (TARGET_MACHO && rs6000_darwin64_abi
9341 && mode == BLKmode
9342 && type && TYPE_ALIGN (type) > 64)
9343 return 128;
9345 return PARM_BOUNDARY;
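/* Illustrative example (editorial addition, not from the original source):
   resulting boundaries on a 32-bit V.4 target:

     long long / double        -> 64
     vector int (AltiVec)      -> 128
     int                       -> PARM_BOUNDARY (32)  */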
9348 /* The offset in words to the start of the parameter save area. */
9350 static unsigned int
9351 rs6000_parm_offset (void)
9353 return (DEFAULT_ABI == ABI_V4 ? 2
9354 : DEFAULT_ABI == ABI_ELFv2 ? 4
9355 : 6);
9358 /* For a function parm of MODE and TYPE, return the starting word in
9359 the parameter area. NWORDS of the parameter area are already used. */
9361 static unsigned int
9362 rs6000_parm_start (enum machine_mode mode, const_tree type,
9363 unsigned int nwords)
9365 unsigned int align;
9367 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
9368 return nwords + (-(rs6000_parm_offset () + nwords) & align);
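/* Illustrative example (editorial addition, not from the original source):
   on 64-bit ELFv2 (rs6000_parm_offset () == 4, PARM_BOUNDARY == 64), a
   128-bit aligned argument arriving after one word is already used gives

     align = 128 / 64 - 1 = 1
     start = 1 + (-(4 + 1) & 1) = 2

   i.e. the argument is bumped to the next even (16-byte aligned) word.  */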
9371 /* Compute the size (in words) of a function argument. */
9373 static unsigned long
9374 rs6000_arg_size (enum machine_mode mode, const_tree type)
9376 unsigned long size;
9378 if (mode != BLKmode)
9379 size = GET_MODE_SIZE (mode);
9380 else
9381 size = int_size_in_bytes (type);
9383 if (TARGET_32BIT)
9384 return (size + 3) >> 2;
9385 else
9386 return (size + 7) >> 3;
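/* Illustrative example (editorial addition, not from the original source):
   a 10-byte BLKmode argument occupies (10 + 3) >> 2 = 3 words on 32-bit
   targets and (10 + 7) >> 3 = 2 words on 64-bit targets.  */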
9389 /* Use this to flush pending int fields. */
9391 static void
9392 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
9393 HOST_WIDE_INT bitpos, int final)
9395 unsigned int startbit, endbit;
9396 int intregs, intoffset;
9397 enum machine_mode mode;
9399 /* Handle the situations where a float is taking up the first half
9400 of the GPR, and the other half is empty (typically due to
9401 alignment restrictions). We can detect this by an 8-byte-aligned
9402 int field, or by seeing that this is the final flush for this
9403 argument. Count the word and continue on. */
9404 if (cum->floats_in_gpr == 1
9405 && (cum->intoffset % 64 == 0
9406 || (cum->intoffset == -1 && final)))
9408 cum->words++;
9409 cum->floats_in_gpr = 0;
9412 if (cum->intoffset == -1)
9413 return;
9415 intoffset = cum->intoffset;
9416 cum->intoffset = -1;
9417 cum->floats_in_gpr = 0;
9419 if (intoffset % BITS_PER_WORD != 0)
9421 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9422 MODE_INT, 0);
9423 if (mode == BLKmode)
9425 /* We couldn't find an appropriate mode, which happens,
9426 e.g., in packed structs when there are 3 bytes to load.
9427 Move intoffset back to the beginning of the word in this
9428 case. */
9429 intoffset = intoffset & -BITS_PER_WORD;
9433 startbit = intoffset & -BITS_PER_WORD;
9434 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9435 intregs = (endbit - startbit) / BITS_PER_WORD;
9436 cum->words += intregs;
9437 /* words should be unsigned. */
9438 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
9440 int pad = (endbit/BITS_PER_WORD) - cum->words;
9441 cum->words += pad;
9445 /* The darwin64 ABI calls for us to recurse down through structs,
9446 looking for elements passed in registers. Unfortunately, we have
9447 to track int register count here also because of misalignments
9448 in powerpc alignment mode. */
9450 static void
9451 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
9452 const_tree type,
9453 HOST_WIDE_INT startbitpos)
9455 tree f;
9457 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9458 if (TREE_CODE (f) == FIELD_DECL)
9460 HOST_WIDE_INT bitpos = startbitpos;
9461 tree ftype = TREE_TYPE (f);
9462 enum machine_mode mode;
9463 if (ftype == error_mark_node)
9464 continue;
9465 mode = TYPE_MODE (ftype);
9467 if (DECL_SIZE (f) != 0
9468 && tree_fits_uhwi_p (bit_position (f)))
9469 bitpos += int_bit_position (f);
9471 /* ??? FIXME: else assume zero offset. */
9473 if (TREE_CODE (ftype) == RECORD_TYPE)
9474 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
9475 else if (USE_FP_FOR_ARG_P (cum, mode))
9477 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
9478 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9479 cum->fregno += n_fpregs;
9480 /* Single-precision floats present a special problem for
9481 us, because they are smaller than an 8-byte GPR, and so
9482 the structure-packing rules combined with the standard
9483 varargs behavior mean that we want to pack float/float
9484 and float/int combinations into a single register's
9485 space. This is complicated by the arg advance flushing,
9486 which works on arbitrarily large groups of int-type
9487 fields. */
9488 if (mode == SFmode)
9490 if (cum->floats_in_gpr == 1)
9492 /* Two floats in a word; count the word and reset
9493 the float count. */
9494 cum->words++;
9495 cum->floats_in_gpr = 0;
9497 else if (bitpos % 64 == 0)
9499 /* A float at the beginning of an 8-byte word;
9500 count it and put off adjusting cum->words until
9501 we see if an arg advance flush is going to do it
9502 for us. */
9503 cum->floats_in_gpr++;
9505 else
9507 /* The float is at the end of a word, preceded
9508 by integer fields, so the arg advance flush
9509 just above has already set cum->words and
9510 everything is taken care of. */
9513 else
9514 cum->words += n_fpregs;
9516 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9518 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9519 cum->vregno++;
9520 cum->words += 2;
9522 else if (cum->intoffset == -1)
9523 cum->intoffset = bitpos;
9527 /* Check for an item that needs to be considered specially under the darwin 64
9528 bit ABI. These are record types where the mode is BLK or the structure is
9529 8 bytes in size. */
9530 static int
9531 rs6000_darwin64_struct_check_p (enum machine_mode mode, const_tree type)
9533 return rs6000_darwin64_abi
9534 && ((mode == BLKmode
9535 && TREE_CODE (type) == RECORD_TYPE
9536 && int_size_in_bytes (type) > 0)
9537 || (type && TREE_CODE (type) == RECORD_TYPE
9538 && int_size_in_bytes (type) == 8)) ? 1 : 0;
9541 /* Update the data in CUM to advance over an argument
9542 of mode MODE and data type TYPE.
9543 (TYPE is null for libcalls where that information may not be available.)
9545 Note that for args passed by reference, function_arg will be called
9546 with MODE and TYPE set to that of the pointer to the arg, not the arg
9547 itself. */
9549 static void
9550 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
9551 const_tree type, bool named, int depth)
9553 enum machine_mode elt_mode;
9554 int n_elts;
9556 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9558 /* Only tick off an argument if we're not recursing. */
9559 if (depth == 0)
9560 cum->nargs_prototype--;
9562 #ifdef HAVE_AS_GNU_ATTRIBUTE
9563 if (DEFAULT_ABI == ABI_V4
9564 && cum->escapes)
9566 if (SCALAR_FLOAT_MODE_P (mode))
9567 rs6000_passes_float = true;
9568 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
9569 rs6000_passes_vector = true;
9570 else if (SPE_VECTOR_MODE (mode)
9571 && !cum->stdarg
9572 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9573 rs6000_passes_vector = true;
9575 #endif
9577 if (TARGET_ALTIVEC_ABI
9578 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9579 || (type && TREE_CODE (type) == VECTOR_TYPE
9580 && int_size_in_bytes (type) == 16)))
9582 bool stack = false;
9584 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
9586 cum->vregno += n_elts;
9588 if (!TARGET_ALTIVEC)
9589 error ("cannot pass argument in vector register because"
9590 " altivec instructions are disabled, use -maltivec"
9591 " to enable them");
9593 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
9594 even if it is going to be passed in a vector register.
9595 Darwin does the same for variable-argument functions. */
9596 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9597 && TARGET_64BIT)
9598 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
9599 stack = true;
9601 else
9602 stack = true;
9604 if (stack)
9606 int align;
9608 /* Vector parameters must be 16-byte aligned. In 32-bit
9609 mode this means we need to take into account the offset
9610 to the parameter save area. In 64-bit mode, they just
9611 have to start on an even word, since the parameter save
9612 area is 16-byte aligned. */
9613 if (TARGET_32BIT)
9614 align = -(rs6000_parm_offset () + cum->words) & 3;
9615 else
9616 align = cum->words & 1;
9617 cum->words += align + rs6000_arg_size (mode, type);
9619 if (TARGET_DEBUG_ARG)
9621 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
9622 cum->words, align);
9623 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
9624 cum->nargs_prototype, cum->prototype,
9625 GET_MODE_NAME (mode));
9629 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
9630 && !cum->stdarg
9631 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9632 cum->sysv_gregno++;
9634 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9636 int size = int_size_in_bytes (type);
9637 /* Variable sized types have size == -1 and are
9638 treated as if consisting entirely of ints.
9639 Pad to 16 byte boundary if needed. */
9640 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9641 && (cum->words % 2) != 0)
9642 cum->words++;
9643 /* For varargs, we can just go up by the size of the struct. */
9644 if (!named)
9645 cum->words += (size + 7) / 8;
9646 else
9648 /* It is tempting to say int register count just goes up by
9649 sizeof(type)/8, but this is wrong in a case such as
9650 { int; double; int; } [powerpc alignment]. We have to
9651 grovel through the fields for these too. */
9652 cum->intoffset = 0;
9653 cum->floats_in_gpr = 0;
9654 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
9655 rs6000_darwin64_record_arg_advance_flush (cum,
9656 size * BITS_PER_UNIT, 1);
9658 if (TARGET_DEBUG_ARG)
9660 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
9661 cum->words, TYPE_ALIGN (type), size);
9662 fprintf (stderr,
9663 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
9664 cum->nargs_prototype, cum->prototype,
9665 GET_MODE_NAME (mode));
9668 else if (DEFAULT_ABI == ABI_V4)
9670 if (TARGET_HARD_FLOAT && TARGET_FPRS
9671 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
9672 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
9673 || (mode == TFmode && !TARGET_IEEEQUAD)
9674 || mode == SDmode || mode == DDmode || mode == TDmode))
9676 /* _Decimal128 must use an even/odd register pair. This assumes
9677 that the register number is odd when fregno is odd. */
9678 if (mode == TDmode && (cum->fregno % 2) == 1)
9679 cum->fregno++;
9681 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
9682 <= FP_ARG_V4_MAX_REG)
9683 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
9684 else
9686 cum->fregno = FP_ARG_V4_MAX_REG + 1;
9687 if (mode == DFmode || mode == TFmode
9688 || mode == DDmode || mode == TDmode)
9689 cum->words += cum->words & 1;
9690 cum->words += rs6000_arg_size (mode, type);
9693 else
9695 int n_words = rs6000_arg_size (mode, type);
9696 int gregno = cum->sysv_gregno;
9698 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
9699 (r7,r8) or (r9,r10), as is any other 2 word item such
9700 as complex int, due to a historical mistake. */
9701 if (n_words == 2)
9702 gregno += (1 - gregno) & 1;
9704 /* Multi-reg args are not split between registers and stack. */
9705 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9707 /* Long long and SPE vectors are aligned on the stack.
9708 So are other 2 word items such as complex int due to
9709 a historical mistake. */
9710 if (n_words == 2)
9711 cum->words += cum->words & 1;
9712 cum->words += n_words;
9715 /* Note: we continue to accumulate gregno even after we have started
9716 spilling to the stack; the overflowed value tells
9717 expand_builtin_saveregs that spilling has begun. */
9718 cum->sysv_gregno = gregno + n_words;
9721 if (TARGET_DEBUG_ARG)
9723 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9724 cum->words, cum->fregno);
9725 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
9726 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
9727 fprintf (stderr, "mode = %4s, named = %d\n",
9728 GET_MODE_NAME (mode), named);
9731 else
9733 int n_words = rs6000_arg_size (mode, type);
9734 int start_words = cum->words;
9735 int align_words = rs6000_parm_start (mode, type, start_words);
9737 cum->words = align_words + n_words;
9739 if (SCALAR_FLOAT_MODE_P (elt_mode)
9740 && TARGET_HARD_FLOAT && TARGET_FPRS)
9742 /* _Decimal128 must be passed in an even/odd float register pair.
9743 This assumes that the register number is odd when fregno is
9744 odd. */
9745 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
9746 cum->fregno++;
9747 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
9750 if (TARGET_DEBUG_ARG)
9752 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9753 cum->words, cum->fregno);
9754 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
9755 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
9756 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
9757 named, align_words - start_words, depth);
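/* Illustrative example (editorial addition, not from the original source):
   advancing over `long long' under V.4 with cum->sysv_gregno == 4 (r4):
   n_words == 2, so gregno += (1 - 4) & 1 bumps to r5, the value occupies
   the (r5,r6) pair, and sysv_gregno becomes 7.  */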
9762 static void
9763 rs6000_function_arg_advance (cumulative_args_t cum, enum machine_mode mode,
9764 const_tree type, bool named)
9766 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
9767 0);
9770 static rtx
9771 spe_build_register_parallel (enum machine_mode mode, int gregno)
9773 rtx r1, r3, r5, r7;
9775 switch (mode)
9777 case DFmode:
9778 r1 = gen_rtx_REG (DImode, gregno);
9779 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9780 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
9782 case DCmode:
9783 case TFmode:
9784 r1 = gen_rtx_REG (DImode, gregno);
9785 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9786 r3 = gen_rtx_REG (DImode, gregno + 2);
9787 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9788 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
9790 case TCmode:
9791 r1 = gen_rtx_REG (DImode, gregno);
9792 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9793 r3 = gen_rtx_REG (DImode, gregno + 2);
9794 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9795 r5 = gen_rtx_REG (DImode, gregno + 4);
9796 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
9797 r7 = gen_rtx_REG (DImode, gregno + 6);
9798 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
9799 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
9801 default:
9802 gcc_unreachable ();
9806 /* Determine where to put a SIMD argument on the SPE. */
9807 static rtx
9808 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, enum machine_mode mode,
9809 const_tree type)
9811 int gregno = cum->sysv_gregno;
9813 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
9814 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
9815 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
9816 || mode == DCmode || mode == TCmode))
9818 int n_words = rs6000_arg_size (mode, type);
9820 /* Doubles go in an odd/even register pair (r5/r6, etc). */
9821 if (mode == DFmode)
9822 gregno += (1 - gregno) & 1;
9824 /* Multi-reg args are not split between registers and stack. */
9825 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9826 return NULL_RTX;
9828 return spe_build_register_parallel (mode, gregno);
9830 if (cum->stdarg)
9832 int n_words = rs6000_arg_size (mode, type);
9834 /* SPE vectors are put in odd registers. */
9835 if (n_words == 2 && (gregno & 1) == 0)
9836 gregno += 1;
9838 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
9840 rtx r1, r2;
9841 enum machine_mode m = SImode;
9843 r1 = gen_rtx_REG (m, gregno);
9844 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
9845 r2 = gen_rtx_REG (m, gregno + 1);
9846 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
9847 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
9849 else
9850 return NULL_RTX;
9852 else
9854 if (gregno <= GP_ARG_MAX_REG)
9855 return gen_rtx_REG (mode, gregno);
9856 else
9857 return NULL_RTX;
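/* Illustrative example (editorial addition, not from the original source):
   an E500 double with gregno == 4 is bumped to the odd register r5, and
   spe_build_register_parallel then returns roughly

     (parallel:DF [(expr_list (reg:DI 5) (const_int 0))])

   where the single DImode register covers the r5/r6 pair in 32-bit mode.  */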
9861 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
9862 structure between cum->intoffset and bitpos to integer registers. */
9864 static void
9865 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
9866 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
9868 enum machine_mode mode;
9869 unsigned int regno;
9870 unsigned int startbit, endbit;
9871 int this_regno, intregs, intoffset;
9872 rtx reg;
9874 if (cum->intoffset == -1)
9875 return;
9877 intoffset = cum->intoffset;
9878 cum->intoffset = -1;
9880 /* If this is the trailing part of a word, try to only load that
9881 much into the register. Otherwise load the whole register. Note
9882 that in the latter case we may pick up unwanted bits. It's not a
9883 problem at the moment, but we may wish to revisit it. */
9885 if (intoffset % BITS_PER_WORD != 0)
9887 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9888 MODE_INT, 0);
9889 if (mode == BLKmode)
9891 /* We couldn't find an appropriate mode, which happens,
9892 e.g., in packed structs when there are 3 bytes to load.
9893 Move intoffset back to the beginning of the word in this
9894 case. */
9895 intoffset = intoffset & -BITS_PER_WORD;
9896 mode = word_mode;
9899 else
9900 mode = word_mode;
9902 startbit = intoffset & -BITS_PER_WORD;
9903 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9904 intregs = (endbit - startbit) / BITS_PER_WORD;
9905 this_regno = cum->words + intoffset / BITS_PER_WORD;
9907 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
9908 cum->use_stack = 1;
9910 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
9911 if (intregs <= 0)
9912 return;
9914 intoffset /= BITS_PER_UNIT;
9915 do
9916 {
9917 regno = GP_ARG_MIN_REG + this_regno;
9918 reg = gen_rtx_REG (mode, regno);
9919 rvec[(*k)++] =
9920 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
9922 this_regno += 1;
9923 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
9924 mode = word_mode;
9925 intregs -= 1;
9926 }
9927 while (intregs > 0);
9930 /* Recursive workhorse for the following. */
9932 static void
9933 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
9934 HOST_WIDE_INT startbitpos, rtx rvec[],
9935 int *k)
9937 tree f;
9939 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9940 if (TREE_CODE (f) == FIELD_DECL)
9942 HOST_WIDE_INT bitpos = startbitpos;
9943 tree ftype = TREE_TYPE (f);
9944 enum machine_mode mode;
9945 if (ftype == error_mark_node)
9946 continue;
9947 mode = TYPE_MODE (ftype);
9949 if (DECL_SIZE (f) != 0
9950 && tree_fits_uhwi_p (bit_position (f)))
9951 bitpos += int_bit_position (f);
9953 /* ??? FIXME: else assume zero offset. */
9955 if (TREE_CODE (ftype) == RECORD_TYPE)
9956 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
9957 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
9959 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
9960 #if 0
9961 switch (mode)
9963 case SCmode: mode = SFmode; break;
9964 case DCmode: mode = DFmode; break;
9965 case TCmode: mode = TFmode; break;
9966 default: break;
9968 #endif
9969 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
9970 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
9972 gcc_assert (cum->fregno == FP_ARG_MAX_REG
9973 && (mode == TFmode || mode == TDmode));
9974 /* Long double or _Decimal128 split over regs and memory. */
9975 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
9976 cum->use_stack = 1;
9978 rvec[(*k)++]
9979 = gen_rtx_EXPR_LIST (VOIDmode,
9980 gen_rtx_REG (mode, cum->fregno++),
9981 GEN_INT (bitpos / BITS_PER_UNIT));
9982 if (mode == TFmode || mode == TDmode)
9983 cum->fregno++;
9985 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9987 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
9988 rvec[(*k)++]
9989 = gen_rtx_EXPR_LIST (VOIDmode,
9990 gen_rtx_REG (mode, cum->vregno++),
9991 GEN_INT (bitpos / BITS_PER_UNIT));
9993 else if (cum->intoffset == -1)
9994 cum->intoffset = bitpos;
9998 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
9999 the register(s) to be used for each field and subfield of a struct
10000 being passed by value, along with the offset of where the
10001 register's value may be found in the block. FP fields go in FP
10002 register, vector fields go in vector registers, and everything
10003 else goes in int registers, packed as in memory.
10005 This code is also used for function return values. RETVAL indicates
10006 whether this is the case.
10008 Much of this is taken from the SPARC V9 port, which has a similar
10009 calling convention. */
10011 static rtx
10012 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10013 bool named, bool retval)
10015 rtx rvec[FIRST_PSEUDO_REGISTER];
10016 int k = 1, kbase = 1;
10017 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10018 /* This is a copy; modifications are not visible to our caller. */
10019 CUMULATIVE_ARGS copy_cum = *orig_cum;
10020 CUMULATIVE_ARGS *cum = &copy_cum;
10022 /* Pad to 16 byte boundary if needed. */
10023 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10024 && (cum->words % 2) != 0)
10025 cum->words++;
10027 cum->intoffset = 0;
10028 cum->use_stack = 0;
10029 cum->named = named;
10031 /* Put entries into rvec[] for individual FP and vector fields, and
10032 for the chunks of memory that go in int regs. Note we start at
10033 element 1; 0 is reserved for an indication of using memory, and
10034 may or may not be filled in below. */
10035 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10036 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10038 /* If any part of the struct went on the stack put all of it there.
10039 This hack is because the generic code for
10040 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10041 parts of the struct are not at the beginning. */
10042 if (cum->use_stack)
10044 if (retval)
10045 return NULL_RTX; /* doesn't go in registers at all */
10046 kbase = 0;
10047 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10049 if (k > 1 || cum->use_stack)
10050 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10051 else
10052 return NULL_RTX;
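/* Illustrative example (editorial addition, not from the original source):
   for a first argument of type `struct { double d; int i; }' under
   darwin64, the recursion above produces roughly

     (parallel [(expr_list (reg:DF f1) (const_int 0))
                (expr_list (reg:DI r4) (const_int 8))])

   the double in an FPR, the trailing int chunk in a GPR (the exact
   register numbers depend on preceding arguments).  */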
10055 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10057 static rtx
10058 rs6000_mixed_function_arg (enum machine_mode mode, const_tree type,
10059 int align_words)
10061 int n_units;
10062 int i, k;
10063 rtx rvec[GP_ARG_NUM_REG + 1];
10065 if (align_words >= GP_ARG_NUM_REG)
10066 return NULL_RTX;
10068 n_units = rs6000_arg_size (mode, type);
10070 /* Optimize the simple case where the arg fits in one gpr, except in
10071 the case of BLKmode due to assign_parms assuming that registers are
10072 BITS_PER_WORD wide. */
10073 if (n_units == 0
10074 || (n_units == 1 && mode != BLKmode))
10075 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10077 k = 0;
10078 if (align_words + n_units > GP_ARG_NUM_REG)
10079 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10080 using a magic NULL_RTX component.
10081 This is not strictly correct. Only some of the arg belongs in
10082 memory, not all of it. However, the normal scheme using
10083 function_arg_partial_nregs can result in unusual subregs, eg.
10084 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10085 store the whole arg to memory is often more efficient than code
10086 to store pieces, and we know that space is available in the right
10087 place for the whole arg. */
10088 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10090 i = 0;
10091 do
10092 {
10093 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10094 rtx off = GEN_INT (i++ * 4);
10095 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10096 }
10097 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10099 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10102 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10103 but must also be copied into the parameter save area starting at
10104 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10105 to the GPRs and/or memory. Return the number of elements used. */
10107 static int
10108 rs6000_psave_function_arg (enum machine_mode mode, const_tree type,
10109 int align_words, rtx *rvec)
10111 int k = 0;
10113 if (align_words < GP_ARG_NUM_REG)
10115 int n_words = rs6000_arg_size (mode, type);
10117 if (align_words + n_words > GP_ARG_NUM_REG
10118 || mode == BLKmode
10119 || (TARGET_32BIT && TARGET_POWERPC64))
10121 /* If this is partially on the stack, then we only
10122 include the portion actually in registers here. */
10123 enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10124 int i = 0;
10126 if (align_words + n_words > GP_ARG_NUM_REG)
10128 /* Not all of the arg fits in gprs. Say that it goes in memory
10129 too, using a magic NULL_RTX component. Also see comment in
10130 rs6000_mixed_function_arg for why the normal
10131 function_arg_partial_nregs scheme doesn't work in this case. */
10132 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10133 }
10135 do
10136 {
10137 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10138 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10139 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10140 }
10141 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10143 else
10145 /* The whole arg fits in gprs. */
10146 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10147 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10150 else
10152 /* It's entirely in memory. */
10153 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10156 return k;
10159 /* RVEC is a vector of K components of an argument of mode MODE.
10160 Construct the final function_arg return value from it. */
10162 static rtx
10163 rs6000_finish_function_arg (enum machine_mode mode, rtx *rvec, int k)
10165 gcc_assert (k >= 1);
10167 /* Avoid returning a PARALLEL in the trivial cases. */
10168 if (k == 1)
10170 if (XEXP (rvec[0], 0) == NULL_RTX)
10171 return NULL_RTX;
10173 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10174 return XEXP (rvec[0], 0);
10177 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10180 /* Determine where to put an argument to a function.
10181 Value is zero to push the argument on the stack,
10182 or a hard register in which to store the argument.
10184 MODE is the argument's machine mode.
10185 TYPE is the data type of the argument (as a tree).
10186 This is null for libcalls where that information may
10187 not be available.
10188 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10189 the preceding args and about the function being called. It is
10190 not modified in this routine.
10191 NAMED is nonzero if this argument is a named parameter
10192 (otherwise it is an extra parameter matching an ellipsis).
10194 On RS/6000 the first eight words of non-FP are normally in registers
10195 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10196 Under V.4, the first 8 FP args are in registers.
10198 If this is floating-point and no prototype is specified, we use
10199 both an FP and integer register (or possibly FP reg and stack). Library
10200 functions (when CALL_LIBCALL is set) always have the proper types for args,
10201 so we can pass the FP value just in one register. emit_library_function
10202 doesn't support PARALLEL anyway.
10204 Note that for args passed by reference, function_arg will be called
10205 with MODE and TYPE set to that of the pointer to the arg, not the arg
10206 itself. */
10208 static rtx
10209 rs6000_function_arg (cumulative_args_t cum_v, enum machine_mode mode,
10210 const_tree type, bool named)
10212 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10213 enum rs6000_abi abi = DEFAULT_ABI;
10214 enum machine_mode elt_mode;
10215 int n_elts;
10217 /* Return a marker to indicate whether the call needs to set or clear
10218 the CR1 bit that V.4 uses to say fp args were passed in registers.
10219 Assume that we don't need the marker for software floating point,
10220 or compiler generated library calls. */
10221 if (mode == VOIDmode)
10223 if (abi == ABI_V4
10224 && (cum->call_cookie & CALL_LIBCALL) == 0
10225 && (cum->stdarg
10226 || (cum->nargs_prototype < 0
10227 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10229 /* For the SPE, we need to crxor CR6 always. */
10230 if (TARGET_SPE_ABI)
10231 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10232 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10233 return GEN_INT (cum->call_cookie
10234 | ((cum->fregno == FP_ARG_MIN_REG)
10235 ? CALL_V4_SET_FP_ARGS
10236 : CALL_V4_CLEAR_FP_ARGS));
10239 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
10242 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10244 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10246 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
10247 if (rslt != NULL_RTX)
10248 return rslt;
10249 /* Else fall through to usual handling. */
10252 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10254 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10255 rtx r, off;
10256 int i, k = 0;
10258 /* Do we also need to pass this argument in the parameter
10259 save area? */
10260 if (TARGET_64BIT && ! cum->prototype)
10262 int align_words = (cum->words + 1) & ~1;
10263 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10266 /* Describe where this argument goes in the vector registers. */
10267 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
10269 r = gen_rtx_REG (elt_mode, cum->vregno + i);
10270 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10271 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10274 return rs6000_finish_function_arg (mode, rvec, k);
10276 else if (TARGET_ALTIVEC_ABI
10277 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
10278 || (type && TREE_CODE (type) == VECTOR_TYPE
10279 && int_size_in_bytes (type) == 16)))
10281 if (named || abi == ABI_V4)
10282 return NULL_RTX;
10283 else
10285 /* Vector parameters to varargs functions under AIX or Darwin
10286 get passed in memory and possibly also in GPRs. */
10287 int align, align_words, n_words;
10288 enum machine_mode part_mode;
10290 /* Vector parameters must be 16-byte aligned. In 32-bit
10291 mode this means we need to take into account the offset
10292 to the parameter save area. In 64-bit mode, they just
10293 have to start on an even word, since the parameter save
10294 area is 16-byte aligned. */
10295 if (TARGET_32BIT)
10296 align = -(rs6000_parm_offset () + cum->words) & 3;
10297 else
10298 align = cum->words & 1;
10299 align_words = cum->words + align;
10301 /* Out of registers? Memory, then. */
10302 if (align_words >= GP_ARG_NUM_REG)
10303 return NULL_RTX;
10305 if (TARGET_32BIT && TARGET_POWERPC64)
10306 return rs6000_mixed_function_arg (mode, type, align_words);
10308 /* The vector value goes in GPRs. Only the part of the
10309 value in GPRs is reported here. */
10310 part_mode = mode;
10311 n_words = rs6000_arg_size (mode, type);
10312 if (align_words + n_words > GP_ARG_NUM_REG)
10313 /* Fortunately, there are only two possibilities, the value
10314 is either wholly in GPRs or half in GPRs and half not. */
10315 part_mode = DImode;
10317 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
10320 else if (TARGET_SPE_ABI && TARGET_SPE
10321 && (SPE_VECTOR_MODE (mode)
10322 || (TARGET_E500_DOUBLE && (mode == DFmode
10323 || mode == DCmode
10324 || mode == TFmode
10325 || mode == TCmode))))
10326 return rs6000_spe_function_arg (cum, mode, type);
10328 else if (abi == ABI_V4)
10330 if (TARGET_HARD_FLOAT && TARGET_FPRS
10331 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10332 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10333 || (mode == TFmode && !TARGET_IEEEQUAD)
10334 || mode == SDmode || mode == DDmode || mode == TDmode))
10336 /* _Decimal128 must use an even/odd register pair. This assumes
10337 that the register number is odd when fregno is odd. */
10338 if (mode == TDmode && (cum->fregno % 2) == 1)
10339 cum->fregno++;
10341 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
10342 <= FP_ARG_V4_MAX_REG)
10343 return gen_rtx_REG (mode, cum->fregno);
10344 else
10345 return NULL_RTX;
10347 else
10349 int n_words = rs6000_arg_size (mode, type);
10350 int gregno = cum->sysv_gregno;
10352 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10353 (r7,r8) or (r9,r10), as is any other 2 word item such
10354 as complex int, due to a historical mistake. */
10355 if (n_words == 2)
10356 gregno += (1 - gregno) & 1;
10358 /* Multi-reg args are not split between registers and stack. */
10359 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10360 return NULL_RTX;
10362 if (TARGET_32BIT && TARGET_POWERPC64)
10363 return rs6000_mixed_function_arg (mode, type,
10364 gregno - GP_ARG_MIN_REG);
10365 return gen_rtx_REG (mode, gregno);
10368 else
10370 int align_words = rs6000_parm_start (mode, type, cum->words);
10372 /* _Decimal128 must be passed in an even/odd float register pair.
10373 This assumes that the register number is odd when fregno is odd. */
10374 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10375 cum->fregno++;
10377 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10379 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10380 rtx r, off;
10381 int i, k = 0;
10382 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10383 int fpr_words;
10385 /* Do we also need to pass this argument in the parameter
10386 save area? */
10387 if (type && (cum->nargs_prototype <= 0
10388 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10389 && TARGET_XL_COMPAT
10390 && align_words >= GP_ARG_NUM_REG)))
10391 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10393 /* Describe where this argument goes in the fprs. */
10394 for (i = 0; i < n_elts
10395 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
10397 /* Check if the argument is split over registers and memory.
10398 This can only ever happen for long double or _Decimal128;
10399 complex types are handled via split_complex_arg. */
10400 enum machine_mode fmode = elt_mode;
10401 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
10403 gcc_assert (fmode == TFmode || fmode == TDmode);
10404 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
10407 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
10408 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10409 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10412 /* If there were not enough FPRs to hold the argument, the rest
10413 usually goes into memory. However, if the current position
10414 is still within the register parameter area, a portion may
10415 actually have to go into GPRs.
10417 Note that it may happen that the portion of the argument
10418 passed in the first "half" of the first GPR was already
10419 passed in the last FPR as well.
10421 For unnamed arguments, we already set up GPRs to cover the
10422 whole argument in rs6000_psave_function_arg, so there is
10423 nothing further to do at this point. */
10424 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
10425 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
10426 && cum->nargs_prototype > 0)
10428 static bool warned;
10430 enum machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10431 int n_words = rs6000_arg_size (mode, type);
10433 align_words += fpr_words;
10434 n_words -= fpr_words;
10436 do
10437 {
10438 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10439 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
10440 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10441 }
10442 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10444 if (!warned && warn_psabi)
10446 warned = true;
10447 inform (input_location,
10448 "the ABI of passing homogeneous float aggregates"
10449 " has changed in GCC 5");
10453 return rs6000_finish_function_arg (mode, rvec, k);
10455 else if (align_words < GP_ARG_NUM_REG)
10457 if (TARGET_32BIT && TARGET_POWERPC64)
10458 return rs6000_mixed_function_arg (mode, type, align_words);
10460 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10462 else
10463 return NULL_RTX;
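/* Illustrative example (editorial addition, not from the original source):
   under ELFv2, `struct { double a, b; }' as the first named argument is
   a homogeneous aggregate (elt_mode == DFmode, n_elts == 2), and the FPR
   loop above yields roughly

     (parallel:BLK [(expr_list (reg:DF f1) (const_int 0))
                    (expr_list (reg:DF f2) (const_int 8))])  */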
10467 /* For an arg passed partly in registers and partly in memory, this is
10468 the number of bytes passed in registers. For args passed entirely in
10469 registers or entirely in memory, zero. When an arg is described by a
10470 PARALLEL, perhaps using more than one register type, this function
10471 returns the number of bytes used by the first element of the PARALLEL. */
10473 static int
10474 rs6000_arg_partial_bytes (cumulative_args_t cum_v, enum machine_mode mode,
10475 tree type, bool named)
10477 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10478 bool passed_in_gprs = true;
10479 int ret = 0;
10480 int align_words;
10481 enum machine_mode elt_mode;
10482 int n_elts;
10484 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10486 if (DEFAULT_ABI == ABI_V4)
10487 return 0;
10489 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10491 /* If we are passing this arg in the fixed parameter save area
10492 (gprs or memory) as well as VRs, we do not use the partial
10493 bytes mechanism; instead, rs6000_function_arg will return a
10494 PARALLEL including a memory element as necessary. */
10495 if (TARGET_64BIT && ! cum->prototype)
10496 return 0;
10498 /* Otherwise, we pass in VRs only. Check for partial copies. */
10499 passed_in_gprs = false;
10500 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
10501 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
10504 /* In this complicated case we just disable the partial_nregs code. */
10505 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10506 return 0;
10508 align_words = rs6000_parm_start (mode, type, cum->words);
10510 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10512 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10514 /* If we are passing this arg in the fixed parameter save area
10515 (gprs or memory) as well as FPRs, we do not use the partial
10516 bytes mechanism; instead, rs6000_function_arg will return a
10517 PARALLEL including a memory element as necessary. */
10518 if (type
10519 && (cum->nargs_prototype <= 0
10520 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10521 && TARGET_XL_COMPAT
10522 && align_words >= GP_ARG_NUM_REG)))
10523 return 0;
10525 /* Otherwise, we pass in FPRs only. Check for partial copies. */
10526 passed_in_gprs = false;
10527 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
10529 /* Compute number of bytes / words passed in FPRs. If there
10530 is still space available in the register parameter area
10531 *after* that amount, a part of the argument will be passed
10532 in GPRs. In that case, the total amount passed in any
10533 registers is equal to the amount that would have been passed
10534 in GPRs if everything were passed there, so we fall back to
10535 the GPR code below to compute the appropriate value. */
10536 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
10537 * MIN (8, GET_MODE_SIZE (elt_mode)));
10538 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
10540 if (align_words + fpr_words < GP_ARG_NUM_REG)
10541 passed_in_gprs = true;
10542 else
10543 ret = fpr;
10547 if (passed_in_gprs
10548 && align_words < GP_ARG_NUM_REG
10549 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
10550 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
10552 if (ret != 0 && TARGET_DEBUG_ARG)
10553 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
10555 return ret;
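/* Illustrative example (editorial addition, not from the original source):
   a 32-byte aggregate starting at align_words == 6 on a 64-bit AIX
   target needs words 6..9, but only words 6 and 7 (r9/r10) remain, so
   the function returns (8 - 6) * 8 = 16 bytes passed in registers.  */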
10558 /* A C expression that indicates when an argument must be passed by
10559 reference. If nonzero for an argument, a copy of that argument is
10560 made in memory and a pointer to the argument is passed instead of
10561 the argument itself. The pointer is passed in whatever way is
10562 appropriate for passing a pointer to that type.
10564 Under V.4, aggregates and long double are passed by reference.
10566 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
10567 reference unless the AltiVec vector extension ABI is in force.
10569 As an extension to all ABIs, variable sized types are passed by
10570 reference. */
10572 static bool
10573 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
10574 enum machine_mode mode, const_tree type,
10575 bool named ATTRIBUTE_UNUSED)
10577 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
10579 if (TARGET_DEBUG_ARG)
10580 fprintf (stderr, "function_arg_pass_by_reference: V4 long double\n");
10581 return 1;
10584 if (!type)
10585 return 0;
10587 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
10589 if (TARGET_DEBUG_ARG)
10590 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
10591 return 1;
10594 if (int_size_in_bytes (type) < 0)
10596 if (TARGET_DEBUG_ARG)
10597 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
10598 return 1;
10601 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10602 modes only exist for GCC vector types if -maltivec. */
10603 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
10605 if (TARGET_DEBUG_ARG)
10606 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
10607 return 1;
10610 /* Pass synthetic vectors in memory. */
10611 if (TREE_CODE (type) == VECTOR_TYPE
10612 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10614 static bool warned_for_pass_big_vectors = false;
10615 if (TARGET_DEBUG_ARG)
10616 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
10617 if (!warned_for_pass_big_vectors)
10619 warning (0, "GCC vector passed by reference: "
10620 "non-standard ABI extension with no compatibility guarantee");
10621 warned_for_pass_big_vectors = true;
10623 return 1;
10626 return 0;
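/* Illustrative example (editorial addition, not from the original source):
   under V.4, `struct S { int x[5]; }' is an aggregate and is therefore
   passed by reference: the caller makes a temporary copy and passes a
   `struct S *' in its place.  */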
10629 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
10630 already processed. Return true if the parameter must be passed
10631 (fully or partially) on the stack. */
10633 static bool
10634 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
10636 enum machine_mode mode;
10637 int unsignedp;
10638 rtx entry_parm;
10640 /* Catch errors. */
10641 if (type == NULL || type == error_mark_node)
10642 return true;
10644 /* Handle types with no storage requirement. */
10645 if (TYPE_MODE (type) == VOIDmode)
10646 return false;
10648 /* Handle complex types: passed as two parts of the component type, hence the two identical recursive checks below. */
10649 if (TREE_CODE (type) == COMPLEX_TYPE)
10650 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
10651 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
10653 /* Handle transparent aggregates. */
10654 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
10655 && TYPE_TRANSPARENT_AGGR (type))
10656 type = TREE_TYPE (first_field (type));
10658 /* See if this arg was passed by invisible reference. */
10659 if (pass_by_reference (get_cumulative_args (args_so_far),
10660 TYPE_MODE (type), type, true))
10661 type = build_pointer_type (type);
10663 /* Find mode as it is passed by the ABI. */
10664 unsignedp = TYPE_UNSIGNED (type);
10665 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
10667 /* If we must pass in stack, we need a stack. */
10668 if (rs6000_must_pass_in_stack (mode, type))
10669 return true;
10671 /* If there is no incoming register, we need a stack. */
10672 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
10673 if (entry_parm == NULL)
10674 return true;
10676 /* Likewise if we need to pass both in registers and on the stack. */
10677 if (GET_CODE (entry_parm) == PARALLEL
10678 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
10679 return true;
10681 /* Also true if we're partially in registers and partially not. */
10682 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
10683 return true;
10685 /* Update info on where next arg arrives in registers. */
10686 rs6000_function_arg_advance (args_so_far, mode, type, true);
10687 return false;
10690 /* Return true if FUN has no prototype, has a variable argument
10691 list, or passes any parameter in memory. */
10693 static bool
10694 rs6000_function_parms_need_stack (tree fun, bool incoming)
10696 tree fntype, result;
10697 CUMULATIVE_ARGS args_so_far_v;
10698 cumulative_args_t args_so_far;
10700 if (!fun)
10701 /* Must be a libcall, all of which only use reg parms. */
10702 return false;
10704 fntype = fun;
10705 if (!TYPE_P (fun))
10706 fntype = TREE_TYPE (fun);
10708 /* Varargs functions need the parameter save area. */
10709 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
10710 return true;
10712 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
10713 args_so_far = pack_cumulative_args (&args_so_far_v);
10715 /* When incoming, we will have been passed the function decl.
10716 It is necessary to use the decl to handle K&R style functions,
10717 where TYPE_ARG_TYPES may not be available. */
10718 if (incoming)
10720 gcc_assert (DECL_P (fun));
10721 result = DECL_RESULT (fun);
10723 else
10724 result = TREE_TYPE (fntype);
10726 if (result && aggregate_value_p (result, fntype))
10728 if (!TYPE_P (result))
10729 result = TREE_TYPE (result);
10730 result = build_pointer_type (result);
10731 rs6000_parm_needs_stack (args_so_far, result);
10734 if (incoming)
10736 tree parm;
10738 for (parm = DECL_ARGUMENTS (fun);
10739 parm && parm != void_list_node;
10740 parm = TREE_CHAIN (parm))
10741 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
10742 return true;
10744 else
10746 function_args_iterator args_iter;
10747 tree arg_type;
10749 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
10750 if (rs6000_parm_needs_stack (args_so_far, arg_type))
10751 return true;
10754 return false;
10757 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
10758 usually a constant depending on the ABI. However, in the ELFv2 ABI
10759 the register parameter area is optional when calling a function that
10760 has a prototype in scope, has no variable argument list, and passes
10761 all parameters in registers. */
10763 int
10764 rs6000_reg_parm_stack_space (tree fun, bool incoming)
10766 int reg_parm_stack_space;
10768 switch (DEFAULT_ABI)
10770 default:
10771 reg_parm_stack_space = 0;
10772 break;
10774 case ABI_AIX:
10775 case ABI_DARWIN:
10776 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10777 break;
10779 case ABI_ELFv2:
10780 /* ??? Recomputing this every time is a bit expensive. Is there
10781 a place to cache this information? */
10782 if (rs6000_function_parms_need_stack (fun, incoming))
10783 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10784 else
10785 reg_parm_stack_space = 0;
10786 break;
10789 return reg_parm_stack_space;
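/* Illustrative example (editorial addition, not from the original source),
   64-bit ELFv2:

     void f (int, double);   prototyped, all parms in regs -> 0 bytes
     void g (int, ...);      varargs -> 64 bytes  */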
10792 static void
10793 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
10795 int i;
10796 enum machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
10798 if (nregs == 0)
10799 return;
10801 for (i = 0; i < nregs; i++)
10803 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
10804 if (reload_completed)
10806 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
10807 tem = NULL_RTX;
10808 else
10809 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
10810 i * GET_MODE_SIZE (reg_mode));
10812 else
10813 tem = replace_equiv_address (tem, XEXP (tem, 0));
10815 gcc_assert (tem);
10817 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
10821 /* Perform any actions needed for a function that is receiving a
10822 variable number of arguments.
10824 CUM is as above.
10826 MODE and TYPE are the mode and type of the current parameter.
10828 PRETEND_SIZE is a variable that should be set to the amount of stack
10829 that must be pushed by the prolog to pretend that our caller pushed
10830 it.
10832 Normally, this macro will push all remaining incoming registers on the
10833 stack and set PRETEND_SIZE to the length of the registers pushed. */
10835 static void
10836 setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
10837 tree type, int *pretend_size ATTRIBUTE_UNUSED,
10838 int no_rtl)
10840 CUMULATIVE_ARGS next_cum;
10841 int reg_size = TARGET_32BIT ? 4 : 8;
10842 rtx save_area = NULL_RTX, mem;
10843 int first_reg_offset;
10844 alias_set_type set;
10846 /* Skip the last named argument. */
10847 next_cum = *get_cumulative_args (cum);
10848 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
10850 if (DEFAULT_ABI == ABI_V4)
10852 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
10854 if (! no_rtl)
10856 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
10857 HOST_WIDE_INT offset = 0;
10859 /* Try to optimize the size of the varargs save area.
10860 The ABI requires that ap.reg_save_area is doubleword
10861 aligned, but we don't need to allocate space for all
10862 the bytes, only those in which we will actually save
10863 anything.
10864 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
10865 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
10866 if (TARGET_HARD_FLOAT && TARGET_FPRS
10867 && next_cum.fregno <= FP_ARG_V4_MAX_REG
10868 && cfun->va_list_fpr_size)
10870 if (gpr_reg_num)
10871 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
10872 * UNITS_PER_FP_WORD;
10873 if (cfun->va_list_fpr_size
10874 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10875 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
10876 else
10877 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10878 * UNITS_PER_FP_WORD;
10880 if (gpr_reg_num)
10882 offset = -((first_reg_offset * reg_size) & ~7);
10883 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
10885 gpr_reg_num = cfun->va_list_gpr_size;
10886 if (reg_size == 4 && (first_reg_offset & 1))
10887 gpr_reg_num++;
10889 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
10891 else if (fpr_size)
10892 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
10893 * UNITS_PER_FP_WORD
10894 - (int) (GP_ARG_NUM_REG * reg_size);
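/* Worked example (sketch: 32-bit V4, no FP varargs to save, and a large
   cfun->va_list_gpr_size so no clamping applies): with two named integer
   arguments, first_reg_offset is 2, so offset = -((2 * 4) & ~7) = -8,
   six GPRs (r5..r10) remain to be saved, and
   gpr_size = (6 * 4 + 7) & ~7 = 24 bytes.  */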
10896 if (gpr_size + fpr_size)
10898 rtx reg_save_area
10899 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
10900 gcc_assert (GET_CODE (reg_save_area) == MEM);
10901 reg_save_area = XEXP (reg_save_area, 0);
10902 if (GET_CODE (reg_save_area) == PLUS)
10904 gcc_assert (XEXP (reg_save_area, 0)
10905 == virtual_stack_vars_rtx);
10906 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
10907 offset += INTVAL (XEXP (reg_save_area, 1));
10909 else
10910 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
10913 cfun->machine->varargs_save_offset = offset;
10914 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
10917 else
10919 first_reg_offset = next_cum.words;
10920 save_area = virtual_incoming_args_rtx;
10922 if (targetm.calls.must_pass_in_stack (mode, type))
10923 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
10926 set = get_varargs_alias_set ();
10927 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
10928 && cfun->va_list_gpr_size)
10930 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
10932 if (va_list_gpr_counter_field)
10933 /* V4 va_list_gpr_size counts number of registers needed. */
10934 n_gpr = cfun->va_list_gpr_size;
10935 else
10936 /* char * va_list instead counts number of bytes needed. */
10937 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
10939 if (nregs > n_gpr)
10940 nregs = n_gpr;
10942 mem = gen_rtx_MEM (BLKmode,
10943 plus_constant (Pmode, save_area,
10944 first_reg_offset * reg_size));
10945 MEM_NOTRAP_P (mem) = 1;
10946 set_mem_alias_set (mem, set);
10947 set_mem_align (mem, BITS_PER_WORD);
10949 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
10950 nregs);
10953 /* Save FP registers if needed. */
10954 if (DEFAULT_ABI == ABI_V4
10955 && TARGET_HARD_FLOAT && TARGET_FPRS
10956 && ! no_rtl
10957 && next_cum.fregno <= FP_ARG_V4_MAX_REG
10958 && cfun->va_list_fpr_size)
10960 int fregno = next_cum.fregno, nregs;
10961 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
10962 rtx lab = gen_label_rtx ();
10963 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
10964 * UNITS_PER_FP_WORD);
10966 emit_jump_insn
10967 (gen_rtx_SET (VOIDmode,
10968 pc_rtx,
10969 gen_rtx_IF_THEN_ELSE (VOIDmode,
10970 gen_rtx_NE (VOIDmode, cr1,
10971 const0_rtx),
10972 gen_rtx_LABEL_REF (VOIDmode, lab),
10973 pc_rtx)));
10975 for (nregs = 0;
10976 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
10977 fregno++, off += UNITS_PER_FP_WORD, nregs++)
10979 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
10980 ? DFmode : SFmode,
10981 plus_constant (Pmode, save_area, off));
10982 MEM_NOTRAP_P (mem) = 1;
10983 set_mem_alias_set (mem, set);
10984 set_mem_align (mem, GET_MODE_ALIGNMENT (
10985 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
10986 ? DFmode : SFmode));
10987 emit_move_insn (mem, gen_rtx_REG (
10988 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
10989 ? DFmode : SFmode, fregno));
10992 emit_label (lab);
10996 /* Create the va_list data type. */
10998 static tree
10999 rs6000_build_builtin_va_list (void)
11001 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11003 /* For AIX, prefer 'char *' because that's what the system
11004 header files like. */
11005 if (DEFAULT_ABI != ABI_V4)
11006 return build_pointer_type (char_type_node);
11008 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11009 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11010 get_identifier ("__va_list_tag"), record);
11012 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11013 unsigned_char_type_node);
11014 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11015 unsigned_char_type_node);
11016 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11017 every user file. */
11018 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11019 get_identifier ("reserved"), short_unsigned_type_node);
11020 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11021 get_identifier ("overflow_arg_area"),
11022 ptr_type_node);
11023 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11024 get_identifier ("reg_save_area"),
11025 ptr_type_node);
11027 va_list_gpr_counter_field = f_gpr;
11028 va_list_fpr_counter_field = f_fpr;
11030 DECL_FIELD_CONTEXT (f_gpr) = record;
11031 DECL_FIELD_CONTEXT (f_fpr) = record;
11032 DECL_FIELD_CONTEXT (f_res) = record;
11033 DECL_FIELD_CONTEXT (f_ovf) = record;
11034 DECL_FIELD_CONTEXT (f_sav) = record;
11036 TYPE_STUB_DECL (record) = type_decl;
11037 TYPE_NAME (record) = type_decl;
11038 TYPE_FIELDS (record) = f_gpr;
11039 DECL_CHAIN (f_gpr) = f_fpr;
11040 DECL_CHAIN (f_fpr) = f_res;
11041 DECL_CHAIN (f_res) = f_ovf;
11042 DECL_CHAIN (f_ovf) = f_sav;
11044 layout_type (record);
11046 /* The correct type is an array type of one element. */
11047 return build_array_type (record, build_index_type (size_zero_node));
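/* For reference, the record built above corresponds to this C sketch of the
   32-bit SVR4 va_list: gpr and fpr count the GP and FP argument registers
   consumed so far, overflow_arg_area points at arguments passed on the
   stack, and reg_save_area points at the block spilled by the prologue:

     typedef struct __va_list_tag
     {
       unsigned char gpr;
       unsigned char fpr;
       unsigned short reserved;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;

     typedef __va_list_tag __builtin_va_list[1];
  */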
11050 /* Implement va_start. */
11052 static void
11053 rs6000_va_start (tree valist, rtx nextarg)
11055 HOST_WIDE_INT words, n_gpr, n_fpr;
11056 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11057 tree gpr, fpr, ovf, sav, t;
11059 /* Only SVR4 needs something special. */
11060 if (DEFAULT_ABI != ABI_V4)
11062 std_expand_builtin_va_start (valist, nextarg);
11063 return;
11066 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11067 f_fpr = DECL_CHAIN (f_gpr);
11068 f_res = DECL_CHAIN (f_fpr);
11069 f_ovf = DECL_CHAIN (f_res);
11070 f_sav = DECL_CHAIN (f_ovf);
11072 valist = build_simple_mem_ref (valist);
11073 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11074 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11075 f_fpr, NULL_TREE);
11076 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11077 f_ovf, NULL_TREE);
11078 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11079 f_sav, NULL_TREE);
11081 /* Count number of gp and fp argument registers used. */
11082 words = crtl->args.info.words;
11083 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11084 GP_ARG_NUM_REG);
11085 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11086 FP_ARG_NUM_REG);
11088 if (TARGET_DEBUG_ARG)
11089 fprintf (stderr, "va_start: words = " HOST_WIDE_INT_PRINT_DEC ", n_gpr = "
11090 HOST_WIDE_INT_PRINT_DEC ", n_fpr = " HOST_WIDE_INT_PRINT_DEC "\n",
11091 words, n_gpr, n_fpr);
11093 if (cfun->va_list_gpr_size)
11095 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11096 build_int_cst (NULL_TREE, n_gpr));
11097 TREE_SIDE_EFFECTS (t) = 1;
11098 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11101 if (cfun->va_list_fpr_size)
11103 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11104 build_int_cst (NULL_TREE, n_fpr));
11105 TREE_SIDE_EFFECTS (t) = 1;
11106 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11108 #ifdef HAVE_AS_GNU_ATTRIBUTE
11109 if (call_ABI_of_interest (cfun->decl))
11110 rs6000_passes_float = true;
11111 #endif
11114 /* Find the overflow area. */
11115 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11116 if (words != 0)
11117 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
11118 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11119 TREE_SIDE_EFFECTS (t) = 1;
11120 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11122 /* If there were no va_arg invocations, don't set up the register
11123 save area. */
11124 if (!cfun->va_list_gpr_size
11125 && !cfun->va_list_fpr_size
11126 && n_gpr < GP_ARG_NUM_REG
11127 && n_fpr < FP_ARG_V4_MAX_REG)
11128 return;
11130 /* Find the register save area. */
11131 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11132 if (cfun->machine->varargs_save_offset)
11133 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11134 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11135 TREE_SIDE_EFFECTS (t) = 1;
11136 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
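/* In C terms (sketch), for "void f (int a, ...)" on V4 the expansion of
   va_start amounts to: ap[0].gpr = <number of GPRs consumed by named args>;
   ap[0].fpr = <number of FPRs consumed by named args>;
   ap[0].overflow_arg_area = <first stack-passed argument word>;
   ap[0].reg_save_area = <the block spilled by setup_incoming_varargs,
   at varargs_save_offset>.  */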
11139 /* Implement va_arg. */
11141 static tree
11142 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11143 gimple_seq *post_p)
11145 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11146 tree gpr, fpr, ovf, sav, reg, t, u;
11147 int size, rsize, n_reg, sav_ofs, sav_scale;
11148 tree lab_false, lab_over, addr;
11149 int align;
11150 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11151 int regalign = 0;
11152 gimple stmt;
11154 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11156 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11157 return build_va_arg_indirect_ref (t);
11160 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11161 earlier version of gcc, with the property that it always applied alignment
11162 adjustments to the va-args (even for zero-sized types). The cheapest way
11163 to deal with this is to replicate the effect of the part of
11164 std_gimplify_va_arg_expr that carries out the align adjust, for the case
11165 of relevance.
11166 We don't need to check for pass-by-reference because of the test above.
11167 We can return a simplified answer, since we know there's no offset to add. */
11169 if (((TARGET_MACHO
11170 && rs6000_darwin64_abi)
11171 || DEFAULT_ABI == ABI_ELFv2
11172 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11173 && integer_zerop (TYPE_SIZE (type)))
11175 unsigned HOST_WIDE_INT align, boundary;
11176 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11177 align = PARM_BOUNDARY / BITS_PER_UNIT;
11178 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11179 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11180 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11181 boundary /= BITS_PER_UNIT;
11182 if (boundary > align)
11184 tree t;
11185 /* This updates arg ptr by the amount that would be necessary
11186 to align the zero-sized (but not zero-alignment) item. */
11187 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11188 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11189 gimplify_and_add (t, pre_p);
11191 t = fold_convert (sizetype, valist_tmp);
11192 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11193 fold_convert (TREE_TYPE (valist),
11194 fold_build2 (BIT_AND_EXPR, sizetype, t,
11195 size_int (-boundary))));
11196 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11197 gimplify_and_add (t, pre_p);
11199 /* Since it is zero-sized there's no increment for the item itself. */
11200 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11201 return build_va_arg_indirect_ref (valist_tmp);
11204 if (DEFAULT_ABI != ABI_V4)
11206 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11208 tree elem_type = TREE_TYPE (type);
11209 enum machine_mode elem_mode = TYPE_MODE (elem_type);
11210 int elem_size = GET_MODE_SIZE (elem_mode);
11212 if (elem_size < UNITS_PER_WORD)
11214 tree real_part, imag_part;
11215 gimple_seq post = NULL;
11217 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11218 &post);
11219 /* Copy the value into a temporary, lest the formal temporary
11220 be reused out from under us. */
11221 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11222 gimple_seq_add_seq (pre_p, post);
11224 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11225 post_p);
11227 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11231 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
11234 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11235 f_fpr = DECL_CHAIN (f_gpr);
11236 f_res = DECL_CHAIN (f_fpr);
11237 f_ovf = DECL_CHAIN (f_res);
11238 f_sav = DECL_CHAIN (f_ovf);
11240 valist = build_va_arg_indirect_ref (valist);
11241 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11242 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11243 f_fpr, NULL_TREE);
11244 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11245 f_ovf, NULL_TREE);
11246 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11247 f_sav, NULL_TREE);
11249 size = int_size_in_bytes (type);
11250 rsize = (size + 3) / 4;
11251 align = 1;
11253 if (TARGET_HARD_FLOAT && TARGET_FPRS
11254 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
11255 || (TARGET_DOUBLE_FLOAT
11256 && (TYPE_MODE (type) == DFmode
11257 || TYPE_MODE (type) == TFmode
11258 || TYPE_MODE (type) == SDmode
11259 || TYPE_MODE (type) == DDmode
11260 || TYPE_MODE (type) == TDmode))))
11262 /* FP args go in FP registers, if present. */
11263 reg = fpr;
11264 n_reg = (size + 7) / 8;
11265 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
11266 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
11267 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
11268 align = 8;
11270 else
11272 /* Otherwise into GP registers. */
11273 reg = gpr;
11274 n_reg = rsize;
11275 sav_ofs = 0;
11276 sav_scale = 4;
11277 if (n_reg == 2)
11278 align = 8;
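/* Worked addressing example (sketch): fetching a double vararg on 32-bit V4
   hard-float when the fpr counter is 2.  The FP branch above gives
   sav_ofs = 8 * 4 = 32 (the eight 4-byte GPR slots precede the FPR slots)
   and sav_scale = 8, so the code below computes

     reg_save_area + 32 + 2 * 8 = reg_save_area + 48

   and post-increments the counter to 3 (n_reg is 1 for DFmode).  */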
11281 /* Pull the value out of the saved registers.... */
11283 lab_over = NULL;
11284 addr = create_tmp_var (ptr_type_node, "addr");
11286 /* AltiVec vectors never go in registers when -mabi=altivec. */
11287 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11288 align = 16;
11289 else
11291 lab_false = create_artificial_label (input_location);
11292 lab_over = create_artificial_label (input_location);
11294 /* Long long and SPE vectors are aligned in the registers.
11295 As is any other 2-GPR item, such as complex int, due to a
11296 historical mistake. */
11297 u = reg;
11298 if (n_reg == 2 && reg == gpr)
11300 regalign = 1;
11301 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11302 build_int_cst (TREE_TYPE (reg), n_reg - 1));
11303 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
11304 unshare_expr (reg), u);
11306 /* _Decimal128 is passed in even/odd fpr pairs; the stored
11307 reg number is 0 for f1, so we want to make it odd. */
11308 else if (reg == fpr && TYPE_MODE (type) == TDmode)
11310 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11311 build_int_cst (TREE_TYPE (reg), 1));
11312 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
11315 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
11316 t = build2 (GE_EXPR, boolean_type_node, u, t);
11317 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11318 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11319 gimplify_and_add (t, pre_p);
11321 t = sav;
11322 if (sav_ofs)
11323 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11325 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11326 build_int_cst (TREE_TYPE (reg), n_reg));
11327 u = fold_convert (sizetype, u);
11328 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
11329 t = fold_build_pointer_plus (t, u);
11331 /* _Decimal32 varargs are located in the second word of the 64-bit
11332 FP register for 32-bit binaries. */
11333 if (!TARGET_POWERPC64
11334 && TARGET_HARD_FLOAT && TARGET_FPRS
11335 && TYPE_MODE (type) == SDmode)
11336 t = fold_build_pointer_plus_hwi (t, size);
11338 gimplify_assign (addr, t, pre_p);
11340 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11342 stmt = gimple_build_label (lab_false);
11343 gimple_seq_add_stmt (pre_p, stmt);
11345 if ((n_reg == 2 && !regalign) || n_reg > 2)
11347 /* Ensure that we don't find any more args in regs.
11348 Alignment has been taken care of for the special cases. */
11349 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
11353 /* ... otherwise out of the overflow area. */
11355 /* Care for on-stack alignment if needed. */
11356 t = ovf;
11357 if (align != 1)
11359 t = fold_build_pointer_plus_hwi (t, align - 1);
11360 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
11361 build_int_cst (TREE_TYPE (t), -align));
11363 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11365 gimplify_assign (unshare_expr (addr), t, pre_p);
11367 t = fold_build_pointer_plus_hwi (t, size);
11368 gimplify_assign (unshare_expr (ovf), t, pre_p);
11370 if (lab_over)
11372 stmt = gimple_build_label (lab_over);
11373 gimple_seq_add_stmt (pre_p, stmt);
11376 if (STRICT_ALIGNMENT
11377 && (TYPE_ALIGN (type)
11378 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
11380 /* The value (of type complex double, for example) may not be
11381 aligned in memory in the saved registers, so copy via a
11382 temporary. (This is the same code as used for SPARC.) */
11383 tree tmp = create_tmp_var (type, "va_arg_tmp");
11384 tree dest_addr = build_fold_addr_expr (tmp);
11386 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
11387 3, dest_addr, addr, size_int (rsize * 4));
11389 gimplify_and_add (copy, pre_p);
11390 addr = dest_addr;
11393 addr = fold_convert (ptrtype, addr);
11394 return build_va_arg_indirect_ref (addr);
11397 /* Builtins. */
11399 static void
11400 def_builtin (const char *name, tree type, enum rs6000_builtins code)
11402 tree t;
11403 unsigned classify = rs6000_builtin_info[(int)code].attr;
11404 const char *attr_string = "";
11406 gcc_assert (name != NULL);
11407 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
11409 if (rs6000_builtin_decls[(int)code])
11410 fatal_error ("internal error: builtin function %s already processed", name);
11412 rs6000_builtin_decls[(int)code] = t =
11413 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
11415 /* Set any special attributes. */
11416 if ((classify & RS6000_BTC_CONST) != 0)
11418 /* const function, function only depends on the inputs. */
11419 TREE_READONLY (t) = 1;
11420 TREE_NOTHROW (t) = 1;
11421 attr_string = ", const";
11423 else if ((classify & RS6000_BTC_PURE) != 0)
11425 /* pure function, function can read global memory, but does not set any
11426 external state. */
11427 DECL_PURE_P (t) = 1;
11428 TREE_NOTHROW (t) = 1;
11429 attr_string = ", pure";
11431 else if ((classify & RS6000_BTC_FP) != 0)
11433 /* Function is a math function. If rounding mode is on, then treat the
11434 function as not reading global memory, but it can have arbitrary side
11435 effects. If it is off, then assume the function is a const function.
11436 This mimics the ATTR_MATHFN_FPROUNDING attribute in
11437 builtin-attribute.def that is used for the math functions. */
11438 TREE_NOTHROW (t) = 1;
11439 if (flag_rounding_math)
11441 DECL_PURE_P (t) = 1;
11442 DECL_IS_NOVOPS (t) = 1;
11443 attr_string = ", fp, pure";
11445 else
11447 TREE_READONLY (t) = 1;
11448 attr_string = ", fp, const";
11451 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
11452 gcc_unreachable ();
11454 if (TARGET_DEBUG_BUILTIN)
11455 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
11456 (int)code, name, attr_string);
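/* Illustrative use (sketch; this mirrors the registration calls in the
   builtin-init routines later in this file, where void_ftype_v4si is one of
   the function types built there):

     def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si,
                  ALTIVEC_BUILTIN_MTVSCR);
  */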
11459 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
11461 #undef RS6000_BUILTIN_1
11462 #undef RS6000_BUILTIN_2
11463 #undef RS6000_BUILTIN_3
11464 #undef RS6000_BUILTIN_A
11465 #undef RS6000_BUILTIN_D
11466 #undef RS6000_BUILTIN_E
11467 #undef RS6000_BUILTIN_H
11468 #undef RS6000_BUILTIN_P
11469 #undef RS6000_BUILTIN_Q
11470 #undef RS6000_BUILTIN_S
11471 #undef RS6000_BUILTIN_X
11473 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11474 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11475 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
11476 { MASK, ICODE, NAME, ENUM },
11478 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11479 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11480 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11481 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11482 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11483 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11484 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11485 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11487 static const struct builtin_description bdesc_3arg[] =
11489 #include "rs6000-builtin.def"
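/* The bdesc_* tables in this file all use the "X macro" technique:
   rs6000-builtin.def invokes RS6000_BUILTIN_<kind> once per builtin, and
   each table re-#defines exactly one of those macros to emit an initializer
   while the rest expand to nothing.  A minimal self-contained sketch of the
   idiom (hypothetical names, not the real .def contents):

     #define DEF_LIST  X (FOO, "foo") X (BAR, "bar")

     #define X(ENUM, NAME) ENUM,
     enum codes { DEF_LIST CODE_COUNT };
     #undef X

     #define X(ENUM, NAME) NAME,
     static const char *names[] = { DEF_LIST };
     #undef X
  */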
11492 /* DST operations: void foo (void *, const int, const char). */
11494 #undef RS6000_BUILTIN_1
11495 #undef RS6000_BUILTIN_2
11496 #undef RS6000_BUILTIN_3
11497 #undef RS6000_BUILTIN_A
11498 #undef RS6000_BUILTIN_D
11499 #undef RS6000_BUILTIN_E
11500 #undef RS6000_BUILTIN_H
11501 #undef RS6000_BUILTIN_P
11502 #undef RS6000_BUILTIN_Q
11503 #undef RS6000_BUILTIN_S
11504 #undef RS6000_BUILTIN_X
11506 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11507 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11508 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11509 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11510 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
11511 { MASK, ICODE, NAME, ENUM },
11513 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11514 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11515 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11516 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11517 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11518 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11520 static const struct builtin_description bdesc_dst[] =
11522 #include "rs6000-builtin.def"
11525 /* Simple binary operations: VECc = foo (VECa, VECb). */
11527 #undef RS6000_BUILTIN_1
11528 #undef RS6000_BUILTIN_2
11529 #undef RS6000_BUILTIN_3
11530 #undef RS6000_BUILTIN_A
11531 #undef RS6000_BUILTIN_D
11532 #undef RS6000_BUILTIN_E
11533 #undef RS6000_BUILTIN_H
11534 #undef RS6000_BUILTIN_P
11535 #undef RS6000_BUILTIN_Q
11536 #undef RS6000_BUILTIN_S
11537 #undef RS6000_BUILTIN_X
11539 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11540 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
11541 { MASK, ICODE, NAME, ENUM },
11543 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11544 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11545 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11546 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11547 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11548 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11549 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11550 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11551 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11553 static const struct builtin_description bdesc_2arg[] =
11555 #include "rs6000-builtin.def"
11558 #undef RS6000_BUILTIN_1
11559 #undef RS6000_BUILTIN_2
11560 #undef RS6000_BUILTIN_3
11561 #undef RS6000_BUILTIN_A
11562 #undef RS6000_BUILTIN_D
11563 #undef RS6000_BUILTIN_E
11564 #undef RS6000_BUILTIN_H
11565 #undef RS6000_BUILTIN_P
11566 #undef RS6000_BUILTIN_Q
11567 #undef RS6000_BUILTIN_S
11568 #undef RS6000_BUILTIN_X
11570 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11571 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11572 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11573 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11574 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11575 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11576 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11577 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
11578 { MASK, ICODE, NAME, ENUM },
11580 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11581 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11582 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11584 /* AltiVec predicates. */
11586 static const struct builtin_description bdesc_altivec_preds[] =
11588 #include "rs6000-builtin.def"
11591 /* SPE predicates. */
11592 #undef RS6000_BUILTIN_1
11593 #undef RS6000_BUILTIN_2
11594 #undef RS6000_BUILTIN_3
11595 #undef RS6000_BUILTIN_A
11596 #undef RS6000_BUILTIN_D
11597 #undef RS6000_BUILTIN_E
11598 #undef RS6000_BUILTIN_H
11599 #undef RS6000_BUILTIN_P
11600 #undef RS6000_BUILTIN_Q
11601 #undef RS6000_BUILTIN_S
11602 #undef RS6000_BUILTIN_X
11604 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11605 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11606 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11607 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11608 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11609 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11610 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11611 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11612 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11613 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
11614 { MASK, ICODE, NAME, ENUM },
11616 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11618 static const struct builtin_description bdesc_spe_predicates[] =
11620 #include "rs6000-builtin.def"
11623 /* SPE evsel predicates. */
11624 #undef RS6000_BUILTIN_1
11625 #undef RS6000_BUILTIN_2
11626 #undef RS6000_BUILTIN_3
11627 #undef RS6000_BUILTIN_A
11628 #undef RS6000_BUILTIN_D
11629 #undef RS6000_BUILTIN_E
11630 #undef RS6000_BUILTIN_H
11631 #undef RS6000_BUILTIN_P
11632 #undef RS6000_BUILTIN_Q
11633 #undef RS6000_BUILTIN_S
11634 #undef RS6000_BUILTIN_X
11636 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11637 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11638 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11639 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11640 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11641 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
11642 { MASK, ICODE, NAME, ENUM },
11644 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11645 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11646 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11647 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11648 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11650 static const struct builtin_description bdesc_spe_evsel[] =
11652 #include "rs6000-builtin.def"
11655 /* PAIRED predicates. */
11656 #undef RS6000_BUILTIN_1
11657 #undef RS6000_BUILTIN_2
11658 #undef RS6000_BUILTIN_3
11659 #undef RS6000_BUILTIN_A
11660 #undef RS6000_BUILTIN_D
11661 #undef RS6000_BUILTIN_E
11662 #undef RS6000_BUILTIN_H
11663 #undef RS6000_BUILTIN_P
11664 #undef RS6000_BUILTIN_Q
11665 #undef RS6000_BUILTIN_S
11666 #undef RS6000_BUILTIN_X
11668 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11669 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11670 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11671 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11672 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11673 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11674 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11675 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11676 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
11677 { MASK, ICODE, NAME, ENUM },
11679 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11680 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11682 static const struct builtin_description bdesc_paired_preds[] =
11684 #include "rs6000-builtin.def"
11687 /* ABS* operations. */
11689 #undef RS6000_BUILTIN_1
11690 #undef RS6000_BUILTIN_2
11691 #undef RS6000_BUILTIN_3
11692 #undef RS6000_BUILTIN_A
11693 #undef RS6000_BUILTIN_D
11694 #undef RS6000_BUILTIN_E
11695 #undef RS6000_BUILTIN_H
11696 #undef RS6000_BUILTIN_P
11697 #undef RS6000_BUILTIN_Q
11698 #undef RS6000_BUILTIN_S
11699 #undef RS6000_BUILTIN_X
11701 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11702 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11703 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11704 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
11705 { MASK, ICODE, NAME, ENUM },
11707 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11708 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11709 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11710 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11711 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11712 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11713 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11715 static const struct builtin_description bdesc_abs[] =
11717 #include "rs6000-builtin.def"
11720 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
11721 foo (VECa). */
11723 #undef RS6000_BUILTIN_1
11724 #undef RS6000_BUILTIN_2
11725 #undef RS6000_BUILTIN_3
11726 #undef RS6000_BUILTIN_A
11727 #undef RS6000_BUILTIN_D
11728 #undef RS6000_BUILTIN_E
11729 #undef RS6000_BUILTIN_H
11730 #undef RS6000_BUILTIN_P
11731 #undef RS6000_BUILTIN_Q
11732 #undef RS6000_BUILTIN_S
11733 #undef RS6000_BUILTIN_X
11735 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
11736 { MASK, ICODE, NAME, ENUM },
11738 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11739 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11740 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11741 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11742 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11743 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11744 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11745 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11746 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11747 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11749 static const struct builtin_description bdesc_1arg[] =
11751 #include "rs6000-builtin.def"
11754 /* HTM builtins. */
11755 #undef RS6000_BUILTIN_1
11756 #undef RS6000_BUILTIN_2
11757 #undef RS6000_BUILTIN_3
11758 #undef RS6000_BUILTIN_A
11759 #undef RS6000_BUILTIN_D
11760 #undef RS6000_BUILTIN_E
11761 #undef RS6000_BUILTIN_H
11762 #undef RS6000_BUILTIN_P
11763 #undef RS6000_BUILTIN_Q
11764 #undef RS6000_BUILTIN_S
11765 #undef RS6000_BUILTIN_X
11767 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11768 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11769 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11770 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11771 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11772 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11773 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
11774 { MASK, ICODE, NAME, ENUM },
11776 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11777 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11778 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11779 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11781 static const struct builtin_description bdesc_htm[] =
11783 #include "rs6000-builtin.def"
11786 #undef RS6000_BUILTIN_1
11787 #undef RS6000_BUILTIN_2
11788 #undef RS6000_BUILTIN_3
11789 #undef RS6000_BUILTIN_A
11790 #undef RS6000_BUILTIN_D
11791 #undef RS6000_BUILTIN_E
11792 #undef RS6000_BUILTIN_H
11793 #undef RS6000_BUILTIN_P
11794 #undef RS6000_BUILTIN_Q
11795 #undef RS6000_BUILTIN_S
11796 #undef RS6000_BUILTIN_X
11797 /* Return true if a builtin function is overloaded. */
11798 bool
11799 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
11801 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
11804 /* Expand an expression EXP that calls a builtin without arguments. */
11805 static rtx
11806 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
11808 rtx pat;
11809 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11811 if (icode == CODE_FOR_nothing)
11812 /* Builtin not supported on this processor. */
11813 return 0;
11815 if (target == 0
11816 || GET_MODE (target) != tmode
11817 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11818 target = gen_reg_rtx (tmode);
11820 pat = GEN_FCN (icode) (target);
11821 if (! pat)
11822 return 0;
11823 emit_insn (pat);
11825 return target;
11829 static rtx
11830 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
11832 rtx pat;
11833 tree arg0 = CALL_EXPR_ARG (exp, 0);
11834 tree arg1 = CALL_EXPR_ARG (exp, 1);
11835 rtx op0 = expand_normal (arg0);
11836 rtx op1 = expand_normal (arg1);
11837 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
11838 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
11840 if (icode == CODE_FOR_nothing)
11841 /* Builtin not supported on this processor. */
11842 return 0;
11844 /* If we got invalid arguments bail out before generating bad rtl. */
11845 if (arg0 == error_mark_node || arg1 == error_mark_node)
11846 return const0_rtx;
11848 if (GET_CODE (op0) != CONST_INT
11849 || INTVAL (op0) > 255
11850 || INTVAL (op0) < 0)
11852 error ("argument 1 must be an 8-bit field value");
11853 return const0_rtx;
11856 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
11857 op0 = copy_to_mode_reg (mode0, op0);
11859 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11860 op1 = copy_to_mode_reg (mode1, op1);
11862 pat = GEN_FCN (icode) (op0, op1);
11863 if (! pat)
11864 return const0_rtx;
11865 emit_insn (pat);
11867 return NULL_RTX;
11871 static rtx
11872 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
11874 rtx pat;
11875 tree arg0 = CALL_EXPR_ARG (exp, 0);
11876 rtx op0 = expand_normal (arg0);
11877 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11878 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11880 if (icode == CODE_FOR_nothing)
11881 /* Builtin not supported on this processor. */
11882 return 0;
11884 /* If we got invalid arguments bail out before generating bad rtl. */
11885 if (arg0 == error_mark_node)
11886 return const0_rtx;
11888 if (icode == CODE_FOR_altivec_vspltisb
11889 || icode == CODE_FOR_altivec_vspltish
11890 || icode == CODE_FOR_altivec_vspltisw
11891 || icode == CODE_FOR_spe_evsplatfi
11892 || icode == CODE_FOR_spe_evsplati)
11894 /* Only allow 5-bit *signed* literals. */
11895 if (GET_CODE (op0) != CONST_INT
11896 || INTVAL (op0) > 15
11897 || INTVAL (op0) < -16)
11899 error ("argument 1 must be a 5-bit signed literal");
11900 return const0_rtx;
11904 if (target == 0
11905 || GET_MODE (target) != tmode
11906 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11907 target = gen_reg_rtx (tmode);
11909 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11910 op0 = copy_to_mode_reg (mode0, op0);
11912 pat = GEN_FCN (icode) (target, op0);
11913 if (! pat)
11914 return 0;
11915 emit_insn (pat);
11917 return target;
11920 static rtx
11921 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
11923 rtx pat, scratch1, scratch2;
11924 tree arg0 = CALL_EXPR_ARG (exp, 0);
11925 rtx op0 = expand_normal (arg0);
11926 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11927 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11929 /* If we have invalid arguments, bail out before generating bad rtl. */
11930 if (arg0 == error_mark_node)
11931 return const0_rtx;
11933 if (target == 0
11934 || GET_MODE (target) != tmode
11935 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11936 target = gen_reg_rtx (tmode);
11938 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11939 op0 = copy_to_mode_reg (mode0, op0);
11941 scratch1 = gen_reg_rtx (mode0);
11942 scratch2 = gen_reg_rtx (mode0);
11944 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
11945 if (! pat)
11946 return 0;
11947 emit_insn (pat);
11949 return target;
11952 static rtx
11953 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
11955 rtx pat;
11956 tree arg0 = CALL_EXPR_ARG (exp, 0);
11957 tree arg1 = CALL_EXPR_ARG (exp, 1);
11958 rtx op0 = expand_normal (arg0);
11959 rtx op1 = expand_normal (arg1);
11960 enum machine_mode tmode = insn_data[icode].operand[0].mode;
11961 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
11962 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
11964 if (icode == CODE_FOR_nothing)
11965 /* Builtin not supported on this processor. */
11966 return 0;
11968 /* If we got invalid arguments bail out before generating bad rtl. */
11969 if (arg0 == error_mark_node || arg1 == error_mark_node)
11970 return const0_rtx;
11972 if (icode == CODE_FOR_altivec_vcfux
11973 || icode == CODE_FOR_altivec_vcfsx
11974 || icode == CODE_FOR_altivec_vctsxs
11975 || icode == CODE_FOR_altivec_vctuxs
11976 || icode == CODE_FOR_altivec_vspltb
11977 || icode == CODE_FOR_altivec_vsplth
11978 || icode == CODE_FOR_altivec_vspltw
11979 || icode == CODE_FOR_spe_evaddiw
11980 || icode == CODE_FOR_spe_evldd
11981 || icode == CODE_FOR_spe_evldh
11982 || icode == CODE_FOR_spe_evldw
11983 || icode == CODE_FOR_spe_evlhhesplat
11984 || icode == CODE_FOR_spe_evlhhossplat
11985 || icode == CODE_FOR_spe_evlhhousplat
11986 || icode == CODE_FOR_spe_evlwhe
11987 || icode == CODE_FOR_spe_evlwhos
11988 || icode == CODE_FOR_spe_evlwhou
11989 || icode == CODE_FOR_spe_evlwhsplat
11990 || icode == CODE_FOR_spe_evlwwsplat
11991 || icode == CODE_FOR_spe_evrlwi
11992 || icode == CODE_FOR_spe_evslwi
11993 || icode == CODE_FOR_spe_evsrwis
11994 || icode == CODE_FOR_spe_evsubifw
11995 || icode == CODE_FOR_spe_evsrwiu)
11997 /* Only allow 5-bit unsigned literals. */
11998 STRIP_NOPS (arg1);
11999 if (TREE_CODE (arg1) != INTEGER_CST
12000 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12002 error ("argument 2 must be a 5-bit unsigned literal");
12003 return const0_rtx;
12007 if (target == 0
12008 || GET_MODE (target) != tmode
12009 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12010 target = gen_reg_rtx (tmode);
12012 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12013 op0 = copy_to_mode_reg (mode0, op0);
12014 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12015 op1 = copy_to_mode_reg (mode1, op1);
12017 pat = GEN_FCN (icode) (target, op0, op1);
12018 if (! pat)
12019 return 0;
12020 emit_insn (pat);
12022 return target;
12025 static rtx
12026 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12028 rtx pat, scratch;
12029 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12030 tree arg0 = CALL_EXPR_ARG (exp, 1);
12031 tree arg1 = CALL_EXPR_ARG (exp, 2);
12032 rtx op0 = expand_normal (arg0);
12033 rtx op1 = expand_normal (arg1);
12034 enum machine_mode tmode = SImode;
12035 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12036 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12037 int cr6_form_int;
12039 if (TREE_CODE (cr6_form) != INTEGER_CST)
12041 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12042 return const0_rtx;
12044 else
12045 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12047 gcc_assert (mode0 == mode1);
12049 /* If we have invalid arguments, bail out before generating bad rtl. */
12050 if (arg0 == error_mark_node || arg1 == error_mark_node)
12051 return const0_rtx;
12053 if (target == 0
12054 || GET_MODE (target) != tmode
12055 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12056 target = gen_reg_rtx (tmode);
12058 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12059 op0 = copy_to_mode_reg (mode0, op0);
12060 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12061 op1 = copy_to_mode_reg (mode1, op1);
12063 scratch = gen_reg_rtx (mode0);
12065 pat = GEN_FCN (icode) (scratch, op0, op1);
12066 if (! pat)
12067 return 0;
12068 emit_insn (pat);
12070 /* The vec_any* and vec_all* predicates use the same opcodes for two
12071 different operations, but the bits in CR6 will be different
12072 depending on what information we want. So we have to play tricks
12073 with CR6 to get the right bits out.
12075 If you think this is disgusting, look at the specs for the
12076 AltiVec predicates. */
12078 switch (cr6_form_int)
12080 case 0:
12081 emit_insn (gen_cr6_test_for_zero (target));
12082 break;
12083 case 1:
12084 emit_insn (gen_cr6_test_for_zero_reverse (target));
12085 break;
12086 case 2:
12087 emit_insn (gen_cr6_test_for_lt (target));
12088 break;
12089 case 3:
12090 emit_insn (gen_cr6_test_for_lt_reverse (target));
12091 break;
12092 default:
12093 error ("argument 1 of __builtin_altivec_predicate is out of range");
12094 break;
12097 return target;
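/* For reference, the CR6_FORM values accepted above match the __CR6_* codes
   from altivec.h: 0 (__CR6_EQ) tests that all elements compared false
   (e.g. vec_all_ne), 1 (__CR6_EQ_REV) that at least one compared true
   (vec_any_eq), 2 (__CR6_LT) that all compared true (vec_all_eq), and
   3 (__CR6_LT_REV) that at least one compared false (vec_any_ne).  */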
12100 static rtx
12101 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12103 rtx pat, addr;
12104 tree arg0 = CALL_EXPR_ARG (exp, 0);
12105 tree arg1 = CALL_EXPR_ARG (exp, 1);
12106 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12107 enum machine_mode mode0 = Pmode;
12108 enum machine_mode mode1 = Pmode;
12109 rtx op0 = expand_normal (arg0);
12110 rtx op1 = expand_normal (arg1);
12112 if (icode == CODE_FOR_nothing)
12113 /* Builtin not supported on this processor. */
12114 return 0;
12116 /* If we got invalid arguments bail out before generating bad rtl. */
12117 if (arg0 == error_mark_node || arg1 == error_mark_node)
12118 return const0_rtx;
12120 if (target == 0
12121 || GET_MODE (target) != tmode
12122 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12123 target = gen_reg_rtx (tmode);
12125 op1 = copy_to_mode_reg (mode1, op1);
12127 if (op0 == const0_rtx)
12129 addr = gen_rtx_MEM (tmode, op1);
12131 else
12133 op0 = copy_to_mode_reg (mode0, op0);
12134 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12137 pat = GEN_FCN (icode) (target, addr);
12139 if (! pat)
12140 return 0;
12141 emit_insn (pat);
12143 return target;
12146 /* Return a constant vector for use as a little-endian permute control vector
12147 to reverse the order of elements of the given vector mode. */
12148 static rtx
12149 swap_selector_for_mode (enum machine_mode mode)
12151 /* These are little endian vectors, so their elements are reversed
12152 from what you would normally expect for a permute control vector. */
12153 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12154 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12155 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12156 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12157 unsigned int *swaparray, i;
12158 rtx perm[16];
12160 switch (mode)
12162 case V2DFmode:
12163 case V2DImode:
12164 swaparray = swap2;
12165 break;
12166 case V4SFmode:
12167 case V4SImode:
12168 swaparray = swap4;
12169 break;
12170 case V8HImode:
12171 swaparray = swap8;
12172 break;
12173 case V16QImode:
12174 swaparray = swap16;
12175 break;
12176 default:
12177 gcc_unreachable ();
12180 for (i = 0; i < 16; ++i)
12181 perm[i] = GEN_INT (swaparray[i]);
12183 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
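/* For example, for V4SImode the array above reads { 3,2,1,0, 7,6,5,4,
   11,10,9,8, 15,14,13,12 }.  Read back to front, per the element reversal
   noted above, that is { 12,13,14,15, 8,9,10,11, 4,5,6,7, 0,1,2,3 }, which
   is exactly the control vperm needs (in its big-endian view) to swap the
   four 32-bit elements end for end while keeping the bytes within each
   element in order.  */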
12186 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12187 with -maltivec=be specified. Issue the load followed by an element-reversing
12188 permute. */
12189 void
12190 altivec_expand_lvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
12192 rtx tmp = gen_reg_rtx (mode);
12193 rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
12194 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12195 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12196 rtx sel = swap_selector_for_mode (mode);
12197 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12199 gcc_assert (REG_P (op0));
12200 emit_insn (par);
12201 emit_insn (gen_rtx_SET (VOIDmode, op0, vperm));
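/* Conceptually, the code above emits (pseudo-assembly sketch):

     lvx   vTMP, <addr>             element-reversed load on LE
     vperm vDST, vTMP, vTMP, vSEL   restore the expected element order

   with vSEL the selector from swap_selector_for_mode; the store variants
   below do the permute first and the store second.  */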
12204 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12205 with -maltivec=be specified. Issue the store preceded by an element-reversing
12206 permute. */
12207 void
12208 altivec_expand_stvx_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
12210 rtx tmp = gen_reg_rtx (mode);
12211 rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
12212 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12213 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12214 rtx sel = swap_selector_for_mode (mode);
12215 rtx vperm;
12217 gcc_assert (REG_P (op1));
12218 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12219 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12220 emit_insn (par);
12223 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12224 specified. Issue the store preceded by an element-reversing permute. */
12225 void
12226 altivec_expand_stvex_be (rtx op0, rtx op1, enum machine_mode mode, unsigned unspec)
12228 enum machine_mode inner_mode = GET_MODE_INNER (mode);
12229 rtx tmp = gen_reg_rtx (mode);
12230 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12231 rtx sel = swap_selector_for_mode (mode);
12232 rtx vperm;
12234 gcc_assert (REG_P (op1));
12235 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12236 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12237 emit_insn (gen_rtx_SET (VOIDmode, op0, stvx));
12240 static rtx
12241 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
12243 rtx pat, addr;
12244 tree arg0 = CALL_EXPR_ARG (exp, 0);
12245 tree arg1 = CALL_EXPR_ARG (exp, 1);
12246 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12247 enum machine_mode mode0 = Pmode;
12248 enum machine_mode mode1 = Pmode;
12249 rtx op0 = expand_normal (arg0);
12250 rtx op1 = expand_normal (arg1);
12252 if (icode == CODE_FOR_nothing)
12253 /* Builtin not supported on this processor. */
12254 return 0;
12256 /* If we got invalid arguments bail out before generating bad rtl. */
12257 if (arg0 == error_mark_node || arg1 == error_mark_node)
12258 return const0_rtx;
12260 if (target == 0
12261 || GET_MODE (target) != tmode
12262 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12263 target = gen_reg_rtx (tmode);
12265 op1 = copy_to_mode_reg (mode1, op1);
12267 if (op0 == const0_rtx)
12269 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
12271 else
12273 op0 = copy_to_mode_reg (mode0, op0);
12274 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
12277 pat = GEN_FCN (icode) (target, addr);
12279 if (! pat)
12280 return 0;
12281 emit_insn (pat);
12283 return target;
12286 static rtx
12287 spe_expand_stv_builtin (enum insn_code icode, tree exp)
12289 tree arg0 = CALL_EXPR_ARG (exp, 0);
12290 tree arg1 = CALL_EXPR_ARG (exp, 1);
12291 tree arg2 = CALL_EXPR_ARG (exp, 2);
12292 rtx op0 = expand_normal (arg0);
12293 rtx op1 = expand_normal (arg1);
12294 rtx op2 = expand_normal (arg2);
12295 rtx pat;
12296 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
12297 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
12298 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
12300 /* Invalid arguments. Bail before doing anything stoopid! */
12301 if (arg0 == error_mark_node
12302 || arg1 == error_mark_node
12303 || arg2 == error_mark_node)
12304 return const0_rtx;
12306 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
12307 op0 = copy_to_mode_reg (mode2, op0);
12308 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
12309 op1 = copy_to_mode_reg (mode0, op1);
12310 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
12311 op2 = copy_to_mode_reg (mode1, op2);
12313 pat = GEN_FCN (icode) (op1, op2, op0);
12314 if (pat)
12315 emit_insn (pat);
12316 return NULL_RTX;
12319 static rtx
12320 paired_expand_stv_builtin (enum insn_code icode, tree exp)
12322 tree arg0 = CALL_EXPR_ARG (exp, 0);
12323 tree arg1 = CALL_EXPR_ARG (exp, 1);
12324 tree arg2 = CALL_EXPR_ARG (exp, 2);
12325 rtx op0 = expand_normal (arg0);
12326 rtx op1 = expand_normal (arg1);
12327 rtx op2 = expand_normal (arg2);
12328 rtx pat, addr;
12329 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12330 enum machine_mode mode1 = Pmode;
12331 enum machine_mode mode2 = Pmode;
12333 /* Invalid arguments. Bail before doing anything stoopid! */
12334 if (arg0 == error_mark_node
12335 || arg1 == error_mark_node
12336 || arg2 == error_mark_node)
12337 return const0_rtx;
12339 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
12340 op0 = copy_to_mode_reg (tmode, op0);
12342 op2 = copy_to_mode_reg (mode2, op2);
12344 if (op1 == const0_rtx)
12346 addr = gen_rtx_MEM (tmode, op2);
12348 else
12350 op1 = copy_to_mode_reg (mode1, op1);
12351 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12354 pat = GEN_FCN (icode) (addr, op0);
12355 if (pat)
12356 emit_insn (pat);
12357 return NULL_RTX;
12360 static rtx
12361 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
12363 tree arg0 = CALL_EXPR_ARG (exp, 0);
12364 tree arg1 = CALL_EXPR_ARG (exp, 1);
12365 tree arg2 = CALL_EXPR_ARG (exp, 2);
12366 rtx op0 = expand_normal (arg0);
12367 rtx op1 = expand_normal (arg1);
12368 rtx op2 = expand_normal (arg2);
12369 rtx pat, addr;
12370 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12371 enum machine_mode smode = insn_data[icode].operand[1].mode;
12372 enum machine_mode mode1 = Pmode;
12373 enum machine_mode mode2 = Pmode;
12375 /* Invalid arguments. Bail before doing anything stoopid! */
12376 if (arg0 == error_mark_node
12377 || arg1 == error_mark_node
12378 || arg2 == error_mark_node)
12379 return const0_rtx;
12381 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
12382 op0 = copy_to_mode_reg (smode, op0);
12384 op2 = copy_to_mode_reg (mode2, op2);
12386 if (op1 == const0_rtx)
12388 addr = gen_rtx_MEM (tmode, op2);
12390 else
12392 op1 = copy_to_mode_reg (mode1, op1);
12393 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12396 pat = GEN_FCN (icode) (addr, op0);
12397 if (pat)
12398 emit_insn (pat);
12399 return NULL_RTX;
12402 /* Return the appropriate SPR number associated with the given builtin. */
12403 static inline HOST_WIDE_INT
12404 htm_spr_num (enum rs6000_builtins code)
12406 if (code == HTM_BUILTIN_GET_TFHAR
12407 || code == HTM_BUILTIN_SET_TFHAR)
12408 return TFHAR_SPR;
12409 else if (code == HTM_BUILTIN_GET_TFIAR
12410 || code == HTM_BUILTIN_SET_TFIAR)
12411 return TFIAR_SPR;
12412 else if (code == HTM_BUILTIN_GET_TEXASR
12413 || code == HTM_BUILTIN_SET_TEXASR)
12414 return TEXASR_SPR;
12415 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
12416 || code == HTM_BUILTIN_SET_TEXASRU);
12417 return TEXASRU_SPR;
12420 /* Return the appropriate SPR regno associated with the given builtin. */
12421 static inline HOST_WIDE_INT
12422 htm_spr_regno (enum rs6000_builtins code)
12424 if (code == HTM_BUILTIN_GET_TFHAR
12425 || code == HTM_BUILTIN_SET_TFHAR)
12426 return TFHAR_REGNO;
12427 else if (code == HTM_BUILTIN_GET_TFIAR
12428 || code == HTM_BUILTIN_SET_TFIAR)
12429 return TFIAR_REGNO;
12430 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
12431 || code == HTM_BUILTIN_SET_TEXASR
12432 || code == HTM_BUILTIN_GET_TEXASRU
12433 || code == HTM_BUILTIN_SET_TEXASRU);
12434 return TEXASR_REGNO;
12437 /* Return the correct ICODE value depending on whether we are
12438 setting or reading the HTM SPRs. */
12439 static inline enum insn_code
12440 rs6000_htm_spr_icode (bool nonvoid)
12442 if (nonvoid)
12443 return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
12444 else
12445 return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
12448 /* Expand the HTM builtin in EXP and store the result in TARGET.
12449 Store true in *EXPANDEDP if we found a builtin to expand. */
12450 static rtx
12451 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
12453 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12454 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
12455 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12456 const struct builtin_description *d;
12457 size_t i;
12459 *expandedp = false;
12461 /* Expand the HTM builtins. */
12462 d = bdesc_htm;
12463 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
12464 if (d->code == fcode)
12466 rtx op[MAX_HTM_OPERANDS], pat;
12467 int nopnds = 0;
12468 tree arg;
12469 call_expr_arg_iterator iter;
12470 unsigned attr = rs6000_builtin_info[fcode].attr;
12471 enum insn_code icode = d->icode;
12473 if (attr & RS6000_BTC_SPR)
12474 icode = rs6000_htm_spr_icode (nonvoid);
12476 if (nonvoid)
12478 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12479 if (!target
12480 || GET_MODE (target) != tmode
12481 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
12482 target = gen_reg_rtx (tmode);
12483 op[nopnds++] = target;
12486 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
12488 const struct insn_operand_data *insn_op;
12490 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
12491 return NULL_RTX;
12493 insn_op = &insn_data[icode].operand[nopnds];
12495 op[nopnds] = expand_normal (arg);
12497 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
12499 if (!strcmp (insn_op->constraint, "n"))
12501 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
12502 if (!CONST_INT_P (op[nopnds]))
12503 error ("argument %d must be an unsigned literal", arg_num);
12504 else
12505 error ("argument %d is an unsigned literal that is "
12506 "out of range", arg_num);
12507 return const0_rtx;
12509 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
12512 nopnds++;
12515 /* Handle the builtins for extended mnemonics. These accept
12516 no arguments, but map to builtins that take arguments. */
12517 switch (fcode)
12519 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
12520 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
12521 op[nopnds++] = GEN_INT (1);
12522 #ifdef ENABLE_CHECKING
12523 attr |= RS6000_BTC_UNARY;
12524 #endif
12525 break;
12526 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
12527 op[nopnds++] = GEN_INT (0);
12528 #ifdef ENABLE_CHECKING
12529 attr |= RS6000_BTC_UNARY;
12530 #endif
12531 break;
12532 default:
12533 break;
12536 /* If this builtin accesses SPRs, then pass in the appropriate
12537 SPR number and SPR regno as the last two operands. */
12538 if (attr & RS6000_BTC_SPR)
12540 op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
12541 op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
12544 #ifdef ENABLE_CHECKING
12545 int expected_nopnds = 0;
12546 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
12547 expected_nopnds = 1;
12548 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
12549 expected_nopnds = 2;
12550 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
12551 expected_nopnds = 3;
12552 if (!(attr & RS6000_BTC_VOID))
12553 expected_nopnds += 1;
12554 if (attr & RS6000_BTC_SPR)
12555 expected_nopnds += 2;
12557 gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
12558 #endif
12560 switch (nopnds)
12562 case 1:
12563 pat = GEN_FCN (icode) (op[0]);
12564 break;
12565 case 2:
12566 pat = GEN_FCN (icode) (op[0], op[1]);
12567 break;
12568 case 3:
12569 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
12570 break;
12571 case 4:
12572 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
12573 break;
12574 default:
12575 gcc_unreachable ();
12577 if (!pat)
12578 return NULL_RTX;
12579 emit_insn (pat);
12581 *expandedp = true;
12582 if (nonvoid)
12583 return target;
12584 return const0_rtx;
12587 return NULL_RTX;
12590 static rtx
12591 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
12593 rtx pat;
12594 tree arg0 = CALL_EXPR_ARG (exp, 0);
12595 tree arg1 = CALL_EXPR_ARG (exp, 1);
12596 tree arg2 = CALL_EXPR_ARG (exp, 2);
12597 rtx op0 = expand_normal (arg0);
12598 rtx op1 = expand_normal (arg1);
12599 rtx op2 = expand_normal (arg2);
12600 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12601 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12602 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12603 enum machine_mode mode2 = insn_data[icode].operand[3].mode;
12605 if (icode == CODE_FOR_nothing)
12606 /* Builtin not supported on this processor. */
12607 return 0;
12609 /* If we got invalid arguments bail out before generating bad rtl. */
12610 if (arg0 == error_mark_node
12611 || arg1 == error_mark_node
12612 || arg2 == error_mark_node)
12613 return const0_rtx;
12615 /* Check and prepare argument depending on the instruction code.
12617 Note that a switch statement instead of the sequence of tests
12618 would be incorrect as many of the CODE_FOR values could be
12619 CODE_FOR_nothing and that would yield multiple alternatives
12620 with identical values. We'd never reach here at runtime in
12621 this case. */
12622 if (icode == CODE_FOR_altivec_vsldoi_v4sf
12623 || icode == CODE_FOR_altivec_vsldoi_v4si
12624 || icode == CODE_FOR_altivec_vsldoi_v8hi
12625 || icode == CODE_FOR_altivec_vsldoi_v16qi)
12627 /* Only allow 4-bit unsigned literals. */
12628 STRIP_NOPS (arg2);
12629 if (TREE_CODE (arg2) != INTEGER_CST
12630 || TREE_INT_CST_LOW (arg2) & ~0xf)
12632 error ("argument 3 must be a 4-bit unsigned literal");
12633 return const0_rtx;
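/* For example, a call such as vec_sld (va, vb, 3) reaches this path as
   one of the CODE_FOR_altivec_vsldoi_* codes with arg2 == 3 and is
   accepted, while vec_sld (va, vb, 16) fails the ~0xf test above and
   draws the error just issued.  (Illustrative values only.)  */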
12636 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
12637 || icode == CODE_FOR_vsx_xxpermdi_v2di
12638 || icode == CODE_FOR_vsx_xxsldwi_v16qi
12639 || icode == CODE_FOR_vsx_xxsldwi_v8hi
12640 || icode == CODE_FOR_vsx_xxsldwi_v4si
12641 || icode == CODE_FOR_vsx_xxsldwi_v4sf
12642 || icode == CODE_FOR_vsx_xxsldwi_v2di
12643 || icode == CODE_FOR_vsx_xxsldwi_v2df)
12645 /* Only allow 2-bit unsigned literals. */
12646 STRIP_NOPS (arg2);
12647 if (TREE_CODE (arg2) != INTEGER_CST
12648 || TREE_INT_CST_LOW (arg2) & ~0x3)
12650 error ("argument 3 must be a 2-bit unsigned literal");
12651 return const0_rtx;
12654 else if (icode == CODE_FOR_vsx_set_v2df
12655 || icode == CODE_FOR_vsx_set_v2di
12656 || icode == CODE_FOR_bcdadd
12657 || icode == CODE_FOR_bcdadd_lt
12658 || icode == CODE_FOR_bcdadd_eq
12659 || icode == CODE_FOR_bcdadd_gt
12660 || icode == CODE_FOR_bcdsub
12661 || icode == CODE_FOR_bcdsub_lt
12662 || icode == CODE_FOR_bcdsub_eq
12663 || icode == CODE_FOR_bcdsub_gt)
12665 /* Only allow 1-bit unsigned literals. */
12666 STRIP_NOPS (arg2);
12667 if (TREE_CODE (arg2) != INTEGER_CST
12668 || TREE_INT_CST_LOW (arg2) & ~0x1)
12670 error ("argument 3 must be a 1-bit unsigned literal");
12671 return const0_rtx;
12674 else if (icode == CODE_FOR_dfp_ddedpd_dd
12675 || icode == CODE_FOR_dfp_ddedpd_td)
12677 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
12678 STRIP_NOPS (arg0);
12679 if (TREE_CODE (arg0) != INTEGER_CST
12680 || TREE_INT_CST_LOW (arg0) & ~0x3)
12682 error ("argument 1 must be 0 or 2");
12683 return const0_rtx;
12686 else if (icode == CODE_FOR_dfp_denbcd_dd
12687 || icode == CODE_FOR_dfp_denbcd_td)
12689 /* Only allow 1-bit unsigned literals. */
12690 STRIP_NOPS (arg0);
12691 if (TREE_CODE (arg0) != INTEGER_CST
12692 || TREE_INT_CST_LOW (arg0) & ~0x1)
12694 error ("argument 1 must be a 1-bit unsigned literal");
12695 return const0_rtx;
12698 else if (icode == CODE_FOR_dfp_dscli_dd
12699 || icode == CODE_FOR_dfp_dscli_td
12700 || icode == CODE_FOR_dfp_dscri_dd
12701 || icode == CODE_FOR_dfp_dscri_td)
12703 /* Only allow 6-bit unsigned literals. */
12704 STRIP_NOPS (arg1);
12705 if (TREE_CODE (arg1) != INTEGER_CST
12706 || TREE_INT_CST_LOW (arg1) & ~0x3f)
12708 error ("argument 2 must be a 6-bit unsigned literal");
12709 return const0_rtx;
12712 else if (icode == CODE_FOR_crypto_vshasigmaw
12713 || icode == CODE_FOR_crypto_vshasigmad)
12715 /* Check whether the 2nd and 3rd arguments are integer constants and in
12716 range and prepare arguments. */
12717 STRIP_NOPS (arg1);
12718 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
12720 error ("argument 2 must be 0 or 1");
12721 return const0_rtx;
12724 STRIP_NOPS (arg2);
12725 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
12727 error ("argument 3 must be in the range 0..15");
12728 return const0_rtx;
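/* For example, __builtin_crypto_vshasigmaw (x, 1, 0xf) satisfies both
   tests (arg1 in 0..1, arg2 in 0..15), whereas passing 16 or more for
   arg2 is diagnosed above.  (Illustrative call only.)  */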
12732 if (target == 0
12733 || GET_MODE (target) != tmode
12734 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12735 target = gen_reg_rtx (tmode);
12737 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12738 op0 = copy_to_mode_reg (mode0, op0);
12739 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12740 op1 = copy_to_mode_reg (mode1, op1);
12741 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12742 op2 = copy_to_mode_reg (mode2, op2);
12744 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
12745 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
12746 else
12747 pat = GEN_FCN (icode) (target, op0, op1, op2);
12748 if (! pat)
12749 return 0;
12750 emit_insn (pat);
12752 return target;
12755 /* Expand the lvx builtins. */
12756 static rtx
12757 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
12759 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12760 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12761 tree arg0;
12762 enum machine_mode tmode, mode0;
12763 rtx pat, op0;
12764 enum insn_code icode;
12766 switch (fcode)
12768 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
12769 icode = CODE_FOR_vector_altivec_load_v16qi;
12770 break;
12771 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
12772 icode = CODE_FOR_vector_altivec_load_v8hi;
12773 break;
12774 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
12775 icode = CODE_FOR_vector_altivec_load_v4si;
12776 break;
12777 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
12778 icode = CODE_FOR_vector_altivec_load_v4sf;
12779 break;
12780 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
12781 icode = CODE_FOR_vector_altivec_load_v2df;
12782 break;
12783 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
12784 icode = CODE_FOR_vector_altivec_load_v2di;
break;
12785 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
12786 icode = CODE_FOR_vector_altivec_load_v1ti;
12787 break;
12788 default:
12789 *expandedp = false;
12790 return NULL_RTX;
12793 *expandedp = true;
12795 arg0 = CALL_EXPR_ARG (exp, 0);
12796 op0 = expand_normal (arg0);
12797 tmode = insn_data[icode].operand[0].mode;
12798 mode0 = insn_data[icode].operand[1].mode;
12800 if (target == 0
12801 || GET_MODE (target) != tmode
12802 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12803 target = gen_reg_rtx (tmode);
12805 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12806 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12808 pat = GEN_FCN (icode) (target, op0);
12809 if (! pat)
12810 return 0;
12811 emit_insn (pat);
12812 return target;
12815 /* Expand the stvx builtins. */
12816 static rtx
12817 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12818 bool *expandedp)
12820 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12821 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12822 tree arg0, arg1;
12823 enum machine_mode mode0, mode1;
12824 rtx pat, op0, op1;
12825 enum insn_code icode;
12827 switch (fcode)
12829 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
12830 icode = CODE_FOR_vector_altivec_store_v16qi;
12831 break;
12832 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
12833 icode = CODE_FOR_vector_altivec_store_v8hi;
12834 break;
12835 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
12836 icode = CODE_FOR_vector_altivec_store_v4si;
12837 break;
12838 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
12839 icode = CODE_FOR_vector_altivec_store_v4sf;
12840 break;
12841 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
12842 icode = CODE_FOR_vector_altivec_store_v2df;
12843 break;
12844 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
12845 icode = CODE_FOR_vector_altivec_store_v2di;
break;
12846 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
12847 icode = CODE_FOR_vector_altivec_store_v1ti;
12848 break;
12849 default:
12850 *expandedp = false;
12851 return NULL_RTX;
12854 arg0 = CALL_EXPR_ARG (exp, 0);
12855 arg1 = CALL_EXPR_ARG (exp, 1);
12856 op0 = expand_normal (arg0);
12857 op1 = expand_normal (arg1);
12858 mode0 = insn_data[icode].operand[0].mode;
12859 mode1 = insn_data[icode].operand[1].mode;
12861 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12862 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12863 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12864 op1 = copy_to_mode_reg (mode1, op1);
12866 pat = GEN_FCN (icode) (op0, op1);
12867 if (pat)
12868 emit_insn (pat);
12870 *expandedp = true;
12871 return NULL_RTX;
12874 /* Expand the dst builtins. */
12875 static rtx
12876 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12877 bool *expandedp)
12879 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12880 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12881 tree arg0, arg1, arg2;
12882 enum machine_mode mode0, mode1;
12883 rtx pat, op0, op1, op2;
12884 const struct builtin_description *d;
12885 size_t i;
12887 *expandedp = false;
12889 /* Handle DST variants. */
12890 d = bdesc_dst;
12891 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
12892 if (d->code == fcode)
12894 arg0 = CALL_EXPR_ARG (exp, 0);
12895 arg1 = CALL_EXPR_ARG (exp, 1);
12896 arg2 = CALL_EXPR_ARG (exp, 2);
12897 op0 = expand_normal (arg0);
12898 op1 = expand_normal (arg1);
12899 op2 = expand_normal (arg2);
12900 mode0 = insn_data[d->icode].operand[0].mode;
12901 mode1 = insn_data[d->icode].operand[1].mode;
12903 /* Invalid arguments, bail out before generating bad rtl. */
12904 if (arg0 == error_mark_node
12905 || arg1 == error_mark_node
12906 || arg2 == error_mark_node)
12907 return const0_rtx;
12909 *expandedp = true;
12910 STRIP_NOPS (arg2);
12911 if (TREE_CODE (arg2) != INTEGER_CST
12912 || TREE_INT_CST_LOW (arg2) & ~0x3)
12914 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
12915 return const0_rtx;
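/* For example, vec_dst (addr, ctl, 3) supplies 3 as the 2-bit data
   stream selector and passes the test above; a stream number greater
   than 3 is rejected with the error just issued.  (Illustrative call
   only.)  */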
12918 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
12919 op0 = copy_to_mode_reg (Pmode, op0);
12920 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
12921 op1 = copy_to_mode_reg (mode1, op1);
12923 pat = GEN_FCN (d->icode) (op0, op1, op2);
12924 if (pat != 0)
12925 emit_insn (pat);
12927 return NULL_RTX;
12930 return NULL_RTX;
12933 /* Expand vec_init builtin. */
12934 static rtx
12935 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
12937 enum machine_mode tmode = TYPE_MODE (type);
12938 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
12939 int i, n_elt = GET_MODE_NUNITS (tmode);
12941 gcc_assert (VECTOR_MODE_P (tmode));
12942 gcc_assert (n_elt == call_expr_nargs (exp));
12944 if (!target || !register_operand (target, tmode))
12945 target = gen_reg_rtx (tmode);
12947 /* If we have a vector comprised of a single element, such as V1TImode, do
12948 the initialization directly. */
12949 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
12951 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
12952 emit_move_insn (target, gen_lowpart (tmode, x));
12954 else
12956 rtvec v = rtvec_alloc (n_elt);
12958 for (i = 0; i < n_elt; ++i)
12960 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
12961 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
12964 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
12967 return target;
12970 /* Return the integer constant in ARG. Constrain it to be in the range
12971 of the subparts of VEC_TYPE; issue an error if not. */
12973 static int
12974 get_element_number (tree vec_type, tree arg)
12976 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
12978 if (!tree_fits_uhwi_p (arg)
12979 || (elt = tree_to_uhwi (arg), elt > max))
12981 error ("selector must be an integer constant in the range 0..%wi", max);
12982 return 0;
12985 return elt;
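/* For example, with a 'vector signed int' argument TYPE_VECTOR_SUBPARTS
   is 4, so max is 3; a selector of 5 draws the error above and 0 is
   returned in its place.  (Illustrative values only.)  */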
12988 /* Expand vec_set builtin. */
12989 static rtx
12990 altivec_expand_vec_set_builtin (tree exp)
12992 enum machine_mode tmode, mode1;
12993 tree arg0, arg1, arg2;
12994 int elt;
12995 rtx op0, op1;
12997 arg0 = CALL_EXPR_ARG (exp, 0);
12998 arg1 = CALL_EXPR_ARG (exp, 1);
12999 arg2 = CALL_EXPR_ARG (exp, 2);
13001 tmode = TYPE_MODE (TREE_TYPE (arg0));
13002 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13003 gcc_assert (VECTOR_MODE_P (tmode));
13005 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
13006 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
13007 elt = get_element_number (TREE_TYPE (arg0), arg2);
13009 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
13010 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
13012 op0 = force_reg (tmode, op0);
13013 op1 = force_reg (mode1, op1);
13015 rs6000_expand_vector_set (op0, op1, elt);
13017 return op0;
13020 /* Expand vec_ext builtin. */
13021 static rtx
13022 altivec_expand_vec_ext_builtin (tree exp, rtx target)
13024 enum machine_mode tmode, mode0;
13025 tree arg0, arg1;
13026 int elt;
13027 rtx op0;
13029 arg0 = CALL_EXPR_ARG (exp, 0);
13030 arg1 = CALL_EXPR_ARG (exp, 1);
13032 op0 = expand_normal (arg0);
13033 elt = get_element_number (TREE_TYPE (arg0), arg1);
13035 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13036 mode0 = TYPE_MODE (TREE_TYPE (arg0));
13037 gcc_assert (VECTOR_MODE_P (mode0));
13039 op0 = force_reg (mode0, op0);
13041 if (optimize || !target || !register_operand (target, tmode))
13042 target = gen_reg_rtx (tmode);
13044 rs6000_expand_vector_extract (target, op0, elt);
13046 return target;
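/* For illustration of the two expanders above (a sketch of the expected
   flow only): a user-level vec_insert (x, v, 2) ends up in
   altivec_expand_vec_set_builtin with elt == 2, while vec_extract (v, 2)
   ends up here with elt == 2 and the result delivered in a fresh pseudo
   of the element mode unless TARGET was already suitable.  */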
13049 /* Expand the builtin in EXP and store the result in TARGET. Store
13050 true in *EXPANDEDP if we found a builtin to expand. */
13051 static rtx
13052 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
13054 const struct builtin_description *d;
13055 size_t i;
13056 enum insn_code icode;
13057 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13058 tree arg0;
13059 rtx op0, pat;
13060 enum machine_mode tmode, mode0;
13061 enum rs6000_builtins fcode
13062 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13064 if (rs6000_overloaded_builtin_p (fcode))
13066 *expandedp = true;
13067 error ("unresolved overload for Altivec builtin %qF", fndecl);
13069 /* Given it is invalid, just generate a normal call. */
13070 return expand_call (exp, target, false);
13073 target = altivec_expand_ld_builtin (exp, target, expandedp);
13074 if (*expandedp)
13075 return target;
13077 target = altivec_expand_st_builtin (exp, target, expandedp);
13078 if (*expandedp)
13079 return target;
13081 target = altivec_expand_dst_builtin (exp, target, expandedp);
13082 if (*expandedp)
13083 return target;
13085 *expandedp = true;
13087 switch (fcode)
13089 case ALTIVEC_BUILTIN_STVX_V2DF:
13090 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13091 case ALTIVEC_BUILTIN_STVX_V2DI:
13092 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13093 case ALTIVEC_BUILTIN_STVX_V4SF:
13094 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13095 case ALTIVEC_BUILTIN_STVX:
13096 case ALTIVEC_BUILTIN_STVX_V4SI:
13097 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13098 case ALTIVEC_BUILTIN_STVX_V8HI:
13099 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13100 case ALTIVEC_BUILTIN_STVX_V16QI:
13101 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13102 case ALTIVEC_BUILTIN_STVEBX:
13103 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13104 case ALTIVEC_BUILTIN_STVEHX:
13105 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13106 case ALTIVEC_BUILTIN_STVEWX:
13107 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13108 case ALTIVEC_BUILTIN_STVXL_V2DF:
13109 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13110 case ALTIVEC_BUILTIN_STVXL_V2DI:
13111 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13112 case ALTIVEC_BUILTIN_STVXL_V4SF:
13113 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13114 case ALTIVEC_BUILTIN_STVXL:
13115 case ALTIVEC_BUILTIN_STVXL_V4SI:
13116 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13117 case ALTIVEC_BUILTIN_STVXL_V8HI:
13118 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13119 case ALTIVEC_BUILTIN_STVXL_V16QI:
13120 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13122 case ALTIVEC_BUILTIN_STVLX:
13123 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13124 case ALTIVEC_BUILTIN_STVLXL:
13125 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13126 case ALTIVEC_BUILTIN_STVRX:
13127 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13128 case ALTIVEC_BUILTIN_STVRXL:
13129 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13131 case VSX_BUILTIN_STXVD2X_V1TI:
13132 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13133 case VSX_BUILTIN_STXVD2X_V2DF:
13134 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13135 case VSX_BUILTIN_STXVD2X_V2DI:
13136 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13137 case VSX_BUILTIN_STXVW4X_V4SF:
13138 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13139 case VSX_BUILTIN_STXVW4X_V4SI:
13140 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13141 case VSX_BUILTIN_STXVW4X_V8HI:
13142 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13143 case VSX_BUILTIN_STXVW4X_V16QI:
13144 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13146 case ALTIVEC_BUILTIN_MFVSCR:
13147 icode = CODE_FOR_altivec_mfvscr;
13148 tmode = insn_data[icode].operand[0].mode;
13150 if (target == 0
13151 || GET_MODE (target) != tmode
13152 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13153 target = gen_reg_rtx (tmode);
13155 pat = GEN_FCN (icode) (target);
13156 if (! pat)
13157 return 0;
13158 emit_insn (pat);
13159 return target;
13161 case ALTIVEC_BUILTIN_MTVSCR:
13162 icode = CODE_FOR_altivec_mtvscr;
13163 arg0 = CALL_EXPR_ARG (exp, 0);
13164 op0 = expand_normal (arg0);
13165 mode0 = insn_data[icode].operand[0].mode;
13167 /* If we got invalid arguments bail out before generating bad rtl. */
13168 if (arg0 == error_mark_node)
13169 return const0_rtx;
13171 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13172 op0 = copy_to_mode_reg (mode0, op0);
13174 pat = GEN_FCN (icode) (op0);
13175 if (pat)
13176 emit_insn (pat);
13177 return NULL_RTX;
13179 case ALTIVEC_BUILTIN_DSSALL:
13180 emit_insn (gen_altivec_dssall ());
13181 return NULL_RTX;
13183 case ALTIVEC_BUILTIN_DSS:
13184 icode = CODE_FOR_altivec_dss;
13185 arg0 = CALL_EXPR_ARG (exp, 0);
13186 STRIP_NOPS (arg0);
13187 op0 = expand_normal (arg0);
13188 mode0 = insn_data[icode].operand[0].mode;
13190 /* If we got invalid arguments bail out before generating bad rtl. */
13191 if (arg0 == error_mark_node)
13192 return const0_rtx;
13194 if (TREE_CODE (arg0) != INTEGER_CST
13195 || TREE_INT_CST_LOW (arg0) & ~0x3)
13197 error ("argument to dss must be a 2-bit unsigned literal");
13198 return const0_rtx;
13201 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13202 op0 = copy_to_mode_reg (mode0, op0);
13204 emit_insn (gen_altivec_dss (op0));
13205 return NULL_RTX;
13207 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
13208 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
13209 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
13210 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
13211 case VSX_BUILTIN_VEC_INIT_V2DF:
13212 case VSX_BUILTIN_VEC_INIT_V2DI:
13213 case VSX_BUILTIN_VEC_INIT_V1TI:
13214 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
13216 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
13217 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
13218 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
13219 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
13220 case VSX_BUILTIN_VEC_SET_V2DF:
13221 case VSX_BUILTIN_VEC_SET_V2DI:
13222 case VSX_BUILTIN_VEC_SET_V1TI:
13223 return altivec_expand_vec_set_builtin (exp);
13225 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
13226 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
13227 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
13228 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
13229 case VSX_BUILTIN_VEC_EXT_V2DF:
13230 case VSX_BUILTIN_VEC_EXT_V2DI:
13231 case VSX_BUILTIN_VEC_EXT_V1TI:
13232 return altivec_expand_vec_ext_builtin (exp, target);
13234 default:
13235 break;
13239 /* Expand abs* operations. */
13240 d = bdesc_abs;
13241 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
13242 if (d->code == fcode)
13243 return altivec_expand_abs_builtin (d->icode, exp, target);
13245 /* Expand the AltiVec predicates. */
13246 d = bdesc_altivec_preds;
13247 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
13248 if (d->code == fcode)
13249 return altivec_expand_predicate_builtin (d->icode, exp, target);
13251 /* LV* are funky. We initialized them differently. */
13252 switch (fcode)
13254 case ALTIVEC_BUILTIN_LVSL:
13255 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
13256 exp, target, false);
13257 case ALTIVEC_BUILTIN_LVSR:
13258 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
13259 exp, target, false);
13260 case ALTIVEC_BUILTIN_LVEBX:
13261 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
13262 exp, target, false);
13263 case ALTIVEC_BUILTIN_LVEHX:
13264 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
13265 exp, target, false);
13266 case ALTIVEC_BUILTIN_LVEWX:
13267 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
13268 exp, target, false);
13269 case ALTIVEC_BUILTIN_LVXL_V2DF:
13270 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
13271 exp, target, false);
13272 case ALTIVEC_BUILTIN_LVXL_V2DI:
13273 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
13274 exp, target, false);
13275 case ALTIVEC_BUILTIN_LVXL_V4SF:
13276 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
13277 exp, target, false);
13278 case ALTIVEC_BUILTIN_LVXL:
13279 case ALTIVEC_BUILTIN_LVXL_V4SI:
13280 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
13281 exp, target, false);
13282 case ALTIVEC_BUILTIN_LVXL_V8HI:
13283 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
13284 exp, target, false);
13285 case ALTIVEC_BUILTIN_LVXL_V16QI:
13286 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
13287 exp, target, false);
13288 case ALTIVEC_BUILTIN_LVX_V2DF:
13289 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
13290 exp, target, false);
13291 case ALTIVEC_BUILTIN_LVX_V2DI:
13292 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
13293 exp, target, false);
13294 case ALTIVEC_BUILTIN_LVX_V4SF:
13295 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
13296 exp, target, false);
13297 case ALTIVEC_BUILTIN_LVX:
13298 case ALTIVEC_BUILTIN_LVX_V4SI:
13299 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
13300 exp, target, false);
13301 case ALTIVEC_BUILTIN_LVX_V8HI:
13302 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
13303 exp, target, false);
13304 case ALTIVEC_BUILTIN_LVX_V16QI:
13305 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
13306 exp, target, false);
13307 case ALTIVEC_BUILTIN_LVLX:
13308 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
13309 exp, target, true);
13310 case ALTIVEC_BUILTIN_LVLXL:
13311 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
13312 exp, target, true);
13313 case ALTIVEC_BUILTIN_LVRX:
13314 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
13315 exp, target, true);
13316 case ALTIVEC_BUILTIN_LVRXL:
13317 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
13318 exp, target, true);
13319 case VSX_BUILTIN_LXVD2X_V1TI:
13320 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
13321 exp, target, false);
13322 case VSX_BUILTIN_LXVD2X_V2DF:
13323 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
13324 exp, target, false);
13325 case VSX_BUILTIN_LXVD2X_V2DI:
13326 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
13327 exp, target, false);
13328 case VSX_BUILTIN_LXVW4X_V4SF:
13329 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
13330 exp, target, false);
13331 case VSX_BUILTIN_LXVW4X_V4SI:
13332 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
13333 exp, target, false);
13334 case VSX_BUILTIN_LXVW4X_V8HI:
13335 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
13336 exp, target, false);
13337 case VSX_BUILTIN_LXVW4X_V16QI:
13338 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
13339 exp, target, false);
13340 break;
13341 default:
13342 break;
13346 *expandedp = false;
13347 return NULL_RTX;
13350 /* Expand the builtin in EXP and store the result in TARGET. Store
13351 true in *EXPANDEDP if we found a builtin to expand. */
13352 static rtx
13353 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
13355 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13356 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13357 const struct builtin_description *d;
13358 size_t i;
13360 *expandedp = true;
13362 switch (fcode)
13364 case PAIRED_BUILTIN_STX:
13365 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
13366 case PAIRED_BUILTIN_LX:
13367 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
13368 default:
13369 break;
13373 /* Expand the paired predicates. */
13374 d = bdesc_paired_preds;
13375 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
13376 if (d->code == fcode)
13377 return paired_expand_predicate_builtin (d->icode, exp, target);
13379 *expandedp = false;
13380 return NULL_RTX;
13383 /* Binops that need to be initialized manually, but can be expanded
13384 automagically by rs6000_expand_binop_builtin. */
13385 static const struct builtin_description bdesc_2arg_spe[] =
13387 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
13388 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
13389 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
13390 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
13391 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
13392 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
13393 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
13394 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
13395 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
13396 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
13397 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
13398 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
13399 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
13400 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
13401 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
13402 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
13403 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
13404 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
13405 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
13406 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
13407 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
13408 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
13411 /* Expand the builtin in EXP and store the result in TARGET. Store
13412 true in *EXPANDEDP if we found a builtin to expand.
13414 This expands the SPE builtins that are not simple unary and binary
13415 operations. */
13416 static rtx
13417 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
13419 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13420 tree arg1, arg0;
13421 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13422 enum insn_code icode;
13423 enum machine_mode tmode, mode0;
13424 rtx pat, op0;
13425 const struct builtin_description *d;
13426 size_t i;
13428 *expandedp = true;
13430 /* Syntax check for a 5-bit unsigned immediate. */
13431 switch (fcode)
13433 case SPE_BUILTIN_EVSTDD:
13434 case SPE_BUILTIN_EVSTDH:
13435 case SPE_BUILTIN_EVSTDW:
13436 case SPE_BUILTIN_EVSTWHE:
13437 case SPE_BUILTIN_EVSTWHO:
13438 case SPE_BUILTIN_EVSTWWE:
13439 case SPE_BUILTIN_EVSTWWO:
13440 arg1 = CALL_EXPR_ARG (exp, 2);
13441 if (TREE_CODE (arg1) != INTEGER_CST
13442 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13444 error ("argument 2 must be a 5-bit unsigned literal");
13445 return const0_rtx;
13447 break;
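/* For example, an evstdd-family call whose literal operand is 31 passes
   the ~0x1f test above, while 32 is rejected.  (Illustrative values
   only.)  */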
13448 default:
13449 break;
13452 /* The evsplat*i instructions are not quite generic. */
13453 switch (fcode)
13455 case SPE_BUILTIN_EVSPLATFI:
13456 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
13457 exp, target);
13458 case SPE_BUILTIN_EVSPLATI:
13459 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
13460 exp, target);
13461 default:
13462 break;
13465 d = bdesc_2arg_spe;
13466 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
13467 if (d->code == fcode)
13468 return rs6000_expand_binop_builtin (d->icode, exp, target);
13470 d = bdesc_spe_predicates;
13471 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
13472 if (d->code == fcode)
13473 return spe_expand_predicate_builtin (d->icode, exp, target);
13475 d = bdesc_spe_evsel;
13476 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
13477 if (d->code == fcode)
13478 return spe_expand_evsel_builtin (d->icode, exp, target);
13480 switch (fcode)
13482 case SPE_BUILTIN_EVSTDDX:
13483 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
13484 case SPE_BUILTIN_EVSTDHX:
13485 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
13486 case SPE_BUILTIN_EVSTDWX:
13487 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
13488 case SPE_BUILTIN_EVSTWHEX:
13489 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
13490 case SPE_BUILTIN_EVSTWHOX:
13491 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
13492 case SPE_BUILTIN_EVSTWWEX:
13493 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
13494 case SPE_BUILTIN_EVSTWWOX:
13495 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
13496 case SPE_BUILTIN_EVSTDD:
13497 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
13498 case SPE_BUILTIN_EVSTDH:
13499 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
13500 case SPE_BUILTIN_EVSTDW:
13501 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
13502 case SPE_BUILTIN_EVSTWHE:
13503 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
13504 case SPE_BUILTIN_EVSTWHO:
13505 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
13506 case SPE_BUILTIN_EVSTWWE:
13507 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
13508 case SPE_BUILTIN_EVSTWWO:
13509 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
13510 case SPE_BUILTIN_MFSPEFSCR:
13511 icode = CODE_FOR_spe_mfspefscr;
13512 tmode = insn_data[icode].operand[0].mode;
13514 if (target == 0
13515 || GET_MODE (target) != tmode
13516 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13517 target = gen_reg_rtx (tmode);
13519 pat = GEN_FCN (icode) (target);
13520 if (! pat)
13521 return 0;
13522 emit_insn (pat);
13523 return target;
13524 case SPE_BUILTIN_MTSPEFSCR:
13525 icode = CODE_FOR_spe_mtspefscr;
13526 arg0 = CALL_EXPR_ARG (exp, 0);
13527 op0 = expand_normal (arg0);
13528 mode0 = insn_data[icode].operand[0].mode;
13530 if (arg0 == error_mark_node)
13531 return const0_rtx;
13533 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13534 op0 = copy_to_mode_reg (mode0, op0);
13536 pat = GEN_FCN (icode) (op0);
13537 if (pat)
13538 emit_insn (pat);
13539 return NULL_RTX;
13540 default:
13541 break;
13544 *expandedp = false;
13545 return NULL_RTX;
13548 static rtx
13549 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13551 rtx pat, scratch, tmp;
13552 tree form = CALL_EXPR_ARG (exp, 0);
13553 tree arg0 = CALL_EXPR_ARG (exp, 1);
13554 tree arg1 = CALL_EXPR_ARG (exp, 2);
13555 rtx op0 = expand_normal (arg0);
13556 rtx op1 = expand_normal (arg1);
13557 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13558 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13559 int form_int;
13560 enum rtx_code code;
13562 if (TREE_CODE (form) != INTEGER_CST)
13564 error ("argument 1 of __builtin_paired_predicate must be a constant");
13565 return const0_rtx;
13567 else
13568 form_int = TREE_INT_CST_LOW (form);
13570 gcc_assert (mode0 == mode1);
13572 if (arg0 == error_mark_node || arg1 == error_mark_node)
13573 return const0_rtx;
13575 if (target == 0
13576 || GET_MODE (target) != SImode
13577 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
13578 target = gen_reg_rtx (SImode);
13579 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13580 op0 = copy_to_mode_reg (mode0, op0);
13581 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13582 op1 = copy_to_mode_reg (mode1, op1);
13584 scratch = gen_reg_rtx (CCFPmode);
13586 pat = GEN_FCN (icode) (scratch, op0, op1);
13587 if (!pat)
13588 return const0_rtx;
13590 emit_insn (pat);
13592 switch (form_int)
13594 /* LT bit. */
13595 case 0:
13596 code = LT;
13597 break;
13598 /* GT bit. */
13599 case 1:
13600 code = GT;
13601 break;
13602 /* EQ bit. */
13603 case 2:
13604 code = EQ;
13605 break;
13606 /* UN bit. */
13607 case 3:
13608 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13609 return target;
13610 default:
13611 error ("argument 1 of __builtin_paired_predicate is out of range");
13612 return const0_rtx;
13615 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13616 emit_move_insn (target, tmp);
13617 return target;
13620 static rtx
13621 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13623 rtx pat, scratch, tmp;
13624 tree form = CALL_EXPR_ARG (exp, 0);
13625 tree arg0 = CALL_EXPR_ARG (exp, 1);
13626 tree arg1 = CALL_EXPR_ARG (exp, 2);
13627 rtx op0 = expand_normal (arg0);
13628 rtx op1 = expand_normal (arg1);
13629 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13630 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13631 int form_int;
13632 enum rtx_code code;
13634 if (TREE_CODE (form) != INTEGER_CST)
13636 error ("argument 1 of __builtin_spe_predicate must be a constant");
13637 return const0_rtx;
13639 else
13640 form_int = TREE_INT_CST_LOW (form);
13642 gcc_assert (mode0 == mode1);
13644 if (arg0 == error_mark_node || arg1 == error_mark_node)
13645 return const0_rtx;
13647 if (target == 0
13648 || GET_MODE (target) != SImode
13649 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
13650 target = gen_reg_rtx (SImode);
13652 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13653 op0 = copy_to_mode_reg (mode0, op0);
13654 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13655 op1 = copy_to_mode_reg (mode1, op1);
13657 scratch = gen_reg_rtx (CCmode);
13659 pat = GEN_FCN (icode) (scratch, op0, op1);
13660 if (! pat)
13661 return const0_rtx;
13662 emit_insn (pat);
13664 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
13665 _lower_. We use one compare, but look in different bits of the
13666 CR for each variant.
13668 There are 2 elements in each SPE simd type (upper/lower). The CR
13669 bits are set as follows:
13671 BIT0 | BIT 1 | BIT 2 | BIT 3
13672 U | L | (U | L) | (U & L)
13674 So, for an "all" relationship, BIT 3 would be set.
13675 For an "any" relationship, BIT 2 would be set. Etc.
13677 Following traditional nomenclature, these bits map to:
13679 BIT0 | BIT 1 | BIT 2 | BIT 3
13680 LT | GT | EQ | OV
13682 Later, we will generate rtl to look in the OV/EQ/LT/GT bit for the
all/any/upper/lower variant, respectively.
13685 switch (form_int)
13687 /* All variant. OV bit. */
13688 case 0:
13689 /* We need to get to the OV bit, which is the ORDERED bit. We
13690 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
13691 that's ugly and will make validate_condition_mode die.
13692 So let's just use another pattern. */
13693 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13694 return target;
13695 /* Any variant. EQ bit. */
13696 case 1:
13697 code = EQ;
13698 break;
13699 /* Upper variant. LT bit. */
13700 case 2:
13701 code = LT;
13702 break;
13703 /* Lower variant. GT bit. */
13704 case 3:
13705 code = GT;
13706 break;
13707 default:
13708 error ("argument 1 of __builtin_spe_predicate is out of range");
13709 return const0_rtx;
13712 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13713 emit_move_insn (target, tmp);
13715 return target;
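/* For illustration (a sketch of the flow only): a predicate call using
   form 1, the "any" variant, emits one SPE vector compare into the CC
   scratch above and then stores (eq:SI (reg:CC ...) (const_int 0)) into
   TARGET, i.e. it reads the EQ/"(U | L)" bit, so the result is nonzero
   iff the relation held in either element.  */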
13718 /* The evsel builtins look like this:
13720 e = __builtin_spe_evsel_OP (a, b, c, d);
13722 and work like this:
13724 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
13725 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
13728 static rtx
13729 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
13731 rtx pat, scratch;
13732 tree arg0 = CALL_EXPR_ARG (exp, 0);
13733 tree arg1 = CALL_EXPR_ARG (exp, 1);
13734 tree arg2 = CALL_EXPR_ARG (exp, 2);
13735 tree arg3 = CALL_EXPR_ARG (exp, 3);
13736 rtx op0 = expand_normal (arg0);
13737 rtx op1 = expand_normal (arg1);
13738 rtx op2 = expand_normal (arg2);
13739 rtx op3 = expand_normal (arg3);
13740 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
13741 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
13743 gcc_assert (mode0 == mode1);
13745 if (arg0 == error_mark_node || arg1 == error_mark_node
13746 || arg2 == error_mark_node || arg3 == error_mark_node)
13747 return const0_rtx;
13749 if (target == 0
13750 || GET_MODE (target) != mode0
13751 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
13752 target = gen_reg_rtx (mode0);
13754 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13755 op0 = copy_to_mode_reg (mode0, op0);
13756 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13757 op1 = copy_to_mode_reg (mode0, op1);
13758 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13759 op2 = copy_to_mode_reg (mode0, op2);
13760 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
13761 op3 = copy_to_mode_reg (mode0, op3);
13763 /* Generate the compare. */
13764 scratch = gen_reg_rtx (CCmode);
13765 pat = GEN_FCN (icode) (scratch, op0, op1);
13766 if (! pat)
13767 return const0_rtx;
13768 emit_insn (pat);
13770 if (mode0 == V2SImode)
13771 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
13772 else
13773 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
13775 return target;
13778 /* Raise an error message for a builtin function that is called without the
13779 appropriate target options being set. */
13781 static void
13782 rs6000_invalid_builtin (enum rs6000_builtins fncode)
13784 size_t uns_fncode = (size_t)fncode;
13785 const char *name = rs6000_builtin_info[uns_fncode].name;
13786 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
13788 gcc_assert (name != NULL);
13789 if ((fnmask & RS6000_BTM_CELL) != 0)
13790 error ("Builtin function %s is only valid for the cell processor", name);
13791 else if ((fnmask & RS6000_BTM_VSX) != 0)
13792 error ("Builtin function %s requires the -mvsx option", name);
13793 else if ((fnmask & RS6000_BTM_HTM) != 0)
13794 error ("Builtin function %s requires the -mhtm option", name);
13795 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
13796 error ("Builtin function %s requires the -maltivec option", name);
13797 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
13798 error ("Builtin function %s requires the -mpaired option", name);
13799 else if ((fnmask & RS6000_BTM_SPE) != 0)
13800 error ("Builtin function %s requires the -mspe option", name);
13801 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13802 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13803 error ("Builtin function %s requires the -mhard-dfp and"
13804 " -mpower8-vector options", name);
13805 else if ((fnmask & RS6000_BTM_DFP) != 0)
13806 error ("Builtin function %s requires the -mhard-dfp option", name);
13807 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
13808 error ("Builtin function %s requires the -mpower8-vector option", name);
13809 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13810 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13811 error ("Builtin function %s requires the -mhard-float and"
13812 " -mlong-double-128 options", name);
13813 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
13814 error ("Builtin function %s requires the -mhard-float option", name);
13815 else
13816 error ("Builtin function %s is not supported with the current options",
13817 name);
13820 /* Expand an expression EXP that calls a built-in function,
13821 with result going to TARGET if that's convenient
13822 (and in mode MODE if that's convenient).
13823 SUBTARGET may be used as the target for computing one of EXP's operands.
13824 IGNORE is nonzero if the value is to be ignored. */
13826 static rtx
13827 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13828 enum machine_mode mode ATTRIBUTE_UNUSED,
13829 int ignore ATTRIBUTE_UNUSED)
13831 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13832 enum rs6000_builtins fcode
13833 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
13834 size_t uns_fcode = (size_t)fcode;
13835 const struct builtin_description *d;
13836 size_t i;
13837 rtx ret;
13838 bool success;
13839 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
13840 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
13842 if (TARGET_DEBUG_BUILTIN)
13844 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
13845 const char *name1 = rs6000_builtin_info[uns_fcode].name;
13846 const char *name2 = ((icode != CODE_FOR_nothing)
13847 ? get_insn_name ((int)icode)
13848 : "nothing");
13849 const char *name3;
13851 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
13853 default: name3 = "unknown"; break;
13854 case RS6000_BTC_SPECIAL: name3 = "special"; break;
13855 case RS6000_BTC_UNARY: name3 = "unary"; break;
13856 case RS6000_BTC_BINARY: name3 = "binary"; break;
13857 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
13858 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
13859 case RS6000_BTC_ABS: name3 = "abs"; break;
13860 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
13861 case RS6000_BTC_DST: name3 = "dst"; break;
13865 fprintf (stderr,
13866 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
13867 (name1) ? name1 : "---", fcode,
13868 (name2) ? name2 : "---", (int)icode,
13869 name3,
13870 func_valid_p ? "" : ", not valid");
13873 if (!func_valid_p)
13875 rs6000_invalid_builtin (fcode);
13877 /* Given it is invalid, just generate a normal call. */
13878 return expand_call (exp, target, ignore);
13881 switch (fcode)
13883 case RS6000_BUILTIN_RECIP:
13884 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
13886 case RS6000_BUILTIN_RECIPF:
13887 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
13889 case RS6000_BUILTIN_RSQRTF:
13890 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
13892 case RS6000_BUILTIN_RSQRT:
13893 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
13895 case POWER7_BUILTIN_BPERMD:
13896 return rs6000_expand_binop_builtin (((TARGET_64BIT)
13897 ? CODE_FOR_bpermd_di
13898 : CODE_FOR_bpermd_si), exp, target);
13900 case RS6000_BUILTIN_GET_TB:
13901 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
13902 target);
13904 case RS6000_BUILTIN_MFTB:
13905 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
13906 ? CODE_FOR_rs6000_mftb_di
13907 : CODE_FOR_rs6000_mftb_si),
13908 target);
13910 case RS6000_BUILTIN_MFFS:
13911 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
13913 case RS6000_BUILTIN_MTFSF:
13914 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
13916 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
13917 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
13919 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
13920 : (int) CODE_FOR_altivec_lvsl_direct);
13921 enum machine_mode tmode = insn_data[icode].operand[0].mode;
13922 enum machine_mode mode = insn_data[icode].operand[1].mode;
13923 tree arg;
13924 rtx op, addr, pat;
13926 gcc_assert (TARGET_ALTIVEC);
13928 arg = CALL_EXPR_ARG (exp, 0);
13929 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
13930 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
13931 addr = memory_address (mode, op);
13932 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
13933 op = addr;
13934 else
13936 /* For the load case we need to negate the address. */
13937 op = gen_reg_rtx (GET_MODE (addr));
13938 emit_insn (gen_rtx_SET (VOIDmode, op,
13939 gen_rtx_NEG (GET_MODE (addr), addr)));
13941 op = gen_rtx_MEM (mode, op);
13943 if (target == 0
13944 || GET_MODE (target) != tmode
13945 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13946 target = gen_reg_rtx (tmode);
13948 pat = GEN_FCN (icode) (target, op);
13949 if (!pat)
13950 return 0;
13951 emit_insn (pat);
13953 return target;
13956 case ALTIVEC_BUILTIN_VCFUX:
13957 case ALTIVEC_BUILTIN_VCFSX:
13958 case ALTIVEC_BUILTIN_VCTUXS:
13959 case ALTIVEC_BUILTIN_VCTSXS:
13960 /* FIXME: There's got to be a nicer way to handle this case than
13961 constructing a new CALL_EXPR. */
13962 if (call_expr_nargs (exp) == 1)
13964 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
13965 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
13967 break;
13969 default:
13970 break;
13973 if (TARGET_ALTIVEC)
13975 ret = altivec_expand_builtin (exp, target, &success);
13977 if (success)
13978 return ret;
13980 if (TARGET_SPE)
13982 ret = spe_expand_builtin (exp, target, &success);
13984 if (success)
13985 return ret;
13987 if (TARGET_PAIRED_FLOAT)
13989 ret = paired_expand_builtin (exp, target, &success);
13991 if (success)
13992 return ret;
13994 if (TARGET_HTM)
13996 ret = htm_expand_builtin (exp, target, &success);
13998 if (success)
13999 return ret;
14002 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
14003 gcc_assert (attr == RS6000_BTC_UNARY
14004 || attr == RS6000_BTC_BINARY
14005 || attr == RS6000_BTC_TERNARY);
14007 /* Handle simple unary operations. */
14008 d = bdesc_1arg;
14009 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14010 if (d->code == fcode)
14011 return rs6000_expand_unop_builtin (d->icode, exp, target);
14013 /* Handle simple binary operations. */
14014 d = bdesc_2arg;
14015 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14016 if (d->code == fcode)
14017 return rs6000_expand_binop_builtin (d->icode, exp, target);
14019 /* Handle simple ternary operations. */
14020 d = bdesc_3arg;
14021 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14022 if (d->code == fcode)
14023 return rs6000_expand_ternop_builtin (d->icode, exp, target);
14025 gcc_unreachable ();
14028 static void
14029 rs6000_init_builtins (void)
14031 tree tdecl;
14032 tree ftype;
14033 enum machine_mode mode;
14035 if (TARGET_DEBUG_BUILTIN)
14036 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
14037 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
14038 (TARGET_SPE) ? ", spe" : "",
14039 (TARGET_ALTIVEC) ? ", altivec" : "",
14040 (TARGET_VSX) ? ", vsx" : "");
14042 V2SI_type_node = build_vector_type (intSI_type_node, 2);
14043 V2SF_type_node = build_vector_type (float_type_node, 2);
14044 V2DI_type_node = build_vector_type (intDI_type_node, 2);
14045 V2DF_type_node = build_vector_type (double_type_node, 2);
14046 V4HI_type_node = build_vector_type (intHI_type_node, 4);
14047 V4SI_type_node = build_vector_type (intSI_type_node, 4);
14048 V4SF_type_node = build_vector_type (float_type_node, 4);
14049 V8HI_type_node = build_vector_type (intHI_type_node, 8);
14050 V16QI_type_node = build_vector_type (intQI_type_node, 16);
14052 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
14053 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
14054 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
14055 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
14057 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
14058 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
14059 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
14060 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
14062 /* We use V1TI mode as a special container to hold __int128_t items that
14063 must live in VSX registers. */
14064 if (intTI_type_node)
14066 V1TI_type_node = build_vector_type (intTI_type_node, 1);
14067 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
14070 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
14071 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
14072 'vector unsigned short'. */
14074 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14075 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14076 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14077 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14078 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14080 long_integer_type_internal_node = long_integer_type_node;
14081 long_unsigned_type_internal_node = long_unsigned_type_node;
14082 long_long_integer_type_internal_node = long_long_integer_type_node;
14083 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14084 intQI_type_internal_node = intQI_type_node;
14085 uintQI_type_internal_node = unsigned_intQI_type_node;
14086 intHI_type_internal_node = intHI_type_node;
14087 uintHI_type_internal_node = unsigned_intHI_type_node;
14088 intSI_type_internal_node = intSI_type_node;
14089 uintSI_type_internal_node = unsigned_intSI_type_node;
14090 intDI_type_internal_node = intDI_type_node;
14091 uintDI_type_internal_node = unsigned_intDI_type_node;
14092 intTI_type_internal_node = intTI_type_node;
14093 uintTI_type_internal_node = unsigned_intTI_type_node;
14094 float_type_internal_node = float_type_node;
14095 double_type_internal_node = double_type_node;
14096 long_double_type_internal_node = long_double_type_node;
14097 dfloat64_type_internal_node = dfloat64_type_node;
14098 dfloat128_type_internal_node = dfloat128_type_node;
14099 void_type_internal_node = void_type_node;
14101 /* Initialize the modes for builtin_function_type, mapping a machine mode to
14102 tree type node. */
14103 builtin_mode_to_type[QImode][0] = integer_type_node;
14104 builtin_mode_to_type[HImode][0] = integer_type_node;
14105 builtin_mode_to_type[SImode][0] = intSI_type_node;
14106 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14107 builtin_mode_to_type[DImode][0] = intDI_type_node;
14108 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14109 builtin_mode_to_type[TImode][0] = intTI_type_node;
14110 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14111 builtin_mode_to_type[SFmode][0] = float_type_node;
14112 builtin_mode_to_type[DFmode][0] = double_type_node;
14113 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14114 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14115 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14116 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14117 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14118 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14119 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14120 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14121 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14122 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14123 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14124 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14125 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14126 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14127 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14128 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14129 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14130 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
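/* For illustration: with this table populated, builtin_function_type can
   map, say, a (V4SImode, unsigned) operand request onto
   unsigned_V4SI_type_node, the 'vector unsigned int' type registered
   below.  (Descriptive note only.)  */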
14132 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
14133 TYPE_NAME (bool_char_type_node) = tdecl;
14135 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
14136 TYPE_NAME (bool_short_type_node) = tdecl;
14138 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
14139 TYPE_NAME (bool_int_type_node) = tdecl;
14141 tdecl = add_builtin_type ("__pixel", pixel_type_node);
14142 TYPE_NAME (pixel_type_node) = tdecl;
14144 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
14145 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
14146 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
14147 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
14148 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
14150 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
14151 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
14153 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
14154 TYPE_NAME (V16QI_type_node) = tdecl;
14156 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
14157 TYPE_NAME ( bool_V16QI_type_node) = tdecl;
14159 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
14160 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
14162 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
14163 TYPE_NAME (V8HI_type_node) = tdecl;
14165 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
14166 TYPE_NAME (bool_V8HI_type_node) = tdecl;
14168 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
14169 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
14171 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
14172 TYPE_NAME (V4SI_type_node) = tdecl;
14174 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
14175 TYPE_NAME (bool_V4SI_type_node) = tdecl;
14177 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
14178 TYPE_NAME (V4SF_type_node) = tdecl;
14180 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
14181 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
14183 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
14184 TYPE_NAME (V2DF_type_node) = tdecl;
14186 if (TARGET_POWERPC64)
14188 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
14189 TYPE_NAME (V2DI_type_node) = tdecl;
14191 tdecl = add_builtin_type ("__vector unsigned long",
14192 unsigned_V2DI_type_node);
14193 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14195 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
14196 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14198 else
14200 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
14201 TYPE_NAME (V2DI_type_node) = tdecl;
14203 tdecl = add_builtin_type ("__vector unsigned long long",
14204 unsigned_V2DI_type_node);
14205 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14207 tdecl = add_builtin_type ("__vector __bool long long",
14208 bool_V2DI_type_node);
14209 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14212 if (V1TI_type_node)
14214 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
14215 TYPE_NAME (V1TI_type_node) = tdecl;
14217 tdecl = add_builtin_type ("__vector unsigned __int128",
14218 unsigned_V1TI_type_node);
14219 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
14222 /* Paired and SPE builtins are only available if the compiler was built with
14223 the corresponding options, so only create those builtins when the matching
14224 compiler option is enabled. Create Altivec and VSX builtins on machines
14225 with at least the general purpose extensions (970 and newer) to allow the
14226 use of the target attribute.
14227 if (TARGET_PAIRED_FLOAT)
14228 paired_init_builtins ();
14229 if (TARGET_SPE)
14230 spe_init_builtins ();
14231 if (TARGET_EXTRA_BUILTINS)
14232 altivec_init_builtins ();
14233 if (TARGET_HTM)
14234 htm_init_builtins ();
14236 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
14237 rs6000_common_init_builtins ();
14239 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
14240 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
14241 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
14243 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
14244 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
14245 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
14247 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
14248 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
14249 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
14251 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
14252 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
14253 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
14255 mode = (TARGET_64BIT) ? DImode : SImode;
14256 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
14257 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
14258 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
14260 ftype = build_function_type_list (unsigned_intDI_type_node,
14261 NULL_TREE);
14262 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
14264 if (TARGET_64BIT)
14265 ftype = build_function_type_list (unsigned_intDI_type_node,
14266 NULL_TREE);
14267 else
14268 ftype = build_function_type_list (unsigned_intSI_type_node,
14269 NULL_TREE);
14270 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
14272 ftype = build_function_type_list (double_type_node, NULL_TREE);
14273 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
14275 ftype = build_function_type_list (void_type_node,
14276 intSI_type_node, double_type_node,
14277 NULL_TREE);
14278 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
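/* Sketch of intended use (an assumption based on the types built just
   above): __builtin_mffs reads the FPSCR into a double and
   __builtin_mtfsf writes selected fields back, so a save/restore pair is

     double saved = __builtin_mffs ();
     __builtin_mtfsf (0xff, saved);     0xff selects all eight 4-bit fields

   matching the (int, double) -> void signature defined above.  */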
14280 #if TARGET_XCOFF
14281 /* AIX libm provides clog as __clog. */
14282 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
14283 set_user_assembler_name (tdecl, "__clog");
14284 #endif
14286 #ifdef SUBTARGET_INIT_BUILTINS
14287 SUBTARGET_INIT_BUILTINS;
14288 #endif
14291 /* Returns the rs6000 builtin decl for CODE. */
14293 static tree
14294 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
14296 HOST_WIDE_INT fnmask;
14298 if (code >= RS6000_BUILTIN_COUNT)
14299 return error_mark_node;
14301 fnmask = rs6000_builtin_info[code].mask;
14302 if ((fnmask & rs6000_builtin_mask) != fnmask)
14304 rs6000_invalid_builtin ((enum rs6000_builtins)code);
14305 return error_mark_node;
14308 return rs6000_builtin_decls[code];
14311 static void
14312 spe_init_builtins (void)
14314 tree puint_type_node = build_pointer_type (unsigned_type_node);
14315 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
14316 const struct builtin_description *d;
14317 size_t i;
14319 tree v2si_ftype_4_v2si
14320 = build_function_type_list (opaque_V2SI_type_node,
14321 opaque_V2SI_type_node,
14322 opaque_V2SI_type_node,
14323 opaque_V2SI_type_node,
14324 opaque_V2SI_type_node,
14325 NULL_TREE);
14327 tree v2sf_ftype_4_v2sf
14328 = build_function_type_list (opaque_V2SF_type_node,
14329 opaque_V2SF_type_node,
14330 opaque_V2SF_type_node,
14331 opaque_V2SF_type_node,
14332 opaque_V2SF_type_node,
14333 NULL_TREE);
14335 tree int_ftype_int_v2si_v2si
14336 = build_function_type_list (integer_type_node,
14337 integer_type_node,
14338 opaque_V2SI_type_node,
14339 opaque_V2SI_type_node,
14340 NULL_TREE);
14342 tree int_ftype_int_v2sf_v2sf
14343 = build_function_type_list (integer_type_node,
14344 integer_type_node,
14345 opaque_V2SF_type_node,
14346 opaque_V2SF_type_node,
14347 NULL_TREE);
14349 tree void_ftype_v2si_puint_int
14350 = build_function_type_list (void_type_node,
14351 opaque_V2SI_type_node,
14352 puint_type_node,
14353 integer_type_node,
14354 NULL_TREE);
14356 tree void_ftype_v2si_puint_char
14357 = build_function_type_list (void_type_node,
14358 opaque_V2SI_type_node,
14359 puint_type_node,
14360 char_type_node,
14361 NULL_TREE);
14363 tree void_ftype_v2si_pv2si_int
14364 = build_function_type_list (void_type_node,
14365 opaque_V2SI_type_node,
14366 opaque_p_V2SI_type_node,
14367 integer_type_node,
14368 NULL_TREE);
14370 tree void_ftype_v2si_pv2si_char
14371 = build_function_type_list (void_type_node,
14372 opaque_V2SI_type_node,
14373 opaque_p_V2SI_type_node,
14374 char_type_node,
14375 NULL_TREE);
14377 tree void_ftype_int
14378 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14380 tree int_ftype_void
14381 = build_function_type_list (integer_type_node, NULL_TREE);
14383 tree v2si_ftype_pv2si_int
14384 = build_function_type_list (opaque_V2SI_type_node,
14385 opaque_p_V2SI_type_node,
14386 integer_type_node,
14387 NULL_TREE);
14389 tree v2si_ftype_puint_int
14390 = build_function_type_list (opaque_V2SI_type_node,
14391 puint_type_node,
14392 integer_type_node,
14393 NULL_TREE);
14395 tree v2si_ftype_pushort_int
14396 = build_function_type_list (opaque_V2SI_type_node,
14397 pushort_type_node,
14398 integer_type_node,
14399 NULL_TREE);
14401 tree v2si_ftype_signed_char
14402 = build_function_type_list (opaque_V2SI_type_node,
14403 signed_char_type_node,
14404 NULL_TREE);
14406 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
14408 /* Initialize irregular SPE builtins. */
14410 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
14411 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
14412 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
14413 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
14414 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
14415 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
14416 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
14417 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
14418 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
14419 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
14420 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
14421 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
14422 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
14423 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
14424 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
14425 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
14426 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
14427 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
14429 /* Loads. */
14430 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
14431 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
14432 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
14433 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
14434 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
14435 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
14436 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
14437 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
14438 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
14439 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
14440 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
14441 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
14442 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
14443 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
14444 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
14445 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
14446 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
14447 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
14448 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
14449 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
14450 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
14451 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
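/* Minimal usage sketch (hypothetical user code): these builtins traffic
   in the opaque __ev64_opaque__ type registered above, e.g.

     __ev64_opaque__ v = __builtin_spe_evsplati (5);
     __builtin_spe_evstdd (v, p, 0);    p has type __ev64_opaque__ *

   per the v2si_ftype_signed_char and void_ftype_v2si_pv2si_char types.  */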
14453 /* Predicates. */
14454 d = bdesc_spe_predicates;
14455 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
14457 tree type;
14459 switch (insn_data[d->icode].operand[1].mode)
14461 case V2SImode:
14462 type = int_ftype_int_v2si_v2si;
14463 break;
14464 case V2SFmode:
14465 type = int_ftype_int_v2sf_v2sf;
14466 break;
14467 default:
14468 gcc_unreachable ();
14471 def_builtin (d->name, type, d->code);
14474 /* Evsel predicates. */
14475 d = bdesc_spe_evsel;
14476 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
14478 tree type;
14480 switch (insn_data[d->icode].operand[1].mode)
14482 case V2SImode:
14483 type = v2si_ftype_4_v2si;
14484 break;
14485 case V2SFmode:
14486 type = v2sf_ftype_4_v2sf;
14487 break;
14488 default:
14489 gcc_unreachable ();
14492 def_builtin (d->name, type, d->code);
14496 static void
14497 paired_init_builtins (void)
14499 const struct builtin_description *d;
14500 size_t i;
14502 tree int_ftype_int_v2sf_v2sf
14503 = build_function_type_list (integer_type_node,
14504 integer_type_node,
14505 V2SF_type_node,
14506 V2SF_type_node,
14507 NULL_TREE);
14508 tree pcfloat_type_node =
14509 build_pointer_type (build_qualified_type
14510 (float_type_node, TYPE_QUAL_CONST));
14512 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
14513 long_integer_type_node,
14514 pcfloat_type_node,
14515 NULL_TREE);
14516 tree void_ftype_v2sf_long_pcfloat =
14517 build_function_type_list (void_type_node,
14518 V2SF_type_node,
14519 long_integer_type_node,
14520 pcfloat_type_node,
14521 NULL_TREE);
14524 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
14525 PAIRED_BUILTIN_LX);
14528 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
14529 PAIRED_BUILTIN_STX);
14531 /* Predicates. */
14532 d = bdesc_paired_preds;
14533 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
14535 tree type;
14537 if (TARGET_DEBUG_BUILTIN)
14538 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
14539 (int)i, get_insn_name (d->icode), (int)d->icode,
14540 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
14542 switch (insn_data[d->icode].operand[1].mode)
14544 case V2SFmode:
14545 type = int_ftype_int_v2sf_v2sf;
14546 break;
14547 default:
14548 gcc_unreachable ();
14551 def_builtin (d->name, type, d->code);
14555 static void
14556 altivec_init_builtins (void)
14558 const struct builtin_description *d;
14559 size_t i;
14560 tree ftype;
14561 tree decl;
14563 tree pvoid_type_node = build_pointer_type (void_type_node);
14565 tree pcvoid_type_node
14566 = build_pointer_type (build_qualified_type (void_type_node,
14567 TYPE_QUAL_CONST));
14569 tree int_ftype_opaque
14570 = build_function_type_list (integer_type_node,
14571 opaque_V4SI_type_node, NULL_TREE);
14572 tree opaque_ftype_opaque
14573 = build_function_type_list (integer_type_node, NULL_TREE);
14574 tree opaque_ftype_opaque_int
14575 = build_function_type_list (opaque_V4SI_type_node,
14576 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
14577 tree opaque_ftype_opaque_opaque_int
14578 = build_function_type_list (opaque_V4SI_type_node,
14579 opaque_V4SI_type_node, opaque_V4SI_type_node,
14580 integer_type_node, NULL_TREE);
14581 tree int_ftype_int_opaque_opaque
14582 = build_function_type_list (integer_type_node,
14583 integer_type_node, opaque_V4SI_type_node,
14584 opaque_V4SI_type_node, NULL_TREE);
14585 tree int_ftype_int_v4si_v4si
14586 = build_function_type_list (integer_type_node,
14587 integer_type_node, V4SI_type_node,
14588 V4SI_type_node, NULL_TREE);
14589 tree int_ftype_int_v2di_v2di
14590 = build_function_type_list (integer_type_node,
14591 integer_type_node, V2DI_type_node,
14592 V2DI_type_node, NULL_TREE);
14593 tree void_ftype_v4si
14594 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
14595 tree v8hi_ftype_void
14596 = build_function_type_list (V8HI_type_node, NULL_TREE);
14597 tree void_ftype_void
14598 = build_function_type_list (void_type_node, NULL_TREE);
14599 tree void_ftype_int
14600 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14602 tree opaque_ftype_long_pcvoid
14603 = build_function_type_list (opaque_V4SI_type_node,
14604 long_integer_type_node, pcvoid_type_node,
14605 NULL_TREE);
14606 tree v16qi_ftype_long_pcvoid
14607 = build_function_type_list (V16QI_type_node,
14608 long_integer_type_node, pcvoid_type_node,
14609 NULL_TREE);
14610 tree v8hi_ftype_long_pcvoid
14611 = build_function_type_list (V8HI_type_node,
14612 long_integer_type_node, pcvoid_type_node,
14613 NULL_TREE);
14614 tree v4si_ftype_long_pcvoid
14615 = build_function_type_list (V4SI_type_node,
14616 long_integer_type_node, pcvoid_type_node,
14617 NULL_TREE);
14618 tree v4sf_ftype_long_pcvoid
14619 = build_function_type_list (V4SF_type_node,
14620 long_integer_type_node, pcvoid_type_node,
14621 NULL_TREE);
14622 tree v2df_ftype_long_pcvoid
14623 = build_function_type_list (V2DF_type_node,
14624 long_integer_type_node, pcvoid_type_node,
14625 NULL_TREE);
14626 tree v2di_ftype_long_pcvoid
14627 = build_function_type_list (V2DI_type_node,
14628 long_integer_type_node, pcvoid_type_node,
14629 NULL_TREE);
14631 tree void_ftype_opaque_long_pvoid
14632 = build_function_type_list (void_type_node,
14633 opaque_V4SI_type_node, long_integer_type_node,
14634 pvoid_type_node, NULL_TREE);
14635 tree void_ftype_v4si_long_pvoid
14636 = build_function_type_list (void_type_node,
14637 V4SI_type_node, long_integer_type_node,
14638 pvoid_type_node, NULL_TREE);
14639 tree void_ftype_v16qi_long_pvoid
14640 = build_function_type_list (void_type_node,
14641 V16QI_type_node, long_integer_type_node,
14642 pvoid_type_node, NULL_TREE);
14643 tree void_ftype_v8hi_long_pvoid
14644 = build_function_type_list (void_type_node,
14645 V8HI_type_node, long_integer_type_node,
14646 pvoid_type_node, NULL_TREE);
14647 tree void_ftype_v4sf_long_pvoid
14648 = build_function_type_list (void_type_node,
14649 V4SF_type_node, long_integer_type_node,
14650 pvoid_type_node, NULL_TREE);
14651 tree void_ftype_v2df_long_pvoid
14652 = build_function_type_list (void_type_node,
14653 V2DF_type_node, long_integer_type_node,
14654 pvoid_type_node, NULL_TREE);
14655 tree void_ftype_v2di_long_pvoid
14656 = build_function_type_list (void_type_node,
14657 V2DI_type_node, long_integer_type_node,
14658 pvoid_type_node, NULL_TREE);
14659 tree int_ftype_int_v8hi_v8hi
14660 = build_function_type_list (integer_type_node,
14661 integer_type_node, V8HI_type_node,
14662 V8HI_type_node, NULL_TREE);
14663 tree int_ftype_int_v16qi_v16qi
14664 = build_function_type_list (integer_type_node,
14665 integer_type_node, V16QI_type_node,
14666 V16QI_type_node, NULL_TREE);
14667 tree int_ftype_int_v4sf_v4sf
14668 = build_function_type_list (integer_type_node,
14669 integer_type_node, V4SF_type_node,
14670 V4SF_type_node, NULL_TREE);
14671 tree int_ftype_int_v2df_v2df
14672 = build_function_type_list (integer_type_node,
14673 integer_type_node, V2DF_type_node,
14674 V2DF_type_node, NULL_TREE);
14675 tree v2di_ftype_v2di
14676 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
14677 tree v4si_ftype_v4si
14678 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
14679 tree v8hi_ftype_v8hi
14680 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
14681 tree v16qi_ftype_v16qi
14682 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
14683 tree v4sf_ftype_v4sf
14684 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14685 tree v2df_ftype_v2df
14686 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14687 tree void_ftype_pcvoid_int_int
14688 = build_function_type_list (void_type_node,
14689 pcvoid_type_node, integer_type_node,
14690 integer_type_node, NULL_TREE);
14692 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
14693 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
14694 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
14695 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
14696 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
14697 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
14698 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
14699 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
14700 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
14701 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
14702 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
14703 ALTIVEC_BUILTIN_LVXL_V2DF);
14704 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
14705 ALTIVEC_BUILTIN_LVXL_V2DI);
14706 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
14707 ALTIVEC_BUILTIN_LVXL_V4SF);
14708 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
14709 ALTIVEC_BUILTIN_LVXL_V4SI);
14710 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
14711 ALTIVEC_BUILTIN_LVXL_V8HI);
14712 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
14713 ALTIVEC_BUILTIN_LVXL_V16QI);
14714 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
14715 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
14716 ALTIVEC_BUILTIN_LVX_V2DF);
14717 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
14718 ALTIVEC_BUILTIN_LVX_V2DI);
14719 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
14720 ALTIVEC_BUILTIN_LVX_V4SF);
14721 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
14722 ALTIVEC_BUILTIN_LVX_V4SI);
14723 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
14724 ALTIVEC_BUILTIN_LVX_V8HI);
14725 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
14726 ALTIVEC_BUILTIN_LVX_V16QI);
14727 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
14728 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
14729 ALTIVEC_BUILTIN_STVX_V2DF);
14730 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
14731 ALTIVEC_BUILTIN_STVX_V2DI);
14732 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
14733 ALTIVEC_BUILTIN_STVX_V4SF);
14734 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
14735 ALTIVEC_BUILTIN_STVX_V4SI);
14736 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
14737 ALTIVEC_BUILTIN_STVX_V8HI);
14738 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
14739 ALTIVEC_BUILTIN_STVX_V16QI);
14740 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
14741 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
14742 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
14743 ALTIVEC_BUILTIN_STVXL_V2DF);
14744 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
14745 ALTIVEC_BUILTIN_STVXL_V2DI);
14746 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
14747 ALTIVEC_BUILTIN_STVXL_V4SF);
14748 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
14749 ALTIVEC_BUILTIN_STVXL_V4SI);
14750 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
14751 ALTIVEC_BUILTIN_STVXL_V8HI);
14752 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
14753 ALTIVEC_BUILTIN_STVXL_V16QI);
14754 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
14755 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
14756 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
14757 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
14758 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
14759 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
14760 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
14761 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
14762 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
14763 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
14764 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
14765 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
14766 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
14767 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
14768 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
14769 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
14771 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
14772 VSX_BUILTIN_LXVD2X_V2DF);
14773 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
14774 VSX_BUILTIN_LXVD2X_V2DI);
14775 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
14776 VSX_BUILTIN_LXVW4X_V4SF);
14777 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
14778 VSX_BUILTIN_LXVW4X_V4SI);
14779 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
14780 VSX_BUILTIN_LXVW4X_V8HI);
14781 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
14782 VSX_BUILTIN_LXVW4X_V16QI);
14783 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
14784 VSX_BUILTIN_STXVD2X_V2DF);
14785 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
14786 VSX_BUILTIN_STXVD2X_V2DI);
14787 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
14788 VSX_BUILTIN_STXVW4X_V4SF);
14789 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
14790 VSX_BUILTIN_STXVW4X_V4SI);
14791 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
14792 VSX_BUILTIN_STXVW4X_V8HI);
14793 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
14794 VSX_BUILTIN_STXVW4X_V16QI);
14795 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
14796 VSX_BUILTIN_VEC_LD);
14797 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
14798 VSX_BUILTIN_VEC_ST);
14800 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
14801 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
14802 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
14804 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
14805 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
14806 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
14807 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
14808 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
14809 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
14810 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
14811 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
14812 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
14813 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
14814 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
14815 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
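/* Hedged example (user code): the __builtin_vec_* entry points above are
   overloaded, so a use such as

     vector signed int v = __builtin_vec_ld (0, addr);
     int e = __builtin_vec_extract (v, 2);

   is resolved by the front end from these opaque signatures to a
   concrete ALTIVEC_BUILTIN_* based on the actual argument types.  */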
14817 /* Cell builtins. */
14818 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
14819 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
14820 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
14821 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
14823 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
14824 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
14825 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
14826 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
14828 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
14829 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
14830 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
14831 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
14833 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
14834 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
14835 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
14836 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
14838 /* Add the DST variants. */
14839 d = bdesc_dst;
14840 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14841 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
14843 /* Initialize the predicates. */
14844 d = bdesc_altivec_preds;
14845 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14847 enum machine_mode mode1;
14848 tree type;
14850 if (rs6000_overloaded_builtin_p (d->code))
14851 mode1 = VOIDmode;
14852 else
14853 mode1 = insn_data[d->icode].operand[1].mode;
14855 switch (mode1)
14857 case VOIDmode:
14858 type = int_ftype_int_opaque_opaque;
14859 break;
14860 case V2DImode:
14861 type = int_ftype_int_v2di_v2di;
14862 break;
14863 case V4SImode:
14864 type = int_ftype_int_v4si_v4si;
14865 break;
14866 case V8HImode:
14867 type = int_ftype_int_v8hi_v8hi;
14868 break;
14869 case V16QImode:
14870 type = int_ftype_int_v16qi_v16qi;
14871 break;
14872 case V4SFmode:
14873 type = int_ftype_int_v4sf_v4sf;
14874 break;
14875 case V2DFmode:
14876 type = int_ftype_int_v2df_v2df;
14877 break;
14878 default:
14879 gcc_unreachable ();
14882 def_builtin (d->name, type, d->code);
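/* Illustration (the selector detail is an assumption): the leading int of
   the int_ftype_int_* signatures is the CR6 test selector, so user code
   like

     int all_eq = __builtin_altivec_vcmpequw_p (__CR6_LT, a, b);

   with a __CR6_* constant from altivec.h tests the chosen CR6 bit set by
   the dot-form compare instruction.  */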
14885 /* Initialize the abs* operators. */
14886 d = bdesc_abs;
14887 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14889 enum machine_mode mode0;
14890 tree type;
14892 mode0 = insn_data[d->icode].operand[0].mode;
14894 switch (mode0)
14896 case V2DImode:
14897 type = v2di_ftype_v2di;
14898 break;
14899 case V4SImode:
14900 type = v4si_ftype_v4si;
14901 break;
14902 case V8HImode:
14903 type = v8hi_ftype_v8hi;
14904 break;
14905 case V16QImode:
14906 type = v16qi_ftype_v16qi;
14907 break;
14908 case V4SFmode:
14909 type = v4sf_ftype_v4sf;
14910 break;
14911 case V2DFmode:
14912 type = v2df_ftype_v2df;
14913 break;
14914 default:
14915 gcc_unreachable ();
14918 def_builtin (d->name, type, d->code);
14921 /* Initialize target builtin that implements
14922 targetm.vectorize.builtin_mask_for_load. */
14924 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
14925 v16qi_ftype_long_pcvoid,
14926 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
14927 BUILT_IN_MD, NULL, NULL_TREE);
14928 TREE_READONLY (decl) = 1;
14929 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
14930 altivec_builtin_mask_for_load = decl;
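/* Conceptual sketch, assuming the classic lvsl-based realignment scheme:
   the vectorizer emits

     vector unsigned char mask = __builtin_altivec_mask_for_load (addr);

   and feeds the mask to vperm to merge two aligned loads into one
   misaligned value; TREE_READONLY above lets identical calls be CSEd.  */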
14932 /* Access to the vec_init patterns. */
14933 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
14934 integer_type_node, integer_type_node,
14935 integer_type_node, NULL_TREE);
14936 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
14938 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
14939 short_integer_type_node,
14940 short_integer_type_node,
14941 short_integer_type_node,
14942 short_integer_type_node,
14943 short_integer_type_node,
14944 short_integer_type_node,
14945 short_integer_type_node, NULL_TREE);
14946 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
14948 ftype = build_function_type_list (V16QI_type_node, char_type_node,
14949 char_type_node, char_type_node,
14950 char_type_node, char_type_node,
14951 char_type_node, char_type_node,
14952 char_type_node, char_type_node,
14953 char_type_node, char_type_node,
14954 char_type_node, char_type_node,
14955 char_type_node, char_type_node,
14956 char_type_node, NULL_TREE);
14957 def_builtin ("__builtin_vec_init_v16qi", ftype,
14958 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
14960 ftype = build_function_type_list (V4SF_type_node, float_type_node,
14961 float_type_node, float_type_node,
14962 float_type_node, NULL_TREE);
14963 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
14965 /* VSX builtins. */
14966 ftype = build_function_type_list (V2DF_type_node, double_type_node,
14967 double_type_node, NULL_TREE);
14968 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
14970 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
14971 intDI_type_node, NULL_TREE);
14972 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
14974 /* Access to the vec_set patterns. */
14975 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
14976 intSI_type_node,
14977 integer_type_node, NULL_TREE);
14978 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
14980 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
14981 intHI_type_node,
14982 integer_type_node, NULL_TREE);
14983 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
14985 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
14986 intQI_type_node,
14987 integer_type_node, NULL_TREE);
14988 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
14990 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
14991 float_type_node,
14992 integer_type_node, NULL_TREE);
14993 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
14995 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
14996 double_type_node,
14997 integer_type_node, NULL_TREE);
14998 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
15000 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
15001 intDI_type_node,
15002 integer_type_node, NULL_TREE);
15003 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
15005 /* Access to the vec_extract patterns. */
15006 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15007 integer_type_node, NULL_TREE);
15008 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
15010 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15011 integer_type_node, NULL_TREE);
15012 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
15014 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15015 integer_type_node, NULL_TREE);
15016 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
15018 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15019 integer_type_node, NULL_TREE);
15020 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
15022 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15023 integer_type_node, NULL_TREE);
15024 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
15026 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
15027 integer_type_node, NULL_TREE);
15028 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
15031 if (V1TI_type_node)
15033 tree v1ti_ftype_long_pcvoid
15034 = build_function_type_list (V1TI_type_node,
15035 long_integer_type_node, pcvoid_type_node,
15036 NULL_TREE);
15037 tree void_ftype_v1ti_long_pvoid
15038 = build_function_type_list (void_type_node,
15039 V1TI_type_node, long_integer_type_node,
15040 pvoid_type_node, NULL_TREE);
15041 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
15042 VSX_BUILTIN_LXVD2X_V1TI);
15043 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
15044 VSX_BUILTIN_STXVD2X_V1TI);
15045 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
15046 NULL_TREE);
15047 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
15048 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
15049 intTI_type_node,
15050 integer_type_node, NULL_TREE);
15051 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
15052 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
15053 integer_type_node, NULL_TREE);
15054 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
15059 static void
15060 htm_init_builtins (void)
15062 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15063 const struct builtin_description *d;
15064 size_t i;
15066 d = bdesc_htm;
15067 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15069 tree op[MAX_HTM_OPERANDS], type;
15070 HOST_WIDE_INT mask = d->mask;
15071 unsigned attr = rs6000_builtin_info[d->code].attr;
15072 bool void_func = (attr & RS6000_BTC_VOID);
15073 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
15074 int nopnds = 0;
15075 tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
15076 : unsigned_type_node;
15078 if ((mask & builtin_mask) != mask)
15080 if (TARGET_DEBUG_BUILTIN)
15081 fprintf (stderr, "htm_builtin, skip htm %s\n", d->name);
15082 continue;
15085 if (d->name == 0)
15087 if (TARGET_DEBUG_BUILTIN)
15088 fprintf (stderr, "htm_builtin, bdesc_htm[%lu] no name\n",
15089 (long unsigned) i);
15090 continue;
15093 op[nopnds++] = (void_func) ? void_type_node : argtype;
15095 if (attr_args == RS6000_BTC_UNARY)
15096 op[nopnds++] = argtype;
15097 else if (attr_args == RS6000_BTC_BINARY)
15099 op[nopnds++] = argtype;
15100 op[nopnds++] = argtype;
15102 else if (attr_args == RS6000_BTC_TERNARY)
15104 op[nopnds++] = argtype;
15105 op[nopnds++] = argtype;
15106 op[nopnds++] = argtype;
15109 switch (nopnds)
15111 case 1:
15112 type = build_function_type_list (op[0], NULL_TREE);
15113 break;
15114 case 2:
15115 type = build_function_type_list (op[0], op[1], NULL_TREE);
15116 break;
15117 case 3:
15118 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
15119 break;
15120 case 4:
15121 type = build_function_type_list (op[0], op[1], op[2], op[3],
15122 NULL_TREE);
15123 break;
15124 default:
15125 gcc_unreachable ();
15128 def_builtin (d->name, type, d->code);
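/* Usage sketch (hypothetical user code): among the builtins created here
   are the transaction primitives, typically written as

     if (__builtin_tbegin (0))
       {
         ... transactional work ...
         __builtin_tend (0);
       }

   with argument and return types derived from the RS6000_BTC_* attribute
   bits decoded above.  */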
15132 /* Hash function for builtin functions with up to 3 arguments and a return
15133 type. */
15134 hashval_t
15135 builtin_hasher::hash (builtin_hash_struct *bh)
15137 unsigned ret = 0;
15138 int i;
15140 for (i = 0; i < 4; i++)
15142 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
15143 ret = (ret * 2) + bh->uns_p[i];
15146 return ret;
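/* Illustration: each iteration folds one (mode, uns_p) pair in as

     ret = (ret * MAX_MACHINE_MODE + bh->mode[i]) * 2 + bh->uns_p[i];

   so the four pairs are packed positionally, like the digits of a
   mixed-radix number.  */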
15149 /* Compare builtin hash entries H1 and H2 for equivalence. */
15150 bool
15151 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
15153 return ((p1->mode[0] == p2->mode[0])
15154 && (p1->mode[1] == p2->mode[1])
15155 && (p1->mode[2] == p2->mode[2])
15156 && (p1->mode[3] == p2->mode[3])
15157 && (p1->uns_p[0] == p2->uns_p[0])
15158 && (p1->uns_p[1] == p2->uns_p[1])
15159 && (p1->uns_p[2] == p2->uns_p[2])
15160 && (p1->uns_p[3] == p2->uns_p[3]));
15163 /* Map types for builtin functions with an explicit return type and up to 3
15164 arguments. Functions with fewer than 3 arguments pass VOIDmode for the
15165 unused argument slots. */
15166 static tree
15167 builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
15168 enum machine_mode mode_arg1, enum machine_mode mode_arg2,
15169 enum rs6000_builtins builtin, const char *name)
15171 struct builtin_hash_struct h;
15172 struct builtin_hash_struct *h2;
15173 int num_args = 3;
15174 int i;
15175 tree ret_type = NULL_TREE;
15176 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
15178 /* Create builtin_hash_table. */
15179 if (builtin_hash_table == NULL)
15180 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
15182 h.type = NULL_TREE;
15183 h.mode[0] = mode_ret;
15184 h.mode[1] = mode_arg0;
15185 h.mode[2] = mode_arg1;
15186 h.mode[3] = mode_arg2;
15187 h.uns_p[0] = 0;
15188 h.uns_p[1] = 0;
15189 h.uns_p[2] = 0;
15190 h.uns_p[3] = 0;
15192 /* If the builtin is a type that produces unsigned results or takes unsigned
15193 arguments, and it is returned as a decl for the vectorizer (such as
15194 widening multiplies, permute), make sure the arguments and return value
15195 are type correct. */
15196 switch (builtin)
15198 /* unsigned 1 argument functions. */
15199 case CRYPTO_BUILTIN_VSBOX:
15200 case P8V_BUILTIN_VGBBD:
15201 case MISC_BUILTIN_CDTBCD:
15202 case MISC_BUILTIN_CBCDTD:
15203 h.uns_p[0] = 1;
15204 h.uns_p[1] = 1;
15205 break;
15207 /* unsigned 2 argument functions. */
15208 case ALTIVEC_BUILTIN_VMULEUB_UNS:
15209 case ALTIVEC_BUILTIN_VMULEUH_UNS:
15210 case ALTIVEC_BUILTIN_VMULOUB_UNS:
15211 case ALTIVEC_BUILTIN_VMULOUH_UNS:
15212 case CRYPTO_BUILTIN_VCIPHER:
15213 case CRYPTO_BUILTIN_VCIPHERLAST:
15214 case CRYPTO_BUILTIN_VNCIPHER:
15215 case CRYPTO_BUILTIN_VNCIPHERLAST:
15216 case CRYPTO_BUILTIN_VPMSUMB:
15217 case CRYPTO_BUILTIN_VPMSUMH:
15218 case CRYPTO_BUILTIN_VPMSUMW:
15219 case CRYPTO_BUILTIN_VPMSUMD:
15220 case CRYPTO_BUILTIN_VPMSUM:
15221 case MISC_BUILTIN_ADDG6S:
15222 case MISC_BUILTIN_DIVWEU:
15223 case MISC_BUILTIN_DIVWEUO:
15224 case MISC_BUILTIN_DIVDEU:
15225 case MISC_BUILTIN_DIVDEUO:
15226 h.uns_p[0] = 1;
15227 h.uns_p[1] = 1;
15228 h.uns_p[2] = 1;
15229 break;
15231 /* unsigned 3 argument functions. */
15232 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
15233 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
15234 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
15235 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
15236 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
15237 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
15238 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
15239 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
15240 case VSX_BUILTIN_VPERM_16QI_UNS:
15241 case VSX_BUILTIN_VPERM_8HI_UNS:
15242 case VSX_BUILTIN_VPERM_4SI_UNS:
15243 case VSX_BUILTIN_VPERM_2DI_UNS:
15244 case VSX_BUILTIN_XXSEL_16QI_UNS:
15245 case VSX_BUILTIN_XXSEL_8HI_UNS:
15246 case VSX_BUILTIN_XXSEL_4SI_UNS:
15247 case VSX_BUILTIN_XXSEL_2DI_UNS:
15248 case CRYPTO_BUILTIN_VPERMXOR:
15249 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
15250 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
15251 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
15252 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
15253 case CRYPTO_BUILTIN_VSHASIGMAW:
15254 case CRYPTO_BUILTIN_VSHASIGMAD:
15255 case CRYPTO_BUILTIN_VSHASIGMA:
15256 h.uns_p[0] = 1;
15257 h.uns_p[1] = 1;
15258 h.uns_p[2] = 1;
15259 h.uns_p[3] = 1;
15260 break;
15262 /* signed permute functions with unsigned char mask. */
15263 case ALTIVEC_BUILTIN_VPERM_16QI:
15264 case ALTIVEC_BUILTIN_VPERM_8HI:
15265 case ALTIVEC_BUILTIN_VPERM_4SI:
15266 case ALTIVEC_BUILTIN_VPERM_4SF:
15267 case ALTIVEC_BUILTIN_VPERM_2DI:
15268 case ALTIVEC_BUILTIN_VPERM_2DF:
15269 case VSX_BUILTIN_VPERM_16QI:
15270 case VSX_BUILTIN_VPERM_8HI:
15271 case VSX_BUILTIN_VPERM_4SI:
15272 case VSX_BUILTIN_VPERM_4SF:
15273 case VSX_BUILTIN_VPERM_2DI:
15274 case VSX_BUILTIN_VPERM_2DF:
15275 h.uns_p[3] = 1;
15276 break;
15278 /* unsigned args, signed return. */
15279 case VSX_BUILTIN_XVCVUXDDP_UNS:
15280 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
15281 h.uns_p[1] = 1;
15282 break;
15284 /* signed args, unsigned return. */
15285 case VSX_BUILTIN_XVCVDPUXDS_UNS:
15286 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
15287 case MISC_BUILTIN_UNPACK_TD:
15288 case MISC_BUILTIN_UNPACK_V1TI:
15289 h.uns_p[0] = 1;
15290 break;
15292 /* unsigned arguments for 128-bit pack instructions. */
15293 case MISC_BUILTIN_PACK_TD:
15294 case MISC_BUILTIN_PACK_V1TI:
15295 h.uns_p[1] = 1;
15296 h.uns_p[2] = 1;
15297 break;
15299 default:
15300 break;
15303 /* Figure out how many args are present. */
15304 while (num_args > 0 && h.mode[num_args] == VOIDmode)
15305 num_args--;
15307 if (num_args == 0)
15308 fatal_error ("internal error: builtin function %s had no type", name);
15310 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
15311 if (!ret_type && h.uns_p[0])
15312 ret_type = builtin_mode_to_type[h.mode[0]][0];
15314 if (!ret_type)
15315 fatal_error ("internal error: builtin function %s had an unexpected "
15316 "return type %s", name, GET_MODE_NAME (h.mode[0]));
15318 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
15319 arg_type[i] = NULL_TREE;
15321 for (i = 0; i < num_args; i++)
15323 int m = (int) h.mode[i+1];
15324 int uns_p = h.uns_p[i+1];
15326 arg_type[i] = builtin_mode_to_type[m][uns_p];
15327 if (!arg_type[i] && uns_p)
15328 arg_type[i] = builtin_mode_to_type[m][0];
15330 if (!arg_type[i])
15331 fatal_error ("internal error: builtin function %s, argument %d "
15332 "had unexpected argument type %s", name, i,
15333 GET_MODE_NAME (m));
15336 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
15337 if (*found == NULL)
15339 h2 = ggc_alloc<builtin_hash_struct> ();
15340 *h2 = h;
15341 *found = h2;
15343 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
15344 arg_type[2], NULL_TREE);
15347 return (*found)->type;
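/* Worked example from this file: the earlier call

     builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
                            RS6000_BUILTIN_RECIP, "__builtin_recipdiv");

   trims num_args to 2 at the trailing VOIDmode, maps each DFmode through
   builtin_mode_to_type to double, and caches the resulting
   double (double, double) type in builtin_hash_table.  */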
15350 static void
15351 rs6000_common_init_builtins (void)
15353 const struct builtin_description *d;
15354 size_t i;
15356 tree opaque_ftype_opaque = NULL_TREE;
15357 tree opaque_ftype_opaque_opaque = NULL_TREE;
15358 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
15359 tree v2si_ftype_qi = NULL_TREE;
15360 tree v2si_ftype_v2si_qi = NULL_TREE;
15361 tree v2si_ftype_int_qi = NULL_TREE;
15362 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15364 if (!TARGET_PAIRED_FLOAT)
15366 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
15367 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
15370 /* Paired and SPE builtins are only available if you build a compiler with
15371 the appropriate options, so only create those builtins with the
15372 appropriate compiler option. Create Altivec and VSX builtins on machines
15373 with at least the general purpose extensions (970 and newer) to allow the
15374 use of the target attribute. */
15376 if (TARGET_EXTRA_BUILTINS)
15377 builtin_mask |= RS6000_BTM_COMMON;
15379 /* Add the ternary operators. */
15380 d = bdesc_3arg;
15381 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15383 tree type;
15384 HOST_WIDE_INT mask = d->mask;
15386 if ((mask & builtin_mask) != mask)
15388 if (TARGET_DEBUG_BUILTIN)
15389 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
15390 continue;
15393 if (rs6000_overloaded_builtin_p (d->code))
15395 if (! (type = opaque_ftype_opaque_opaque_opaque))
15396 type = opaque_ftype_opaque_opaque_opaque
15397 = build_function_type_list (opaque_V4SI_type_node,
15398 opaque_V4SI_type_node,
15399 opaque_V4SI_type_node,
15400 opaque_V4SI_type_node,
15401 NULL_TREE);
15403 else
15405 enum insn_code icode = d->icode;
15406 if (d->name == 0)
15408 if (TARGET_DEBUG_BUILTIN)
15409 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%lu] no name\n",
15410 (long unsigned)i);
15412 continue;
15415 if (icode == CODE_FOR_nothing)
15417 if (TARGET_DEBUG_BUILTIN)
15418 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
15419 d->name);
15421 continue;
15424 type = builtin_function_type (insn_data[icode].operand[0].mode,
15425 insn_data[icode].operand[1].mode,
15426 insn_data[icode].operand[2].mode,
15427 insn_data[icode].operand[3].mode,
15428 d->code, d->name);
15431 def_builtin (d->name, type, d->code);
15434 /* Add the binary operators. */
15435 d = bdesc_2arg;
15436 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15438 enum machine_mode mode0, mode1, mode2;
15439 tree type;
15440 HOST_WIDE_INT mask = d->mask;
15442 if ((mask & builtin_mask) != mask)
15444 if (TARGET_DEBUG_BUILTIN)
15445 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
15446 continue;
15449 if (rs6000_overloaded_builtin_p (d->code))
15451 if (! (type = opaque_ftype_opaque_opaque))
15452 type = opaque_ftype_opaque_opaque
15453 = build_function_type_list (opaque_V4SI_type_node,
15454 opaque_V4SI_type_node,
15455 opaque_V4SI_type_node,
15456 NULL_TREE);
15458 else
15460 enum insn_code icode = d->icode;
15461 if (d->name == 0)
15463 if (TARGET_DEBUG_BUILTIN)
15464 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%lu] no name\n",
15465 (long unsigned)i);
15467 continue;
15470 if (icode == CODE_FOR_nothing)
15472 if (TARGET_DEBUG_BUILTIN)
15473 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
15474 d->name);
15476 continue;
15479 mode0 = insn_data[icode].operand[0].mode;
15480 mode1 = insn_data[icode].operand[1].mode;
15481 mode2 = insn_data[icode].operand[2].mode;
15483 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
15485 if (! (type = v2si_ftype_v2si_qi))
15486 type = v2si_ftype_v2si_qi
15487 = build_function_type_list (opaque_V2SI_type_node,
15488 opaque_V2SI_type_node,
15489 char_type_node,
15490 NULL_TREE);
15493 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
15494 && mode2 == QImode)
15496 if (! (type = v2si_ftype_int_qi))
15497 type = v2si_ftype_int_qi
15498 = build_function_type_list (opaque_V2SI_type_node,
15499 integer_type_node,
15500 char_type_node,
15501 NULL_TREE);
15504 else
15505 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
15506 d->code, d->name);
15509 def_builtin (d->name, type, d->code);
15512 /* Add the simple unary operators. */
15513 d = bdesc_1arg;
15514 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15516 enum machine_mode mode0, mode1;
15517 tree type;
15518 HOST_WIDE_INT mask = d->mask;
15520 if ((mask & builtin_mask) != mask)
15522 if (TARGET_DEBUG_BUILTIN)
15523 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
15524 continue;
15527 if (rs6000_overloaded_builtin_p (d->code))
15529 if (! (type = opaque_ftype_opaque))
15530 type = opaque_ftype_opaque
15531 = build_function_type_list (opaque_V4SI_type_node,
15532 opaque_V4SI_type_node,
15533 NULL_TREE);
15535 else
15537 enum insn_code icode = d->icode;
15538 if (d->name == 0)
15540 if (TARGET_DEBUG_BUILTIN)
15541 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%lu] no name\n",
15542 (long unsigned)i);
15544 continue;
15547 if (icode == CODE_FOR_nothing)
15549 if (TARGET_DEBUG_BUILTIN)
15550 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
15551 d->name);
15553 continue;
15556 mode0 = insn_data[icode].operand[0].mode;
15557 mode1 = insn_data[icode].operand[1].mode;
15559 if (mode0 == V2SImode && mode1 == QImode)
15561 if (! (type = v2si_ftype_qi))
15562 type = v2si_ftype_qi
15563 = build_function_type_list (opaque_V2SI_type_node,
15564 char_type_node,
15565 NULL_TREE);
15568 else
15569 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
15570 d->code, d->name);
15573 def_builtin (d->name, type, d->code);
15577 static void
15578 rs6000_init_libfuncs (void)
15580 if (!TARGET_IEEEQUAD)
15581 /* AIX/Darwin/64-bit Linux quad floating point routines. */
15582 if (!TARGET_XL_COMPAT)
15584 set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
15585 set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
15586 set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
15587 set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
15589 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
15591 set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
15592 set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
15593 set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
15594 set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
15595 set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
15596 set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
15597 set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
15599 set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
15600 set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
15601 set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
15602 set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
15603 set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
15604 set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
15605 set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
15606 set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
15609 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
15610 set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
15612 else
15614 set_optab_libfunc (add_optab, TFmode, "_xlqadd");
15615 set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
15616 set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
15617 set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
15619 else
15621 /* 32-bit SVR4 quad floating point routines. */
15623 set_optab_libfunc (add_optab, TFmode, "_q_add");
15624 set_optab_libfunc (sub_optab, TFmode, "_q_sub");
15625 set_optab_libfunc (neg_optab, TFmode, "_q_neg");
15626 set_optab_libfunc (smul_optab, TFmode, "_q_mul");
15627 set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
15628 if (TARGET_PPC_GPOPT)
15629 set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");
15631 set_optab_libfunc (eq_optab, TFmode, "_q_feq");
15632 set_optab_libfunc (ne_optab, TFmode, "_q_fne");
15633 set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
15634 set_optab_libfunc (ge_optab, TFmode, "_q_fge");
15635 set_optab_libfunc (lt_optab, TFmode, "_q_flt");
15636 set_optab_libfunc (le_optab, TFmode, "_q_fle");
15638 set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
15639 set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
15640 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
15641 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
15642 set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
15643 set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
15644 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
15645 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
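/* Net effect, for illustration: with the default (non-IEEE) 128-bit long
   double, a TFmode operation such as

     long double f (long double a, long double b) { return a + b; }

   is lowered to a libcall: __gcc_qadd here, or _xlqadd / _q_add on the
   TARGET_XL_COMPAT and 32-bit SVR4 branches above.  */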
15650 /* Expand a block clear operation, and return 1 if successful. Return 0
15651 if we should let the compiler generate normal code.
15653 operands[0] is the destination
15654 operands[1] is the length
15655 operands[3] is the alignment */
15658 expand_block_clear (rtx operands[])
15660 rtx orig_dest = operands[0];
15661 rtx bytes_rtx = operands[1];
15662 rtx align_rtx = operands[3];
15663 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
15664 HOST_WIDE_INT align;
15665 HOST_WIDE_INT bytes;
15666 int offset;
15667 int clear_bytes;
15668 int clear_step;
15670 /* If this is not a fixed size clear, just call memset */
15671 if (! constp)
15672 return 0;
15674 /* This must be a fixed size alignment */
15675 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15676 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15678 /* Anything to clear? */
15679 bytes = INTVAL (bytes_rtx);
15680 if (bytes <= 0)
15681 return 1;
15683 /* Use the builtin memset after a point, to avoid huge code bloat.
15684 When optimize_size, avoid any significant code bloat; calling
15685 memset is about 4 instructions, so allow for one instruction to
15686 load zero and three to do clearing. */
15687 if (TARGET_ALTIVEC && align >= 128)
15688 clear_step = 16;
15689 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
15690 clear_step = 8;
15691 else if (TARGET_SPE && align >= 64)
15692 clear_step = 8;
15693 else
15694 clear_step = 4;
15696 if (optimize_size && bytes > 3 * clear_step)
15697 return 0;
15698 if (! optimize_size && bytes > 8 * clear_step)
15699 return 0;
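/* Worked numbers: with AltiVec and 128-bit alignment clear_step is 16, so
   the inline expansion handles at most 3*16 = 48 bytes under optimize_size
   and 8*16 = 128 bytes otherwise; larger clears fall back to memset.  */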
15701 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
15703 enum machine_mode mode = BLKmode;
15704 rtx dest;
15706 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
15708 clear_bytes = 16;
15709 mode = V4SImode;
15711 else if (bytes >= 8 && TARGET_SPE && align >= 64)
15713 clear_bytes = 8;
15714 mode = V2SImode;
15716 else if (bytes >= 8 && TARGET_POWERPC64
15717 && (align >= 64 || !STRICT_ALIGNMENT))
15719 clear_bytes = 8;
15720 mode = DImode;
15721 if (offset == 0 && align < 64)
15723 rtx addr;
15725 /* If the address form is reg+offset with offset not a
15726 multiple of four, reload into reg indirect form here
15727 rather than waiting for reload. This way we get one
15728 reload, not one per store. */
15729 addr = XEXP (orig_dest, 0);
15730 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15731 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15732 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15734 addr = copy_addr_to_reg (addr);
15735 orig_dest = replace_equiv_address (orig_dest, addr);
15739 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15740 { /* clear 4 bytes */
15741 clear_bytes = 4;
15742 mode = SImode;
15744 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
15745 { /* clear 2 bytes */
15746 clear_bytes = 2;
15747 mode = HImode;
15749 else /* clear 1 byte at a time */
15751 clear_bytes = 1;
15752 mode = QImode;
15755 dest = adjust_address (orig_dest, mode, offset);
15757 emit_move_insn (dest, CONST0_RTX (mode));
15760 return 1;
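/* Example expansion, for illustration: clearing 10 bytes at 32-bit
   alignment on a 32-bit non-SPE target emits three zero stores, SImode at
   offsets 0 and 4, then HImode at offset 8.  */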
15764 /* Expand a block move operation, and return 1 if successful. Return 0
15765 if we should let the compiler generate normal code.
15767 operands[0] is the destination
15768 operands[1] is the source
15769 operands[2] is the length
15770 operands[3] is the alignment */
15772 #define MAX_MOVE_REG 4
15775 expand_block_move (rtx operands[])
15777 rtx orig_dest = operands[0];
15778 rtx orig_src = operands[1];
15779 rtx bytes_rtx = operands[2];
15780 rtx align_rtx = operands[3];
15781 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
15782 int align;
15783 int bytes;
15784 int offset;
15785 int move_bytes;
15786 rtx stores[MAX_MOVE_REG];
15787 int num_reg = 0;
15789 /* If this is not a fixed size move, just call memcpy */
15790 if (! constp)
15791 return 0;
15793 /* This must be a fixed size alignment */
15794 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15795 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15797 /* Anything to move? */
15798 bytes = INTVAL (bytes_rtx);
15799 if (bytes <= 0)
15800 return 1;
15802 if (bytes > rs6000_block_move_inline_limit)
15803 return 0;
15805 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
15807 union {
15808 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
15809 rtx (*mov) (rtx, rtx);
15810 } gen_func;
15811 enum machine_mode mode = BLKmode;
15812 rtx src, dest;
15814 /* Altivec first, since it will be faster than a string move
15815 when it applies, and usually not significantly larger. */
15816 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
15818 move_bytes = 16;
15819 mode = V4SImode;
15820 gen_func.mov = gen_movv4si;
15822 else if (TARGET_SPE && bytes >= 8 && align >= 64)
15824 move_bytes = 8;
15825 mode = V2SImode;
15826 gen_func.mov = gen_movv2si;
15828 else if (TARGET_STRING
15829 && bytes > 24 /* move up to 32 bytes at a time */
15830 && ! fixed_regs[5]
15831 && ! fixed_regs[6]
15832 && ! fixed_regs[7]
15833 && ! fixed_regs[8]
15834 && ! fixed_regs[9]
15835 && ! fixed_regs[10]
15836 && ! fixed_regs[11]
15837 && ! fixed_regs[12])
15839 move_bytes = (bytes > 32) ? 32 : bytes;
15840 gen_func.movmemsi = gen_movmemsi_8reg;
15842 else if (TARGET_STRING
15843 && bytes > 16 /* move up to 24 bytes at a time */
15844 && ! fixed_regs[5]
15845 && ! fixed_regs[6]
15846 && ! fixed_regs[7]
15847 && ! fixed_regs[8]
15848 && ! fixed_regs[9]
15849 && ! fixed_regs[10])
15851 move_bytes = (bytes > 24) ? 24 : bytes;
15852 gen_func.movmemsi = gen_movmemsi_6reg;
15854 else if (TARGET_STRING
15855 && bytes > 8 /* move up to 16 bytes at a time */
15856 && ! fixed_regs[5]
15857 && ! fixed_regs[6]
15858 && ! fixed_regs[7]
15859 && ! fixed_regs[8])
15861 move_bytes = (bytes > 16) ? 16 : bytes;
15862 gen_func.movmemsi = gen_movmemsi_4reg;
15864 else if (bytes >= 8 && TARGET_POWERPC64
15865 && (align >= 64 || !STRICT_ALIGNMENT))
15867 move_bytes = 8;
15868 mode = DImode;
15869 gen_func.mov = gen_movdi;
15870 if (offset == 0 && align < 64)
15872 rtx addr;
15874 /* If the address form is reg+offset with offset not a
15875 multiple of four, reload into reg indirect form here
15876 rather than waiting for reload. This way we get one
15877 reload, not one per load and/or store. */
15878 addr = XEXP (orig_dest, 0);
15879 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15880 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15881 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15883 addr = copy_addr_to_reg (addr);
15884 orig_dest = replace_equiv_address (orig_dest, addr);
15886 addr = XEXP (orig_src, 0);
15887 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15888 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15889 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15891 addr = copy_addr_to_reg (addr);
15892 orig_src = replace_equiv_address (orig_src, addr);
15896 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
15897 { /* move up to 8 bytes at a time */
15898 move_bytes = (bytes > 8) ? 8 : bytes;
15899 gen_func.movmemsi = gen_movmemsi_2reg;
15901 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15902 { /* move 4 bytes */
15903 move_bytes = 4;
15904 mode = SImode;
15905 gen_func.mov = gen_movsi;
15907 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
15908 { /* move 2 bytes */
15909 move_bytes = 2;
15910 mode = HImode;
15911 gen_func.mov = gen_movhi;
15913 else if (TARGET_STRING && bytes > 1)
15914 { /* move up to 4 bytes at a time */
15915 move_bytes = (bytes > 4) ? 4 : bytes;
15916 gen_func.movmemsi = gen_movmemsi_1reg;
15918 else /* move 1 byte at a time */
15920 move_bytes = 1;
15921 mode = QImode;
15922 gen_func.mov = gen_movqi;
15925 src = adjust_address (orig_src, mode, offset);
15926 dest = adjust_address (orig_dest, mode, offset);
15928 if (mode != BLKmode)
15930 rtx tmp_reg = gen_reg_rtx (mode);
15932 emit_insn ((*gen_func.mov) (tmp_reg, src));
15933 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
15936 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
15938 int i;
15939 for (i = 0; i < num_reg; i++)
15940 emit_insn (stores[i]);
15941 num_reg = 0;
15944 if (mode == BLKmode)
15946 /* Move the address into scratch registers. The movmemsi
15947 patterns require zero offset. */
15948 if (!REG_P (XEXP (src, 0)))
15950 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
15951 src = replace_equiv_address (src, src_reg);
15953 set_mem_size (src, move_bytes);
15955 if (!REG_P (XEXP (dest, 0)))
15957 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
15958 dest = replace_equiv_address (dest, dest_reg);
15960 set_mem_size (dest, move_bytes);
15962 emit_insn ((*gen_func.movmemsi) (dest, src,
15963 GEN_INT (move_bytes & 31),
15964 align_rtx));
15968 return 1;
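/* As an illustration: on a 32-bit target with 32-bit alignment and
   neither Altivec nor the string instructions in use, a 10-byte copy
   is expanded as two SImode moves followed by one HImode move.  Each
   piece is loaded into a fresh temporary register and the stores are
   emitted in batches of up to MAX_MOVE_REG, so the loads can be
   scheduled ahead of the stores.  */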
15972 /* Return a string to perform a load_multiple operation.
15973 operands[0] is the vector.
15974 operands[1] is the source address.
15975 operands[2] is the first destination register. */
15977 const char *
15978 rs6000_output_load_multiple (rtx operands[3])
15980 /* We have to handle the case where the pseudo used to contain the address
15981 is assigned to one of the output registers. */
15982 int i, j;
15983 int words = XVECLEN (operands[0], 0);
15984 rtx xop[10];
15986 if (XVECLEN (operands[0], 0) == 1)
15987 return "lwz %2,0(%1)";
15989 for (i = 0; i < words; i++)
15990 if (refers_to_regno_p (REGNO (operands[2]) + i,
15991 REGNO (operands[2]) + i + 1, operands[1], 0))
15993 if (i == words-1)
15995 xop[0] = GEN_INT (4 * (words-1));
15996 xop[1] = operands[1];
15997 xop[2] = operands[2];
15998 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
15999 return "";
16001 else if (i == 0)
16003 xop[0] = GEN_INT (4 * (words-1));
16004 xop[1] = operands[1];
16005 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16006 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
16007 return "";
16009 else
16011 for (j = 0; j < words; j++)
16012 if (j != i)
16014 xop[0] = GEN_INT (j * 4);
16015 xop[1] = operands[1];
16016 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
16017 output_asm_insn ("lwz %2,%0(%1)", xop);
16019 xop[0] = GEN_INT (i * 4);
16020 xop[1] = operands[1];
16021 output_asm_insn ("lwz %1,%0(%1)", xop);
16022 return "";
16026 return "lswi %2,%1,%N0";
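/* For instance, with three words to load through the address in r9 into
   r5..r7 and r9 not among the destinations, the fall-through case emits
   a single "lswi 5,9,12".  If the address register is also the last
   destination (r5..r7 with the address in r7), the first special case
   emits "lswi 5,7,8" and then "lwz 7,8(7)", clobbering the address only
   after the other words are loaded.  Register numbers are illustrative.  */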
16030 /* A validation routine: say whether CODE, a condition code, and MODE
16031 match. The other alternatives either don't make sense or should
16032 never be generated. */
16034 void
16035 validate_condition_mode (enum rtx_code code, enum machine_mode mode)
16037 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
16038 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
16039 && GET_MODE_CLASS (mode) == MODE_CC);
16041 /* These don't make sense. */
16042 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
16043 || mode != CCUNSmode);
16045 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
16046 || mode == CCUNSmode);
16048 gcc_assert (mode == CCFPmode
16049 || (code != ORDERED && code != UNORDERED
16050 && code != UNEQ && code != LTGT
16051 && code != UNGT && code != UNLT
16052 && code != UNGE && code != UNLE));
16054 /* These should never be generated except for
16055 flag_finite_math_only. */
16056 gcc_assert (mode != CCFPmode
16057 || flag_finite_math_only
16058 || (code != LE && code != GE
16059 && code != UNEQ && code != LTGT
16060 && code != UNGT && code != UNLT));
16062 /* These are invalid; the information is not there. */
16063 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
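/* For example, (gtu (reg cr0) (const_int 0)) is only valid with
   CCUNSmode, a signed (gt ...) is only valid with the other CC modes,
   and unordered codes such as LTGT and UNEQ require CCFPmode.  */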
16067 /* Return 1 if ANDOP is a mask that has no bits set outside the mask
16068 required to convert the result of a rotate insn into a shift
16069 left insn of SHIFTOP bits. Both are known to be SImode CONST_INT. */
16072 includes_lshift_p (rtx shiftop, rtx andop)
16074 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16076 shift_mask <<= INTVAL (shiftop);
16078 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
16081 /* Similar, but for right shift. */
16084 includes_rshift_p (rtx shiftop, rtx andop)
16086 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16088 shift_mask >>= INTVAL (shiftop);
16090 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
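/* As an illustration for the left-shift case: with SHIFTOP = 8 the
   shift clears the low 8 bits, so includes_lshift_p accepts masks such
   as 0xffffff00 or 0xffff0000 and rejects 0xfffffff0, whose low set
   bits could not have come from the shift.  */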
16093 /* Return 1 if ANDOP is a mask suitable for use with an rldic insn
16094 to perform a left shift. It must have exactly SHIFTOP least
16095 significant 0's, then one or more 1's, then zero or more 0's. */
16098 includes_rldic_lshift_p (rtx shiftop, rtx andop)
16100 if (GET_CODE (andop) == CONST_INT)
16102 HOST_WIDE_INT c, lsb, shift_mask;
16104 c = INTVAL (andop);
16105 if (c == 0 || c == ~0)
16106 return 0;
16108 shift_mask = ~0;
16109 shift_mask <<= INTVAL (shiftop);
16111 /* Find the least significant one bit. */
16112 lsb = c & -c;
16114 /* It must coincide with the LSB of the shift mask. */
16115 if (-lsb != shift_mask)
16116 return 0;
16118 /* Invert to look for the next transition (if any). */
16119 c = ~c;
16121 /* Remove the low group of ones (originally low group of zeros). */
16122 c &= -lsb;
16124 /* Again find the lsb, and check we have all 1's above. */
16125 lsb = c & -c;
16126 return c == -lsb;
16128 else
16129 return 0;
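/* For example, with SHIFTOP = 4 the mask 0x0ff0 qualifies: exactly
   four low zeros, one block of ones, then zeros, which is the pattern
   an rldic with a shift of 4 produces.  0x0ff1 is rejected because its
   least significant 1 bit does not line up with the shift amount.  */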
16132 /* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
16133 to perform a left shift. It must have SHIFTOP or more least
16134 significant 0's, with the remainder of the word 1's. */
16137 includes_rldicr_lshift_p (rtx shiftop, rtx andop)
16139 if (GET_CODE (andop) == CONST_INT)
16141 HOST_WIDE_INT c, lsb, shift_mask;
16143 shift_mask = ~0;
16144 shift_mask <<= INTVAL (shiftop);
16145 c = INTVAL (andop);
16147 /* Find the least significant one bit. */
16148 lsb = c & -c;
16150 /* It must be covered by the shift mask.
16151 This test also rejects c == 0. */
16152 if ((lsb & shift_mask) == 0)
16153 return 0;
16155 /* Check we have all 1's above the transition, and reject all 1's. */
16156 return c == -lsb && lsb != 1;
16158 else
16159 return 0;
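/* For example, with SHIFTOP = 12 the mask 0xfffffffffffff000
   qualifies: twelve low zeros and all ones above.  An all-ones mask is
   rejected (its lsb is 1), since no rldicr shift is needed for it.  */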
16162 /* Return 1 if the operands will generate valid arguments to the rlwimi
16163 instruction for an insert with a right shift in 64-bit mode. The mask may
16164 not start on the first bit or stop on the last bit because the wrap-around
16165 effects of the instruction do not correspond to the semantics of the RTL insn. */
16168 insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
16170 if (INTVAL (startop) > 32
16171 && INTVAL (startop) < 64
16172 && INTVAL (sizeop) > 1
16173 && INTVAL (sizeop) + INTVAL (startop) < 64
16174 && INTVAL (shiftop) > 0
16175 && INTVAL (sizeop) + INTVAL (shiftop) < 32
16176 && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
16177 return 1;
16179 return 0;
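/* For example, SIZEOP = 8, STARTOP = 48, SHIFTOP = 16 passes every
   test above (48 + 8 < 64, 8 + 16 < 32, 64 - 16 >= 8); moving the
   field to either end of the doubleword makes one of the tests fail.  */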
16182 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1 making them candidates
16183 for lfq and stfq insns iff the registers are hard registers. */
16186 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
16188 /* We might have been passed a SUBREG. */
16189 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
16190 return 0;
16192 /* We might have been passed non floating point registers. */
16193 if (!FP_REGNO_P (REGNO (reg1))
16194 || !FP_REGNO_P (REGNO (reg2)))
16195 return 0;
16197 return (REGNO (reg1) == REGNO (reg2) - 1);
16200 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
16201 addr1 and addr2 must be in consecutive memory locations
16202 (addr2 == addr1 + 8). */
16205 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
16207 rtx addr1, addr2;
16208 unsigned int reg1, reg2;
16209 int offset1, offset2;
16211 /* The mems cannot be volatile. */
16212 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
16213 return 0;
16215 addr1 = XEXP (mem1, 0);
16216 addr2 = XEXP (mem2, 0);
16218 /* Extract an offset (if used) from the first addr. */
16219 if (GET_CODE (addr1) == PLUS)
16221 /* If not a REG, return zero. */
16222 if (GET_CODE (XEXP (addr1, 0)) != REG)
16223 return 0;
16224 else
16226 reg1 = REGNO (XEXP (addr1, 0));
16227 /* The offset must be constant! */
16228 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
16229 return 0;
16230 offset1 = INTVAL (XEXP (addr1, 1));
16233 else if (GET_CODE (addr1) != REG)
16234 return 0;
16235 else
16237 reg1 = REGNO (addr1);
16238 /* This was a simple (mem (reg)) expression. Offset is 0. */
16239 offset1 = 0;
16242 /* And now for the second addr. */
16243 if (GET_CODE (addr2) == PLUS)
16245 /* If not a REG, return zero. */
16246 if (GET_CODE (XEXP (addr2, 0)) != REG)
16247 return 0;
16248 else
16250 reg2 = REGNO (XEXP (addr2, 0));
16251 /* The offset must be constant. */
16252 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
16253 return 0;
16254 offset2 = INTVAL (XEXP (addr2, 1));
16257 else if (GET_CODE (addr2) != REG)
16258 return 0;
16259 else
16261 reg2 = REGNO (addr2);
16262 /* This was a simple (mem (reg)) expression. Offset is 0. */
16263 offset2 = 0;
16266 /* Both of these must have the same base register. */
16267 if (reg1 != reg2)
16268 return 0;
16270 /* The offset for the second addr must be 8 more than the first addr. */
16271 if (offset2 != offset1 + 8)
16272 return 0;
16274 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
16275 instructions. */
16276 return 1;
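/* For example, (mem (plus (reg 9) (const_int 16))) followed by
   (mem (plus (reg 9) (const_int 24))) qualifies: the same base
   register, constant offsets exactly 8 apart, and neither mem
   volatile.  Register numbers are illustrative.  */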
16281 rs6000_secondary_memory_needed_rtx (enum machine_mode mode)
16283 static bool eliminated = false;
16284 rtx ret;
16286 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
16287 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16288 else
16290 rtx mem = cfun->machine->sdmode_stack_slot;
16291 gcc_assert (mem != NULL_RTX);
16293 if (!eliminated)
16295 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
16296 cfun->machine->sdmode_stack_slot = mem;
16297 eliminated = true;
16299 ret = mem;
16302 if (TARGET_DEBUG_ADDR)
16304 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
16305 GET_MODE_NAME (mode));
16306 if (!ret)
16307 fprintf (stderr, "\tNULL_RTX\n");
16308 else
16309 debug_rtx (ret);
16312 return ret;
16315 /* Return the mode to be used for memory when a secondary memory
16316 location is needed. For SDmode values we need to use DDmode, in
16317 all other cases we can use the same mode. */
16318 enum machine_mode
16319 rs6000_secondary_memory_needed_mode (enum machine_mode mode)
16321 if (lra_in_progress && mode == SDmode)
16322 return DDmode;
16323 return mode;
16326 static tree
16327 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
16329 /* Don't walk into types. */
16330 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
16332 *walk_subtrees = 0;
16333 return NULL_TREE;
16336 switch (TREE_CODE (*tp))
16338 case VAR_DECL:
16339 case PARM_DECL:
16340 case FIELD_DECL:
16341 case RESULT_DECL:
16342 case SSA_NAME:
16343 case REAL_CST:
16344 case MEM_REF:
16345 case VIEW_CONVERT_EXPR:
16346 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
16347 return *tp;
16348 break;
16349 default:
16350 break;
16353 return NULL_TREE;
16356 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
16357 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
16358 only work on the traditional altivec registers, note if an altivec register
16359 was chosen. */
16361 static enum rs6000_reg_type
16362 register_to_reg_type (rtx reg, bool *is_altivec)
16364 HOST_WIDE_INT regno;
16365 enum reg_class rclass;
16367 if (GET_CODE (reg) == SUBREG)
16368 reg = SUBREG_REG (reg);
16370 if (!REG_P (reg))
16371 return NO_REG_TYPE;
16373 regno = REGNO (reg);
16374 if (regno >= FIRST_PSEUDO_REGISTER)
16376 if (!lra_in_progress && !reload_in_progress && !reload_completed)
16377 return PSEUDO_REG_TYPE;
16379 regno = true_regnum (reg);
16380 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16381 return PSEUDO_REG_TYPE;
16384 gcc_assert (regno >= 0);
16386 if (is_altivec && ALTIVEC_REGNO_P (regno))
16387 *is_altivec = true;
16389 rclass = rs6000_regno_regclass[regno];
16390 return reg_class_to_reg_type[(int)rclass];
16393 /* Helper function for rs6000_secondary_reload to return true if a move to a
16394 different register class is really a simple move. */
16396 static bool
16397 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
16398 enum rs6000_reg_type from_type,
16399 enum machine_mode mode)
16401 int size;
16403 /* Add support for various direct moves available. In this function, we only
16404 look at cases where we don't need any extra registers, and one or more
16405 simple move insns are issued. At present, 32-bit integers are not allowed
16406 in FPR/VSX registers. Single precision binary floating point is not a simple
16407 move because we need to convert to the single precision memory layout.
16408 The 4-byte SDmode can be moved. */
16409 size = GET_MODE_SIZE (mode);
16410 if (TARGET_DIRECT_MOVE
16411 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
16412 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16413 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
16414 return true;
16416 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
16417 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
16418 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16419 return true;
16421 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
16422 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
16423 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16424 return true;
16426 return false;
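/* For instance, on a 64-bit power8 target with TARGET_DIRECT_MOVE, a
   DImode copy between a GPR and a VSX register is a simple move (one
   mtvsrd or mfvsrd), as is an SDmode copy; an SImode copy between the
   same classes is not, and must use the more expensive paths below.  */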
16429 /* Power8 helper function for rs6000_secondary_reload, handle all of the
16430 special direct moves that involve allocating an extra register. Return
16431 true if such a helper exists, filling in the insn code and extra cost
16432 in SRI; return false if not. */
16434 static bool
16435 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
16436 enum rs6000_reg_type from_type,
16437 enum machine_mode mode,
16438 secondary_reload_info *sri,
16439 bool altivec_p)
16441 bool ret = false;
16442 enum insn_code icode = CODE_FOR_nothing;
16443 int cost = 0;
16444 int size = GET_MODE_SIZE (mode);
16446 if (TARGET_POWERPC64)
16448 if (size == 16)
16450 /* Handle moving 128-bit values from GPRs to VSX registers on
16451 power8 when running in 64-bit mode using XXPERMDI to glue the two
16452 64-bit values back together. */
16453 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16455 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16456 icode = reg_addr[mode].reload_vsx_gpr;
16459 /* Handle moving 128-bit values from VSX registers to GPRs on
16460 power8 when running in 64-bit mode using XXPERMDI to get access to the
16461 bottom 64-bit value. */
16462 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16464 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16465 icode = reg_addr[mode].reload_gpr_vsx;
16469 else if (mode == SFmode)
16471 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16473 cost = 3; /* xscvdpspn, mfvsrd, and. */
16474 icode = reg_addr[mode].reload_gpr_vsx;
16477 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16479 cost = 2; /* mtvsrz, xscvspdpn. */
16480 icode = reg_addr[mode].reload_vsx_gpr;
16506 if (!TARGET_POWERPC64 && size == 8)
16508 /* Handle moving 64-bit values from GPRs to floating point registers on
16509 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
16510 values back together. Altivec register classes must be handled
16511 specially since a different instruction is used, and the secondary
16512 reload support requires a single instruction class in the scratch
16513 register constraint. However, right now TFmode is not allowed in
16514 Altivec registers, so the pattern will never match. */
16515 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
16517 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
16518 icode = reg_addr[mode].reload_fpr_gpr;
16522 if (icode != CODE_FOR_nothing)
16524 ret = true;
16525 if (sri)
16527 sri->icode = icode;
16528 sri->extra_cost = cost;
16532 return ret;
16535 /* Return whether a move between two register classes can be done either
16536 directly (simple move) or via a pattern that uses a single extra temporary
16537 (using power8's direct move in this case). */
16539 static bool
16540 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
16541 enum rs6000_reg_type from_type,
16542 enum machine_mode mode,
16543 secondary_reload_info *sri,
16544 bool altivec_p)
16546 /* Fall back to load/store reloads if either type is not a register. */
16547 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
16548 return false;
16550 /* If we haven't allocated registers yet, assume the move can be done for the
16551 standard register types. */
16552 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
16553 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
16554 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
16555 return true;
16557 /* A move within the same set of registers is a simple move for non-specialized
16558 registers. */
16559 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
16560 return true;
16562 /* Check whether a simple move can be done directly. */
16563 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
16565 if (sri)
16567 sri->icode = CODE_FOR_nothing;
16568 sri->extra_cost = 0;
16570 return true;
16573 /* Now check if we can do it in a few steps. */
16574 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
16575 altivec_p);
16578 /* Inform reload about cases where moving X with a mode MODE to a register in
16579 RCLASS requires an extra scratch or immediate register. Return the class
16580 needed for the immediate register.
16582 For VSX and Altivec, we may need a register to convert sp+offset into
16583 reg+sp.
16585 For misaligned 64-bit gpr loads and stores we need a register to
16586 convert an offset address to indirect. */
16588 static reg_class_t
16589 rs6000_secondary_reload (bool in_p,
16590 rtx x,
16591 reg_class_t rclass_i,
16592 enum machine_mode mode,
16593 secondary_reload_info *sri)
16595 enum reg_class rclass = (enum reg_class) rclass_i;
16596 reg_class_t ret = ALL_REGS;
16597 enum insn_code icode;
16598 bool default_p = false;
16600 sri->icode = CODE_FOR_nothing;
16601 icode = ((in_p)
16602 ? reg_addr[mode].reload_load
16603 : reg_addr[mode].reload_store);
16605 if (REG_P (x) || register_operand (x, mode))
16607 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
16608 bool altivec_p = (rclass == ALTIVEC_REGS);
16609 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
16611 if (!in_p)
16613 enum rs6000_reg_type exchange = to_type;
16614 to_type = from_type;
16615 from_type = exchange;
16618 /* Can we do a direct move of some sort? */
16619 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
16620 altivec_p))
16622 icode = (enum insn_code)sri->icode;
16623 default_p = false;
16624 ret = NO_REGS;
16628 /* Handle vector moves with reload helper functions. */
16629 if (ret == ALL_REGS && icode != CODE_FOR_nothing)
16631 ret = NO_REGS;
16632 sri->icode = CODE_FOR_nothing;
16633 sri->extra_cost = 0;
16635 if (GET_CODE (x) == MEM)
16637 rtx addr = XEXP (x, 0);
16639 /* Loads to and stores from gprs can do reg+offset, and wouldn't need
16640 an extra register in that case, but they would need one if the
16641 addressing is reg+reg or (reg+reg)&(-16). Special-case load/store
16642 quad. */
16643 if (rclass == GENERAL_REGS || rclass == BASE_REGS)
16645 if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY
16646 && GET_MODE_SIZE (mode) == 16
16647 && quad_memory_operand (x, mode))
16649 sri->icode = icode;
16650 sri->extra_cost = 2;
16653 else if (!legitimate_indirect_address_p (addr, false)
16654 && !rs6000_legitimate_offset_address_p (PTImode, addr,
16655 false, true))
16657 sri->icode = icode;
16658 /* Account for splitting the loads, and converting the
16659 address from reg+reg to reg. */
16660 sri->extra_cost = (((TARGET_64BIT) ? 3 : 5)
16661 + ((GET_CODE (addr) == AND) ? 1 : 0));
16664 /* Allow scalar loads to/from the traditional floating point
16665 registers, even if VSX memory is set. */
16666 else if ((rclass == FLOAT_REGS || rclass == NO_REGS)
16667 && (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
16668 && (legitimate_indirect_address_p (addr, false)
16669 || legitimate_indexed_address_p (addr, false)
16670 || rs6000_legitimate_offset_address_p (mode, addr,
16671 false, true)))
16674 /* Loads to and stores from vector registers can only do reg+reg
16675 addressing. Altivec registers can also do (reg+reg)&(-16). Allow
16676 scalar modes loading up the traditional floating point registers
16677 to use offset addresses. */
16678 else if (rclass == VSX_REGS || rclass == ALTIVEC_REGS
16679 || rclass == FLOAT_REGS || rclass == NO_REGS)
16681 if (!VECTOR_MEM_ALTIVEC_P (mode)
16682 && GET_CODE (addr) == AND
16683 && GET_CODE (XEXP (addr, 1)) == CONST_INT
16684 && INTVAL (XEXP (addr, 1)) == -16
16685 && (legitimate_indirect_address_p (XEXP (addr, 0), false)
16686 || legitimate_indexed_address_p (XEXP (addr, 0), false)))
16688 sri->icode = icode;
16689 sri->extra_cost = ((GET_CODE (XEXP (addr, 0)) == PLUS)
16690 ? 2 : 1);
16692 else if (!legitimate_indirect_address_p (addr, false)
16693 && (rclass == NO_REGS
16694 || !legitimate_indexed_address_p (addr, false)))
16696 sri->icode = icode;
16697 sri->extra_cost = 1;
16699 else
16700 icode = CODE_FOR_nothing;
16702 /* Any other loads, including to pseudo registers which haven't been
16703 assigned to a register yet, default to requiring a scratch
16704 register. */
16705 else
16707 sri->icode = icode;
16708 sri->extra_cost = 2;
16711 else if (REG_P (x))
16713 int regno = true_regnum (x);
16715 icode = CODE_FOR_nothing;
16716 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16717 default_p = true;
16718 else
16720 enum reg_class xclass = REGNO_REG_CLASS (regno);
16721 enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass];
16722 enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass];
16724 /* If memory is needed, use default_secondary_reload to create the
16725 stack slot. */
16726 if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1))
16727 default_p = true;
16728 else
16729 ret = NO_REGS;
16732 else
16733 default_p = true;
16735 else if (TARGET_POWERPC64
16736 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
16737 && MEM_P (x)
16738 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
16740 rtx addr = XEXP (x, 0);
16741 rtx off = address_offset (addr);
16743 if (off != NULL_RTX)
16745 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
16746 unsigned HOST_WIDE_INT offset = INTVAL (off);
16748 /* We need a secondary reload when our legitimate_address_p
16749 says the address is good (as otherwise the entire address
16750 will be reloaded), and the offset is not a multiple of
16751 four or we have an address wrap. Address wrap will only
16752 occur for LO_SUMs since legitimate_offset_address_p
16753 rejects addresses for 16-byte mems that will wrap. */
16754 if (GET_CODE (addr) == LO_SUM
16755 ? (1 /* legitimate_address_p allows any offset for lo_sum */
16756 && ((offset & 3) != 0
16757 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
16758 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
16759 && (offset & 3) != 0))
16761 if (in_p)
16762 sri->icode = CODE_FOR_reload_di_load;
16763 else
16764 sri->icode = CODE_FOR_reload_di_store;
16765 sri->extra_cost = 2;
16766 ret = NO_REGS;
16768 else
16769 default_p = true;
16771 else
16772 default_p = true;
16774 else if (!TARGET_POWERPC64
16775 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
16776 && MEM_P (x)
16777 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
16779 rtx addr = XEXP (x, 0);
16780 rtx off = address_offset (addr);
16782 if (off != NULL_RTX)
16784 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
16785 unsigned HOST_WIDE_INT offset = INTVAL (off);
16787 /* We need a secondary reload when our legitimate_address_p
16788 says the address is good (as otherwise the entire address
16789 will be reloaded), and we have a wrap.
16791 legitimate_lo_sum_address_p allows LO_SUM addresses to
16792 have any offset so test for wrap in the low 16 bits.
16794 legitimate_offset_address_p checks for the range
16795 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
16796 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
16797 [0x7ff4,0x7fff] respectively, so test for the
16798 intersection of these ranges, [0x7ffc,0x7fff] and
16799 [0x7ff4,0x7ff7] respectively.
16801 Note that the address we see here may have been
16802 manipulated by legitimize_reload_address. */
16803 if (GET_CODE (addr) == LO_SUM
16804 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
16805 : offset - (0x8000 - extra) < UNITS_PER_WORD)
16807 if (in_p)
16808 sri->icode = CODE_FOR_reload_si_load;
16809 else
16810 sri->icode = CODE_FOR_reload_si_store;
16811 sri->extra_cost = 2;
16812 ret = NO_REGS;
16814 else
16815 default_p = true;
16817 else
16818 default_p = true;
16820 else
16821 default_p = true;
16823 if (default_p)
16824 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
16826 gcc_assert (ret != ALL_REGS);
16828 if (TARGET_DEBUG_ADDR)
16830 fprintf (stderr,
16831 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
16832 "mode = %s",
16833 reg_class_names[ret],
16834 in_p ? "true" : "false",
16835 reg_class_names[rclass],
16836 GET_MODE_NAME (mode));
16838 if (default_p)
16839 fprintf (stderr, ", default secondary reload");
16841 if (sri->icode != CODE_FOR_nothing)
16842 fprintf (stderr, ", reload func = %s, extra cost = %d\n",
16843 insn_data[sri->icode].name, sri->extra_cost);
16844 else
16845 fprintf (stderr, "\n");
16847 debug_rtx (x);
16850 return ret;
16853 /* Better tracing for rs6000_secondary_reload_inner. */
16855 static void
16856 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
16857 bool store_p)
16859 rtx set, clobber;
16861 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
16863 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
16864 store_p ? "store" : "load");
16866 if (store_p)
16867 set = gen_rtx_SET (VOIDmode, mem, reg);
16868 else
16869 set = gen_rtx_SET (VOIDmode, reg, mem);
16871 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
16872 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
16875 static void
16876 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
16877 bool store_p)
16879 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
16880 gcc_unreachable ();
16883 /* Fixup reload addresses for Altivec or VSX loads/stores to change SP+offset
16884 to SP+reg addressing. */
16886 void
16887 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
16889 int regno = true_regnum (reg);
16890 enum machine_mode mode = GET_MODE (reg);
16891 enum reg_class rclass;
16892 rtx addr;
16893 rtx and_op2 = NULL_RTX;
16894 rtx addr_op1;
16895 rtx addr_op2;
16896 rtx scratch_or_premodify = scratch;
16897 rtx and_rtx;
16898 rtx cc_clobber;
16900 if (TARGET_DEBUG_ADDR)
16901 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
16903 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16904 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16906 if (GET_CODE (mem) != MEM)
16907 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16909 rclass = REGNO_REG_CLASS (regno);
16910 addr = find_replacement (&XEXP (mem, 0));
16912 switch (rclass)
16914 /* GPRs can handle reg + small constant; all other addresses need to use
16915 the scratch register. */
16916 case GENERAL_REGS:
16917 case BASE_REGS:
16918 if (GET_CODE (addr) == AND)
16920 and_op2 = XEXP (addr, 1);
16921 addr = find_replacement (&XEXP (addr, 0));
16924 if (GET_CODE (addr) == PRE_MODIFY)
16926 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
16927 if (!REG_P (scratch_or_premodify))
16928 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16930 addr = find_replacement (&XEXP (addr, 1));
16931 if (GET_CODE (addr) != PLUS)
16932 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16935 if (GET_CODE (addr) == PLUS
16936 && (and_op2 != NULL_RTX
16937 || !rs6000_legitimate_offset_address_p (PTImode, addr,
16938 false, true)))
16940 /* find_replacement already recurses into both operands of
16941 PLUS so we don't need to call it here. */
16942 addr_op1 = XEXP (addr, 0);
16943 addr_op2 = XEXP (addr, 1);
16944 if (!legitimate_indirect_address_p (addr_op1, false))
16945 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
16947 if (!REG_P (addr_op2)
16948 && (GET_CODE (addr_op2) != CONST_INT
16949 || !satisfies_constraint_I (addr_op2)))
16951 if (TARGET_DEBUG_ADDR)
16953 fprintf (stderr,
16954 "\nMove plus addr to register %s, mode = %s: ",
16955 rs6000_reg_names[REGNO (scratch)],
16956 GET_MODE_NAME (mode));
16957 debug_rtx (addr_op2);
16959 rs6000_emit_move (scratch, addr_op2, Pmode);
16960 addr_op2 = scratch;
16963 emit_insn (gen_rtx_SET (VOIDmode,
16964 scratch_or_premodify,
16965 gen_rtx_PLUS (Pmode,
16966 addr_op1,
16967 addr_op2)));
16969 addr = scratch_or_premodify;
16970 scratch_or_premodify = scratch;
16972 else if (!legitimate_indirect_address_p (addr, false)
16973 && !rs6000_legitimate_offset_address_p (PTImode, addr,
16974 false, true))
16976 if (TARGET_DEBUG_ADDR)
16978 fprintf (stderr, "\nMove addr to register %s, mode = %s: ",
16979 rs6000_reg_names[REGNO (scratch_or_premodify)],
16980 GET_MODE_NAME (mode));
16981 debug_rtx (addr);
16983 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
16984 addr = scratch_or_premodify;
16985 scratch_or_premodify = scratch;
16987 break;
16989 /* Float registers can do offset+reg addressing for scalar types. */
16990 case FLOAT_REGS:
16991 if (legitimate_indirect_address_p (addr, false) /* reg */
16992 || legitimate_indexed_address_p (addr, false) /* reg+reg */
16993 || ((GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
16994 && and_op2 == NULL_RTX
16995 && scratch_or_premodify == scratch
16996 && rs6000_legitimate_offset_address_p (mode, addr, false, false)))
16997 break;
16999 /* If this isn't a legacy floating point load/store, fall through to the
17000 VSX defaults. */
17002 /* VSX/Altivec registers can only handle reg+reg addressing. Move other
17003 addresses into a scratch register. */
17004 case VSX_REGS:
17005 case ALTIVEC_REGS:
17007 /* With float regs, we need to handle the AND ourselves, since we can't
17008 use the Altivec instruction with an implicit AND -16. Allow scalar
17009 loads to float registers to use reg+offset even if VSX. */
17010 if (GET_CODE (addr) == AND
17011 && (rclass != ALTIVEC_REGS || GET_MODE_SIZE (mode) != 16
17012 || GET_CODE (XEXP (addr, 1)) != CONST_INT
17013 || INTVAL (XEXP (addr, 1)) != -16
17014 || !VECTOR_MEM_ALTIVEC_P (mode)))
17016 and_op2 = XEXP (addr, 1);
17017 addr = find_replacement (&XEXP (addr, 0));
17020 /* If we aren't using a VSX load, save the PRE_MODIFY register and use it
17021 as the address later. */
17022 if (GET_CODE (addr) == PRE_MODIFY
17023 && ((ALTIVEC_OR_VSX_VECTOR_MODE (mode)
17024 && (rclass != FLOAT_REGS
17025 || (GET_MODE_SIZE (mode) != 4 && GET_MODE_SIZE (mode) != 8)))
17026 || and_op2 != NULL_RTX
17027 || !legitimate_indexed_address_p (XEXP (addr, 1), false)))
17029 scratch_or_premodify = find_replacement (&XEXP (addr, 0));
17030 if (!legitimate_indirect_address_p (scratch_or_premodify, false))
17031 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17033 addr = find_replacement (&XEXP (addr, 1));
17034 if (GET_CODE (addr) != PLUS)
17035 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17038 if (legitimate_indirect_address_p (addr, false) /* reg */
17039 || legitimate_indexed_address_p (addr, false) /* reg+reg */
17040 || (GET_CODE (addr) == AND /* Altivec memory */
17041 && rclass == ALTIVEC_REGS
17042 && GET_CODE (XEXP (addr, 1)) == CONST_INT
17043 && INTVAL (XEXP (addr, 1)) == -16
17044 && (legitimate_indirect_address_p (XEXP (addr, 0), false)
17045 || legitimate_indexed_address_p (XEXP (addr, 0), false))))
17048 else if (GET_CODE (addr) == PLUS)
17050 addr_op1 = XEXP (addr, 0);
17051 addr_op2 = XEXP (addr, 1);
17052 if (!REG_P (addr_op1))
17053 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17055 if (TARGET_DEBUG_ADDR)
17057 fprintf (stderr, "\nMove plus addr to register %s, mode = %s: ",
17058 rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode));
17059 debug_rtx (addr_op2);
17061 rs6000_emit_move (scratch, addr_op2, Pmode);
17062 emit_insn (gen_rtx_SET (VOIDmode,
17063 scratch_or_premodify,
17064 gen_rtx_PLUS (Pmode,
17065 addr_op1,
17066 scratch)));
17067 addr = scratch_or_premodify;
17068 scratch_or_premodify = scratch;
17071 else if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == CONST
17072 || GET_CODE (addr) == CONST_INT || GET_CODE (addr) == LO_SUM
17073 || REG_P (addr))
17075 if (TARGET_DEBUG_ADDR)
17077 fprintf (stderr, "\nMove addr to register %s, mode = %s: ",
17078 rs6000_reg_names[REGNO (scratch_or_premodify)],
17079 GET_MODE_NAME (mode));
17080 debug_rtx (addr);
17083 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
17084 addr = scratch_or_premodify;
17085 scratch_or_premodify = scratch;
17088 else
17089 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17091 break;
17093 default:
17094 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17097 /* If the original address involved a pre-modify that we couldn't use with the
17098 VSX memory instruction with update, and we haven't already taken care of
17099 it, store the address in the pre-modify register and use that as the
17100 address. */
17101 if (scratch_or_premodify != scratch && scratch_or_premodify != addr)
17103 emit_insn (gen_rtx_SET (VOIDmode, scratch_or_premodify, addr));
17104 addr = scratch_or_premodify;
17107 /* If the original address involved an AND -16 and we couldn't use an ALTIVEC
17108 memory instruction, recreate the AND now, including the clobber which is
17109 generated by the general ANDSI3/ANDDI3 patterns for the
17110 andi. instruction. */
17111 if (and_op2 != NULL_RTX)
17113 if (! legitimate_indirect_address_p (addr, false))
17115 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17116 addr = scratch;
17119 if (TARGET_DEBUG_ADDR)
17121 fprintf (stderr, "\nAnd addr to register %s, mode = %s: ",
17122 rs6000_reg_names[REGNO (scratch)], GET_MODE_NAME (mode));
17123 debug_rtx (and_op2);
17126 and_rtx = gen_rtx_SET (VOIDmode,
17127 scratch,
17128 gen_rtx_AND (Pmode,
17129 addr,
17130 and_op2));
17132 cc_clobber = gen_rtx_CLOBBER (CCmode, gen_rtx_SCRATCH (CCmode));
17133 emit_insn (gen_rtx_PARALLEL (VOIDmode,
17134 gen_rtvec (2, and_rtx, cc_clobber)));
17135 addr = scratch;
17138 /* Adjust the address if it changed. */
17139 if (addr != XEXP (mem, 0))
17141 mem = replace_equiv_address_nv (mem, addr);
17142 if (TARGET_DEBUG_ADDR)
17143 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
17146 /* Now create the move. */
17147 if (store_p)
17148 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17149 else
17150 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17152 return;
17155 /* Convert reloads involving 64-bit gprs and misaligned offset
17156 addressing, or multiple 32-bit gprs and offsets that are too large,
17157 to use indirect addressing. */
17159 void
17160 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
17162 int regno = true_regnum (reg);
17163 enum reg_class rclass;
17164 rtx addr;
17165 rtx scratch_or_premodify = scratch;
17167 if (TARGET_DEBUG_ADDR)
17169 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
17170 store_p ? "store" : "load");
17171 fprintf (stderr, "reg:\n");
17172 debug_rtx (reg);
17173 fprintf (stderr, "mem:\n");
17174 debug_rtx (mem);
17175 fprintf (stderr, "scratch:\n");
17176 debug_rtx (scratch);
17179 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
17180 gcc_assert (GET_CODE (mem) == MEM);
17181 rclass = REGNO_REG_CLASS (regno);
17182 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
17183 addr = XEXP (mem, 0);
17185 if (GET_CODE (addr) == PRE_MODIFY)
17187 scratch_or_premodify = XEXP (addr, 0);
17188 gcc_assert (REG_P (scratch_or_premodify));
17189 addr = XEXP (addr, 1);
17191 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
17193 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
17195 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
17197 /* Now create the move. */
17198 if (store_p)
17199 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17200 else
17201 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17203 return;
17206 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
17207 this function has any SDmode references. If we are on a power7 or later, we
17208 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
17209 can load/store the value. */
17211 static void
17212 rs6000_alloc_sdmode_stack_slot (void)
17214 tree t;
17215 basic_block bb;
17216 gimple_stmt_iterator gsi;
17218 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
17219 /* We use a different approach for dealing with the secondary
17220 memory in LRA. */
17221 if (ira_use_lra_p)
17222 return;
17224 if (TARGET_NO_SDMODE_STACK)
17225 return;
17227 FOR_EACH_BB_FN (bb, cfun)
17228 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
17230 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
17231 if (ret)
17233 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17234 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17235 SDmode, 0);
17236 return;
17240 /* Check for any SDmode parameters of the function. */
17241 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
17243 if (TREE_TYPE (t) == error_mark_node)
17244 continue;
17246 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
17247 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
17249 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17250 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17251 SDmode, 0);
17252 return;
17257 static void
17258 rs6000_instantiate_decls (void)
17260 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
17261 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
17264 /* Given an rtx X being reloaded into a reg required to be
17265 in class CLASS, return the class of reg to actually use.
17266 In general this is just CLASS; but on some machines
17267 in some cases it is preferable to use a more restrictive class.
17269 On the RS/6000, we have to return NO_REGS when we want to reload a
17270 floating-point CONST_DOUBLE to force it to be copied to memory.
17272 We also don't want to reload integer values into floating-point
17273 registers if we can at all help it. In fact, this can
17274 cause reload to die, if it tries to generate a reload of CTR
17275 into a FP register and discovers it doesn't have the memory location
17276 required.
17278 ??? Would it be a good idea to have reload do the converse, that is
17279 try to reload floating modes into FP registers if possible?
17282 static enum reg_class
17283 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
17285 enum machine_mode mode = GET_MODE (x);
17287 if (TARGET_VSX && x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
17288 return rclass;
17290 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
17291 && (rclass == ALTIVEC_REGS || rclass == VSX_REGS)
17292 && easy_vector_constant (x, mode))
17293 return ALTIVEC_REGS;
17295 if ((CONSTANT_P (x) || GET_CODE (x) == PLUS))
17297 if (reg_class_subset_p (GENERAL_REGS, rclass))
17298 return GENERAL_REGS;
17299 if (reg_class_subset_p (BASE_REGS, rclass))
17300 return BASE_REGS;
17301 return NO_REGS;
17304 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
17305 return GENERAL_REGS;
17307 /* For VSX, prefer the traditional registers for 64-bit values because we can
17308 use the non-VSX loads. Prefer the Altivec registers if Altivec is
17309 handling the vector operations (i.e. V16QI, V8HI, and V4SI), or if we
17310 prefer Altivec loads. */
17311 if (rclass == VSX_REGS)
17313 if (MEM_P (x) && reg_addr[mode].scalar_in_vmx_p)
17315 rtx addr = XEXP (x, 0);
17316 if (rs6000_legitimate_offset_address_p (mode, addr, false, true)
17317 || legitimate_lo_sum_address_p (mode, addr, false))
17318 return FLOAT_REGS;
17320 else if (GET_MODE_SIZE (mode) <= 8 && !reg_addr[mode].scalar_in_vmx_p)
17321 return FLOAT_REGS;
17323 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
17324 || mode == V1TImode)
17325 return ALTIVEC_REGS;
17327 return rclass;
17330 return rclass;
17333 /* Debug version of rs6000_preferred_reload_class. */
17334 static enum reg_class
17335 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
17337 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
17339 fprintf (stderr,
17340 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
17341 "mode = %s, x:\n",
17342 reg_class_names[ret], reg_class_names[rclass],
17343 GET_MODE_NAME (GET_MODE (x)));
17344 debug_rtx (x);
17346 return ret;
17349 /* If we are copying between FP or AltiVec registers and anything else, we need
17350 a memory location. The exception is when we are targeting ppc64 and the
17351 instructions that move directly between FPRs and GPRs are available. Also, under VSX, you
17352 can copy vector registers from the FP register set to the Altivec register
17353 set and vice versa. */
17355 static bool
17356 rs6000_secondary_memory_needed (enum reg_class from_class,
17357 enum reg_class to_class,
17358 enum machine_mode mode)
17360 enum rs6000_reg_type from_type, to_type;
17361 bool altivec_p = ((from_class == ALTIVEC_REGS)
17362 || (to_class == ALTIVEC_REGS));
17364 /* If a simple/direct move is available, we don't need secondary memory. */
17365 from_type = reg_class_to_reg_type[(int)from_class];
17366 to_type = reg_class_to_reg_type[(int)to_class];
17368 if (rs6000_secondary_reload_move (to_type, from_type, mode,
17369 (secondary_reload_info *)0, altivec_p))
17370 return false;
17372 /* If we have a floating point or vector register class, we need to use
17373 memory to transfer the data. */
17374 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
17375 return true;
17377 return false;
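/* For example, without the direct move instructions a DFmode copy
   between FLOAT_REGS and GENERAL_REGS has to go through a stack slot,
   while a copy between two GPRs never needs secondary memory.  */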
17380 /* Debug version of rs6000_secondary_memory_needed. */
17381 static bool
17382 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
17383 enum reg_class to_class,
17384 enum machine_mode mode)
17386 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
17388 fprintf (stderr,
17389 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
17390 "to_class = %s, mode = %s\n",
17391 ret ? "true" : "false",
17392 reg_class_names[from_class],
17393 reg_class_names[to_class],
17394 GET_MODE_NAME (mode));
17396 return ret;
17399 /* Return the register class of a scratch register needed to copy IN into
17400 or out of a register in RCLASS in MODE. If it can be done directly,
17401 NO_REGS is returned. */
17403 static enum reg_class
17404 rs6000_secondary_reload_class (enum reg_class rclass, enum machine_mode mode,
17405 rtx in)
17407 int regno;
17409 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
17410 #if TARGET_MACHO
17411 && MACHOPIC_INDIRECT
17412 #endif
17415 /* We cannot copy a symbolic operand directly into anything
17416 other than BASE_REGS for TARGET_ELF. So indicate that a
17417 register from BASE_REGS is needed as an intermediate
17418 register.
17420 On Darwin, pic addresses require a load from memory, which
17421 needs a base register. */
17422 if (rclass != BASE_REGS
17423 && (GET_CODE (in) == SYMBOL_REF
17424 || GET_CODE (in) == HIGH
17425 || GET_CODE (in) == LABEL_REF
17426 || GET_CODE (in) == CONST))
17427 return BASE_REGS;
17430 if (GET_CODE (in) == REG)
17432 regno = REGNO (in);
17433 if (regno >= FIRST_PSEUDO_REGISTER)
17435 regno = true_regnum (in);
17436 if (regno >= FIRST_PSEUDO_REGISTER)
17437 regno = -1;
17440 else if (GET_CODE (in) == SUBREG)
17442 regno = true_regnum (in);
17443 if (regno >= FIRST_PSEUDO_REGISTER)
17444 regno = -1;
17446 else
17447 regno = -1;
17449 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
17450 into anything. */
17451 if (rclass == GENERAL_REGS || rclass == BASE_REGS
17452 || (regno >= 0 && INT_REGNO_P (regno)))
17453 return NO_REGS;
17455 /* Constants, memory, and FP registers can go into FP registers. */
17456 if ((regno == -1 || FP_REGNO_P (regno))
17457 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
17458 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
17460 /* Memory, and FP/altivec registers can go into fp/altivec registers under
17461 VSX. However, for scalar variables, use the traditional floating point
17462 registers so that we can use offset+register addressing. */
17463 if (TARGET_VSX
17464 && (regno == -1 || VSX_REGNO_P (regno))
17465 && VSX_REG_CLASS_P (rclass))
17467 if (GET_MODE_SIZE (mode) < 16)
17468 return FLOAT_REGS;
17470 return NO_REGS;
17473 /* Memory, and AltiVec registers can go into AltiVec registers. */
17474 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
17475 && rclass == ALTIVEC_REGS)
17476 return NO_REGS;
17478 /* We can copy among the CR registers. */
17479 if ((rclass == CR_REGS || rclass == CR0_REGS)
17480 && regno >= 0 && CR_REGNO_P (regno))
17481 return NO_REGS;
17483 /* Otherwise, we need GENERAL_REGS. */
17484 return GENERAL_REGS;
17487 /* Debug version of rs6000_secondary_reload_class. */
17488 static enum reg_class
17489 rs6000_debug_secondary_reload_class (enum reg_class rclass,
17490 enum machine_mode mode, rtx in)
17492 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
17493 fprintf (stderr,
17494 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
17495 "mode = %s, input rtx:\n",
17496 reg_class_names[ret], reg_class_names[rclass],
17497 GET_MODE_NAME (mode));
17498 debug_rtx (in);
17500 return ret;
17503 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
17505 static bool
17506 rs6000_cannot_change_mode_class (enum machine_mode from,
17507 enum machine_mode to,
17508 enum reg_class rclass)
17510 unsigned from_size = GET_MODE_SIZE (from);
17511 unsigned to_size = GET_MODE_SIZE (to);
17513 if (from_size != to_size)
17515 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
17517 if (reg_classes_intersect_p (xclass, rclass))
17519 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
17520 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
17522 /* Don't allow 64-bit types to overlap with 128-bit types that take a
17523 single register under VSX because the scalar part of the register
17524 is in the upper 64-bits, and not the lower 64-bits. Types like
17525 TFmode/TDmode that take 2 scalar registers can overlap. 128-bit
17526 IEEE floating point can't overlap, and neither can small
17527 values. */
17529 if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
17530 return true;
17532 /* TDmode in floating-mode registers must always go into a register
17533 pair with the most significant word in the even-numbered register
17534 to match ISA requirements. In little-endian mode, this does not
17535 match subreg numbering, so we cannot allow subregs. */
17536 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
17537 return true;
17539 if (from_size < 8 || to_size < 8)
17540 return true;
17542 if (from_size == 8 && (8 * to_nregs) != to_size)
17543 return true;
17545 if (to_size == 8 && (8 * from_nregs) != from_size)
17546 return true;
17548 return false;
17550 else
17551 return false;
17554 if (TARGET_E500_DOUBLE
17555 && ((((to) == DFmode) + ((from) == DFmode)) == 1
17556 || (((to) == TFmode) + ((from) == TFmode)) == 1
17557 || (((to) == DDmode) + ((from) == DDmode)) == 1
17558 || (((to) == TDmode) + ((from) == TDmode)) == 1
17559 || (((to) == DImode) + ((from) == DImode)) == 1))
17560 return true;
17562 /* Since the VSX register set includes traditional floating point registers
17563 and altivec registers, just check for the size being different instead of
17564 trying to check whether the modes are vector modes. Otherwise it won't
17565 allow, say, DF and DI to change classes. For types like TFmode and TDmode
17566 that take 2 64-bit registers, rather than a single 128-bit register, don't
17567 allow subregs of those types to other 128-bit types. */
17568 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17570 unsigned num_regs = (from_size + 15) / 16;
17571 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
17572 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
17573 return true;
17575 return (from_size != 8 && from_size != 16);
17578 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
17579 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
17580 return true;
17582 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
17583 && reg_classes_intersect_p (GENERAL_REGS, rclass))
17584 return true;
17586 return false;
17589 /* Debug version of rs6000_cannot_change_mode_class. */
17590 static bool
17591 rs6000_debug_cannot_change_mode_class (enum machine_mode from,
17592 enum machine_mode to,
17593 enum reg_class rclass)
17595 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
17597 fprintf (stderr,
17598 "rs6000_cannot_change_mode_class, return %s, from = %s, "
17599 "to = %s, rclass = %s\n",
17600 ret ? "true" : "false",
17601 GET_MODE_NAME (from), GET_MODE_NAME (to),
17602 reg_class_names[rclass]);
17604 return ret;
17607 /* Return a string to do a move operation of 128 bits of data. */
17609 const char *
17610 rs6000_output_move_128bit (rtx operands[])
17612 rtx dest = operands[0];
17613 rtx src = operands[1];
17614 enum machine_mode mode = GET_MODE (dest);
17615 int dest_regno;
17616 int src_regno;
17617 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
17618 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
17620 if (REG_P (dest))
17622 dest_regno = REGNO (dest);
17623 dest_gpr_p = INT_REGNO_P (dest_regno);
17624 dest_fp_p = FP_REGNO_P (dest_regno);
17625 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
17626 dest_vsx_p = dest_fp_p | dest_vmx_p;
17628 else
17630 dest_regno = -1;
17631 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
17634 if (REG_P (src))
17636 src_regno = REGNO (src);
17637 src_gpr_p = INT_REGNO_P (src_regno);
17638 src_fp_p = FP_REGNO_P (src_regno);
17639 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
17640 src_vsx_p = src_fp_p | src_vmx_p;
17642 else
17644 src_regno = -1;
17645 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
17648 /* Register moves. */
17649 if (dest_regno >= 0 && src_regno >= 0)
17651 if (dest_gpr_p)
17653 if (src_gpr_p)
17654 return "#";
17656 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
17657 return "#";
17660 else if (TARGET_VSX && dest_vsx_p)
17662 if (src_vsx_p)
17663 return "xxlor %x0,%x1,%x1";
17665 else if (TARGET_DIRECT_MOVE && src_gpr_p)
17666 return "#";
17669 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
17670 return "vor %0,%1,%1";
17672 else if (dest_fp_p && src_fp_p)
17673 return "#";
17676 /* Loads. */
17677 else if (dest_regno >= 0 && MEM_P (src))
17679 if (dest_gpr_p)
17681 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
17682 return "lq %0,%1";
17683 else
17684 return "#";
17687 else if (TARGET_ALTIVEC && dest_vmx_p
17688 && altivec_indexed_or_indirect_operand (src, mode))
17689 return "lvx %0,%y1";
17691 else if (TARGET_VSX && dest_vsx_p)
17693 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
17694 return "lxvw4x %x0,%y1";
17695 else
17696 return "lxvd2x %x0,%y1";
17699 else if (TARGET_ALTIVEC && dest_vmx_p)
17700 return "lvx %0,%y1";
17702 else if (dest_fp_p)
17703 return "#";
17706 /* Stores. */
17707 else if (src_regno >= 0 && MEM_P (dest))
17709 if (src_gpr_p)
17711 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
17712 return "stq %1,%0";
17713 else
17714 return "#";
17717 else if (TARGET_ALTIVEC && src_vmx_p
17718 && altivec_indexed_or_indirect_operand (dest, mode))
17719 return "stvx %1,%y0";
17721 else if (TARGET_VSX && src_vsx_p)
17723 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
17724 return "stxvw4x %x1,%y0";
17725 else
17726 return "stxvd2x %x1,%y0";
17729 else if (TARGET_ALTIVEC && src_vmx_p)
17730 return "stvx %1,%y0";
17732 else if (src_fp_p)
17733 return "#";
17736 /* Constants. */
17737 else if (dest_regno >= 0
17738 && (GET_CODE (src) == CONST_INT
17739 || GET_CODE (src) == CONST_WIDE_INT
17740 || GET_CODE (src) == CONST_DOUBLE
17741 || GET_CODE (src) == CONST_VECTOR))
17743 if (dest_gpr_p)
17744 return "#";
17746 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
17747 return "xxlxor %x0,%x0,%x0";
17749 else if (TARGET_ALTIVEC && dest_vmx_p)
17750 return output_vec_const_move (operands);
17753 if (TARGET_DEBUG_ADDR)
17755 fprintf (stderr, "\n===== Bad 128 bit move:\n");
17756 debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
17759 gcc_unreachable ();
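/* A few illustrative outcomes: with VSX enabled, a copy between two
   vector registers emits "xxlor %x0,%x1,%x1"; with only Altivec it
   emits "vor %0,%1,%1"; and a GPR-to-GPR copy returns "#" so the move
   is split into word-sized pieces after reload.  */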
17762 /* Validate a 128-bit move. */
17763 bool
17764 rs6000_move_128bit_ok_p (rtx operands[])
17766 enum machine_mode mode = GET_MODE (operands[0]);
17767 return (gpc_reg_operand (operands[0], mode)
17768 || gpc_reg_operand (operands[1], mode));
17771 /* Return true if a 128-bit move needs to be split. */
17772 bool
17773 rs6000_split_128bit_ok_p (rtx operands[])
17775 if (!reload_completed)
17776 return false;
17778 if (!gpr_or_gpr_p (operands[0], operands[1]))
17779 return false;
17781 if (quad_load_store_p (operands[0], operands[1]))
17782 return false;
17784 return true;
17788 /* Given a comparison operation, return the bit number in CCR to test. We
17789 know this is a valid comparison.
17791 SCC_P is 1 if this is for an scc. That means that %D will have been
17792 used instead of %C, so the bits will be in different places.
17794 Return -1 if OP isn't a valid comparison for some reason. */
17797 ccr_bit (rtx op, int scc_p)
17799 enum rtx_code code = GET_CODE (op);
17800 enum machine_mode cc_mode;
17801 int cc_regnum;
17802 int base_bit;
17803 rtx reg;
17805 if (!COMPARISON_P (op))
17806 return -1;
17808 reg = XEXP (op, 0);
17810 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
17812 cc_mode = GET_MODE (reg);
17813 cc_regnum = REGNO (reg);
17814 base_bit = 4 * (cc_regnum - CR0_REGNO);
17816 validate_condition_mode (code, cc_mode);
17818 /* When generating a sCOND operation, only positive conditions are
17819 allowed. */
17820 gcc_assert (!scc_p
17821 || code == EQ || code == GT || code == LT || code == UNORDERED
17822 || code == GTU || code == LTU);
17824 switch (code)
17826 case NE:
17827 return scc_p ? base_bit + 3 : base_bit + 2;
17828 case EQ:
17829 return base_bit + 2;
17830 case GT: case GTU: case UNLE:
17831 return base_bit + 1;
17832 case LT: case LTU: case UNGE:
17833 return base_bit;
17834 case ORDERED: case UNORDERED:
17835 return base_bit + 3;
17837 case GE: case GEU:
17838 /* If scc, we will have done a cror to put the bit in the
17839 unordered position. So test that bit. For integer, this is ! LT
17840 unless this is an scc insn. */
17841 return scc_p ? base_bit + 3 : base_bit;
17843 case LE: case LEU:
17844 return scc_p ? base_bit + 3 : base_bit + 1;
17846 default:
17847 gcc_unreachable ();
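/* Worked example (illustrative, not in the original source): an EQ
   test against CR3 gives base_bit = 4 * 3 = 12, so ccr_bit returns
   14, the EQ bit of CR3 within the CCR.  With SCC_P set, GE/GEU and
   LE/LEU return base_bit + 3 instead, because the scc sequence has
   already cror'ed its result into the unordered slot.  */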
17851 /* Return the GOT register. */
17853 rtx
17854 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
17856 /* The second flow pass currently (June 1999) can't update
17857 regs_ever_live without disturbing other parts of the compiler, so
17858 update it here to make the prolog/epilogue code happy. */
17859 if (!can_create_pseudo_p ()
17860 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
17861 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
17863 crtl->uses_pic_offset_table = 1;
17865 return pic_offset_table_rtx;
17868 static rs6000_stack_t stack_info;
17870 /* Function to init struct machine_function.
17871 This will be called, via a pointer variable,
17872 from push_function_context. */
17874 static struct machine_function *
17875 rs6000_init_machine_status (void)
17877 stack_info.reload_completed = 0;
17878 return ggc_cleared_alloc<machine_function> ();
17881 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
17883 int
17884 extract_MB (rtx op)
17886 int i;
17887 unsigned long val = INTVAL (op);
17889 /* If the high bit is zero, the value is the first 1 bit we find
17890 from the left. */
17891 if ((val & 0x80000000) == 0)
17893 gcc_assert (val & 0xffffffff);
17895 i = 1;
17896 while (((val <<= 1) & 0x80000000) == 0)
17897 ++i;
17898 return i;
17901 /* If the high bit is set and the low bit is not, or the mask is all
17902 1's, the value is zero. */
17903 if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
17904 return 0;
17906 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
17907 from the right. */
17908 i = 31;
17909 while (((val >>= 1) & 1) != 0)
17910 --i;
17912 return i;
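/* Worked example (illustrative, not in the original source): for the
   mask 0x0ffffff0 -- bits 4..27 set, counting the LSB as bit 0 --
   the high bit is clear, so the loop above counts from the left and
   returns MB = 4 in IBM bit numbering (0 = MSB); extract_ME below
   returns the matching end position 27.  */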
17915 int
17916 extract_ME (rtx op)
17918 int i;
17919 unsigned long val = INTVAL (op);
17921 /* If the low bit is zero, the value is the first 1 bit we find from
17922 the right. */
17923 if ((val & 1) == 0)
17925 gcc_assert (val & 0xffffffff);
17927 i = 30;
17928 while (((val >>= 1) & 1) == 0)
17929 --i;
17931 return i;
17934 /* If the low bit is set and the high bit is not, or the mask is all
17935 1's, the value is 31. */
17936 if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
17937 return 31;
17939 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
17940 from the left. */
17941 i = 0;
17942 while (((val <<= 1) & 0x80000000) != 0)
17943 ++i;
17945 return i;
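/* Wrap-around example (illustrative, not in the original source):
   0xf000000f has both end bits set with zeros in between, so
   extract_MB returns 28 and extract_ME returns 3 -- the rlwinm mask
   runs from IBM bit 28 through bit 31 and wraps around to end at
   IBM bit 3.  */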
17948 /* Write out a function code label. */
17950 void
17951 rs6000_output_function_entry (FILE *file, const char *fname)
17953 if (fname[0] != '.')
17955 switch (DEFAULT_ABI)
17957 default:
17958 gcc_unreachable ();
17960 case ABI_AIX:
17961 if (DOT_SYMBOLS)
17962 putc ('.', file);
17963 else
17964 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
17965 break;
17967 case ABI_ELFv2:
17968 case ABI_V4:
17969 case ABI_DARWIN:
17970 break;
17974 RS6000_OUTPUT_BASENAME (file, fname);
17977 /* Print an operand. Recognize special options, documented below. */
17979 #if TARGET_ELF
17980 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
17981 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
17982 #else
17983 #define SMALL_DATA_RELOC "sda21"
17984 #define SMALL_DATA_REG 0
17985 #endif
17987 void
17988 print_operand (FILE *file, rtx x, int code)
17990 int i;
17991 unsigned HOST_WIDE_INT uval;
17993 switch (code)
17995 /* %a is output_address. */
17997 case 'b':
17998 /* If constant, low-order 16 bits of constant, unsigned.
17999 Otherwise, write normally. */
18000 if (INT_P (x))
18001 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
18002 else
18003 print_operand (file, x, 0);
18004 return;
18006 case 'B':
18007 /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
18008 for 64-bit mask direction. */
18009 putc (((INTVAL (x) & 1) == 0 ? 'r' : 'l'), file);
18010 return;
18012 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
18013 output_operand. */
18015 case 'D':
18016 /* Like 'J' but get to the GT bit only. */
18017 gcc_assert (REG_P (x));
18019 /* Bit 1 is GT bit. */
18020 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
18022 /* Add one for shift count in rlinm for scc. */
18023 fprintf (file, "%d", i + 1);
18024 return;
18026 case 'e':
18027 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
18028 if (! INT_P (x))
18030 output_operand_lossage ("invalid %%e value");
18031 return;
18034 uval = INTVAL (x);
18035 if ((uval & 0xffff) == 0 && uval != 0)
18036 putc ('s', file);
18037 return;
18039 case 'E':
18040 /* X is a CR register. Print the number of the EQ bit of the CR */
18041 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18042 output_operand_lossage ("invalid %%E value");
18043 else
18044 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
18045 return;
18047 case 'f':
18048 /* X is a CR register. Print the shift count needed to move it
18049 to the high-order four bits. */
18050 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18051 output_operand_lossage ("invalid %%f value");
18052 else
18053 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
18054 return;
18056 case 'F':
18057 /* Similar, but print the count for the rotate in the opposite
18058 direction. */
18059 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18060 output_operand_lossage ("invalid %%F value");
18061 else
18062 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
18063 return;
18065 case 'G':
18066 /* X is a constant integer. If it is negative, print "m",
18067 otherwise print "z". This is to make an aze or ame insn. */
18068 if (GET_CODE (x) != CONST_INT)
18069 output_operand_lossage ("invalid %%G value");
18070 else if (INTVAL (x) >= 0)
18071 putc ('z', file);
18072 else
18073 putc ('m', file);
18074 return;
18076 case 'h':
18077 /* If constant, output low-order five bits. Otherwise, write
18078 normally. */
18079 if (INT_P (x))
18080 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
18081 else
18082 print_operand (file, x, 0);
18083 return;
18085 case 'H':
18086 /* If constant, output low-order six bits. Otherwise, write
18087 normally. */
18088 if (INT_P (x))
18089 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
18090 else
18091 print_operand (file, x, 0);
18092 return;
18094 case 'I':
18095 /* Print `i' if this is a constant, else nothing. */
18096 if (INT_P (x))
18097 putc ('i', file);
18098 return;
18100 case 'j':
18101 /* Write the bit number in CCR for jump. */
18102 i = ccr_bit (x, 0);
18103 if (i == -1)
18104 output_operand_lossage ("invalid %%j code");
18105 else
18106 fprintf (file, "%d", i);
18107 return;
18109 case 'J':
18110 /* Similar, but add one for shift count in rlinm for scc and pass
18111 scc flag to `ccr_bit'. */
18112 i = ccr_bit (x, 1);
18113 if (i == -1)
18114 output_operand_lossage ("invalid %%J code");
18115 else
18116 /* If we want bit 31, write a shift count of zero, not 32. */
18117 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18118 return;
18120 case 'k':
18121 /* X must be a constant. Write the 1's complement of the
18122 constant. */
18123 if (! INT_P (x))
18124 output_operand_lossage ("invalid %%k value");
18125 else
18126 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
18127 return;
18129 case 'K':
18130 /* X must be a symbolic constant on ELF. Write an
18131 expression suitable for an 'addi' that adds in the low 16
18132 bits of the MEM. */
18133 if (GET_CODE (x) == CONST)
18135 if (GET_CODE (XEXP (x, 0)) != PLUS
18136 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
18137 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
18138 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
18139 output_operand_lossage ("invalid %%K value");
18141 print_operand_address (file, x);
18142 fputs ("@l", file);
18143 return;
18145 /* %l is output_asm_label. */
18147 case 'L':
18148 /* Write second word of DImode or DFmode reference. Works on register
18149 or non-indexed memory only. */
18150 if (REG_P (x))
18151 fputs (reg_names[REGNO (x) + 1], file);
18152 else if (MEM_P (x))
18154 /* Handle possible auto-increment. Since it is pre-increment and
18155 we have already done it, we can just use an offset of one word. */
18156 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18157 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18158 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18159 UNITS_PER_WORD));
18160 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18161 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18162 UNITS_PER_WORD));
18163 else
18164 output_address (XEXP (adjust_address_nv (x, SImode,
18165 UNITS_PER_WORD),
18166 0));
18168 if (small_data_operand (x, GET_MODE (x)))
18169 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18170 reg_names[SMALL_DATA_REG]);
18172 return;
18174 case 'm':
18175 /* MB value for a mask operand. */
18176 if (! mask_operand (x, SImode))
18177 output_operand_lossage ("invalid %%m value");
18179 fprintf (file, "%d", extract_MB (x));
18180 return;
18182 case 'M':
18183 /* ME value for a mask operand. */
18184 if (! mask_operand (x, SImode))
18185 output_operand_lossage ("invalid %%M value");
18187 fprintf (file, "%d", extract_ME (x));
18188 return;
18190 /* %n outputs the negative of its operand. */
18192 case 'N':
18193 /* Write the number of elements in the vector times 4. */
18194 if (GET_CODE (x) != PARALLEL)
18195 output_operand_lossage ("invalid %%N value");
18196 else
18197 fprintf (file, "%d", XVECLEN (x, 0) * 4);
18198 return;
18200 case 'O':
18201 /* Similar, but subtract 1 first. */
18202 if (GET_CODE (x) != PARALLEL)
18203 output_operand_lossage ("invalid %%O value");
18204 else
18205 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
18206 return;
18208 case 'p':
18209 /* X is a CONST_INT that is a power of two. Output the logarithm. */
18210 if (! INT_P (x)
18211 || INTVAL (x) < 0
18212 || (i = exact_log2 (INTVAL (x))) < 0)
18213 output_operand_lossage ("invalid %%p value");
18214 else
18215 fprintf (file, "%d", i);
18216 return;
18218 case 'P':
18219 /* The operand must be an indirect memory reference. The result
18220 is the register name. */
18221 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
18222 || REGNO (XEXP (x, 0)) >= 32)
18223 output_operand_lossage ("invalid %%P value");
18224 else
18225 fputs (reg_names[REGNO (XEXP (x, 0))], file);
18226 return;
18228 case 'q':
18229 /* This outputs the logical code corresponding to a boolean
18230 expression. The expression may have one or both operands
18231 negated (if one, only the first one). For condition register
18232 logical operations, it will also treat the negated
18233 CR codes as NOTs, but not handle NOTs of them. */
18235 const char *const *t = 0;
18236 const char *s;
18237 enum rtx_code code = GET_CODE (x);
18238 static const char * const tbl[3][3] = {
18239 { "and", "andc", "nor" },
18240 { "or", "orc", "nand" },
18241 { "xor", "eqv", "xor" } };
18243 if (code == AND)
18244 t = tbl[0];
18245 else if (code == IOR)
18246 t = tbl[1];
18247 else if (code == XOR)
18248 t = tbl[2];
18249 else
18250 output_operand_lossage ("invalid %%q value");
18252 if (GET_CODE (XEXP (x, 0)) != NOT)
18253 s = t[0];
18254 else
18256 if (GET_CODE (XEXP (x, 1)) == NOT)
18257 s = t[2];
18258 else
18259 s = t[1];
18262 fputs (s, file);
18264 return;
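/* Examples (illustrative, not in the original source):
   (ior (not a) b) selects tbl[1][1] and prints "orc", while
   (and (not a) (not b)) selects tbl[0][2] and prints "nor".  */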
18266 case 'Q':
18267 if (! TARGET_MFCRF)
18268 return;
18269 fputc (',', file);
18270 /* FALLTHRU */
18272 case 'R':
18273 /* X is a CR register. Print the mask for `mtcrf'. */
18274 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18275 output_operand_lossage ("invalid %%R value");
18276 else
18277 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
18278 return;
18280 case 's':
18281 /* Low 5 bits of 32 - value */
18282 if (! INT_P (x))
18283 output_operand_lossage ("invalid %%s value");
18284 else
18285 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
18286 return;
18288 case 'S':
18289 /* PowerPC64 mask position. An all-zeros mask is excluded.
18290 CONST_INT 32-bit mask is considered sign-extended so any
18291 transition must occur within the CONST_INT, not on the boundary. */
18292 if (! mask64_operand (x, DImode))
18293 output_operand_lossage ("invalid %%S value");
18295 uval = INTVAL (x);
18297 if (uval & 1) /* Clear Left */
18299 #if HOST_BITS_PER_WIDE_INT > 64
18300 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18301 #endif
18302 i = 64;
18304 else /* Clear Right */
18306 uval = ~uval;
18307 #if HOST_BITS_PER_WIDE_INT > 64
18308 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18309 #endif
18310 i = 63;
18312 while (uval != 0)
18313 --i, uval >>= 1;
18314 gcc_assert (i >= 0);
18315 fprintf (file, "%d", i);
18316 return;
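/* Worked example (illustrative, not in the original source): for the
   64-bit mask 0x00000000ffffffff the low bit is set (a "clear left"
   mask), so the loop starts at i = 64, shifts the 32 set bits away,
   and prints 32 -- the mask-begin position for a clear-left rotate
   pattern.  */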
18318 case 't':
18319 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
18320 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
18322 /* Bit 3 is OV bit. */
18323 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
18325 /* If we want bit 31, write a shift count of zero, not 32. */
18326 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18327 return;
18329 case 'T':
18330 /* Print the symbolic name of a branch target register. */
18331 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
18332 && REGNO (x) != CTR_REGNO))
18333 output_operand_lossage ("invalid %%T value");
18334 else if (REGNO (x) == LR_REGNO)
18335 fputs ("lr", file);
18336 else
18337 fputs ("ctr", file);
18338 return;
18340 case 'u':
18341 /* High-order or low-order 16 bits of constant, whichever is non-zero,
18342 for use in unsigned operand. */
18343 if (! INT_P (x))
18345 output_operand_lossage ("invalid %%u value");
18346 return;
18349 uval = INTVAL (x);
18350 if ((uval & 0xffff) == 0)
18351 uval >>= 16;
18353 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
18354 return;
18356 case 'v':
18357 /* High-order 16 bits of constant for use in signed operand. */
18358 if (! INT_P (x))
18359 output_operand_lossage ("invalid %%v value");
18360 else
18361 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
18362 (INTVAL (x) >> 16) & 0xffff);
18363 return;
18365 case 'U':
18366 /* Print `u' if this has an auto-increment or auto-decrement. */
18367 if (MEM_P (x)
18368 && (GET_CODE (XEXP (x, 0)) == PRE_INC
18369 || GET_CODE (XEXP (x, 0)) == PRE_DEC
18370 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
18371 putc ('u', file);
18372 return;
18374 case 'V':
18375 /* Print the trap code for this operand. */
18376 switch (GET_CODE (x))
18378 case EQ:
18379 fputs ("eq", file); /* 4 */
18380 break;
18381 case NE:
18382 fputs ("ne", file); /* 24 */
18383 break;
18384 case LT:
18385 fputs ("lt", file); /* 16 */
18386 break;
18387 case LE:
18388 fputs ("le", file); /* 20 */
18389 break;
18390 case GT:
18391 fputs ("gt", file); /* 8 */
18392 break;
18393 case GE:
18394 fputs ("ge", file); /* 12 */
18395 break;
18396 case LTU:
18397 fputs ("llt", file); /* 2 */
18398 break;
18399 case LEU:
18400 fputs ("lle", file); /* 6 */
18401 break;
18402 case GTU:
18403 fputs ("lgt", file); /* 1 */
18404 break;
18405 case GEU:
18406 fputs ("lge", file); /* 5 */
18407 break;
18408 default:
18409 gcc_unreachable ();
18411 break;
18413 case 'w':
18414 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
18415 normally. */
18416 if (INT_P (x))
18417 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
18418 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
18419 else
18420 print_operand (file, x, 0);
18421 return;
18423 case 'W':
18424 /* MB value for a PowerPC64 rldic operand. */
18425 i = clz_hwi (INTVAL (x));
18427 fprintf (file, "%d", i);
18428 return;
18430 case 'x':
18431 /* X is a FPR or Altivec register used in a VSX context. */
18432 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
18433 output_operand_lossage ("invalid %%x value");
18434 else
18436 int reg = REGNO (x);
18437 int vsx_reg = (FP_REGNO_P (reg)
18438 ? reg - 32
18439 : reg - FIRST_ALTIVEC_REGNO + 32);
18441 #ifdef TARGET_REGNAMES
18442 if (TARGET_REGNAMES)
18443 fprintf (file, "%%vs%d", vsx_reg);
18444 else
18445 #endif
18446 fprintf (file, "%d", vsx_reg);
18448 return;
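/* Example (illustrative, not in the original source): FPR 5 (hard
   regno 37, since the FPRs start at 32) maps to VSX register 5,
   while AltiVec register v5 maps to VSX register 37 -- the FP and
   AltiVec files occupy the low and high halves of the 64 VSX
   registers.  */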
18450 case 'X':
18451 if (MEM_P (x)
18452 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
18453 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
18454 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
18455 putc ('x', file);
18456 return;
18458 case 'Y':
18459 /* Like 'L', for third word of TImode/PTImode */
18460 if (REG_P (x))
18461 fputs (reg_names[REGNO (x) + 2], file);
18462 else if (MEM_P (x))
18464 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18465 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18466 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18467 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18468 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18469 else
18470 output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
18471 if (small_data_operand (x, GET_MODE (x)))
18472 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18473 reg_names[SMALL_DATA_REG]);
18475 return;
18477 case 'z':
18478 /* X is a SYMBOL_REF. Write out the name preceded by a
18479 period and without any trailing data in brackets. Used for function
18480 names. If we are configured for System V (or the embedded ABI) on
18481 the PowerPC, do not emit the period, since those systems do not use
18482 TOCs and the like. */
18483 gcc_assert (GET_CODE (x) == SYMBOL_REF);
18485 /* For macho, check to see if we need a stub. */
18486 if (TARGET_MACHO)
18488 const char *name = XSTR (x, 0);
18489 #if TARGET_MACHO
18490 if (darwin_emit_branch_islands
18491 && MACHOPIC_INDIRECT
18492 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
18493 name = machopic_indirection_name (x, /*stub_p=*/true);
18494 #endif
18495 assemble_name (file, name);
18497 else if (!DOT_SYMBOLS)
18498 assemble_name (file, XSTR (x, 0));
18499 else
18500 rs6000_output_function_entry (file, XSTR (x, 0));
18501 return;
18503 case 'Z':
18504 /* Like 'L', for last word of TImode/PTImode. */
18505 if (REG_P (x))
18506 fputs (reg_names[REGNO (x) + 3], file);
18507 else if (MEM_P (x))
18509 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18510 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18511 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18512 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18513 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18514 else
18515 output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
18516 if (small_data_operand (x, GET_MODE (x)))
18517 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18518 reg_names[SMALL_DATA_REG]);
18520 return;
18522 /* Print AltiVec or SPE memory operand. */
18523 case 'y':
18525 rtx tmp;
18527 gcc_assert (MEM_P (x));
18529 tmp = XEXP (x, 0);
18531 /* Ugly hack because %y is overloaded. */
18532 if ((TARGET_SPE || TARGET_E500_DOUBLE)
18533 && (GET_MODE_SIZE (GET_MODE (x)) == 8
18534 || GET_MODE (x) == TFmode
18535 || GET_MODE (x) == TImode
18536 || GET_MODE (x) == PTImode))
18538 /* Handle [reg]. */
18539 if (REG_P (tmp))
18541 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
18542 break;
18544 /* Handle [reg+UIMM]. */
18545 else if (GET_CODE (tmp) == PLUS &&
18546 GET_CODE (XEXP (tmp, 1)) == CONST_INT)
18548 int x;
18550 gcc_assert (REG_P (XEXP (tmp, 0)));
18552 x = INTVAL (XEXP (tmp, 1));
18553 fprintf (file, "%d(%s)", x, reg_names[REGNO (XEXP (tmp, 0))]);
18554 break;
18557 /* Fall through. Must be [reg+reg]. */
18559 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
18560 && GET_CODE (tmp) == AND
18561 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
18562 && INTVAL (XEXP (tmp, 1)) == -16)
18563 tmp = XEXP (tmp, 0);
18564 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
18565 && GET_CODE (tmp) == PRE_MODIFY)
18566 tmp = XEXP (tmp, 1);
18567 if (REG_P (tmp))
18568 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
18569 else
18571 if (GET_CODE (tmp) != PLUS
18572 || !REG_P (XEXP (tmp, 0))
18573 || !REG_P (XEXP (tmp, 1)))
18575 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
18576 break;
18579 if (REGNO (XEXP (tmp, 0)) == 0)
18580 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
18581 reg_names[ REGNO (XEXP (tmp, 0)) ]);
18582 else
18583 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
18584 reg_names[ REGNO (XEXP (tmp, 1)) ]);
18586 break;
18589 case 0:
18590 if (REG_P (x))
18591 fprintf (file, "%s", reg_names[REGNO (x)]);
18592 else if (MEM_P (x))
18594 /* We need to handle PRE_INC and PRE_DEC here, since we need to
18595 know the width from the mode. */
18596 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
18597 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
18598 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18599 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
18600 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
18601 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18602 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18603 output_address (XEXP (XEXP (x, 0), 1));
18604 else
18605 output_address (XEXP (x, 0));
18607 else
18609 if (toc_relative_expr_p (x, false))
18610 /* This hack along with a corresponding hack in
18611 rs6000_output_addr_const_extra arranges to output addends
18612 where the assembler expects to find them. eg.
18613 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
18614 without this hack would be output as "x@toc+4". We
18615 want "x+4@toc". */
18616 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18617 else
18618 output_addr_const (file, x);
18620 return;
18622 case '&':
18623 if (const char *name = get_some_local_dynamic_name ())
18624 assemble_name (file, name);
18625 else
18626 output_operand_lossage ("'%%&' used without any "
18627 "local dynamic TLS references");
18628 return;
18630 default:
18631 output_operand_lossage ("invalid %%xn code");
18635 /* Print the address of an operand. */
18637 void
18638 print_operand_address (FILE *file, rtx x)
18640 if (REG_P (x))
18641 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
18642 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
18643 || GET_CODE (x) == LABEL_REF)
18645 output_addr_const (file, x);
18646 if (small_data_operand (x, GET_MODE (x)))
18647 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18648 reg_names[SMALL_DATA_REG]);
18649 else
18650 gcc_assert (!TARGET_TOC);
18652 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
18653 && REG_P (XEXP (x, 1)))
18655 if (REGNO (XEXP (x, 0)) == 0)
18656 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
18657 reg_names[ REGNO (XEXP (x, 0)) ]);
18658 else
18659 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
18660 reg_names[ REGNO (XEXP (x, 1)) ]);
18662 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
18663 && GET_CODE (XEXP (x, 1)) == CONST_INT)
18664 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
18665 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
18666 #if TARGET_MACHO
18667 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
18668 && CONSTANT_P (XEXP (x, 1)))
18670 fprintf (file, "lo16(");
18671 output_addr_const (file, XEXP (x, 1));
18672 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
18674 #endif
18675 #if TARGET_ELF
18676 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
18677 && CONSTANT_P (XEXP (x, 1)))
18679 output_addr_const (file, XEXP (x, 1));
18680 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
18682 #endif
18683 else if (toc_relative_expr_p (x, false))
18685 /* This hack along with a corresponding hack in
18686 rs6000_output_addr_const_extra arranges to output addends
18687 where the assembler expects to find them. eg.
18688 (lo_sum (reg 9)
18689 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
18690 without this hack would be output as "x@toc+8@l(9)". We
18691 want "x+8@toc@l(9)". */
18692 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18693 if (GET_CODE (x) == LO_SUM)
18694 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
18695 else
18696 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
18698 else
18699 gcc_unreachable ();
18702 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
18704 static bool
18705 rs6000_output_addr_const_extra (FILE *file, rtx x)
18707 if (GET_CODE (x) == UNSPEC)
18708 switch (XINT (x, 1))
18710 case UNSPEC_TOCREL:
18711 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
18712 && REG_P (XVECEXP (x, 0, 1))
18713 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
18714 output_addr_const (file, XVECEXP (x, 0, 0));
18715 if (x == tocrel_base && tocrel_offset != const0_rtx)
18717 if (INTVAL (tocrel_offset) >= 0)
18718 fprintf (file, "+");
18719 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
18721 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
18723 putc ('-', file);
18724 assemble_name (file, toc_label_name);
18726 else if (TARGET_ELF)
18727 fputs ("@toc", file);
18728 return true;
18730 #if TARGET_MACHO
18731 case UNSPEC_MACHOPIC_OFFSET:
18732 output_addr_const (file, XVECEXP (x, 0, 0));
18733 putc ('-', file);
18734 machopic_output_function_base_name (file);
18735 return true;
18736 #endif
18738 return false;
18741 /* Target hook for assembling integer objects. The PowerPC version has
18742 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
18743 is defined. It also needs to handle DI-mode objects on 64-bit
18744 targets. */
18746 static bool
18747 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
18749 #ifdef RELOCATABLE_NEEDS_FIXUP
18750 /* Special handling for SI values. */
18751 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
18753 static int recurse = 0;
18755 /* For -mrelocatable, we mark all addresses that need to be fixed up in
18756 the .fixup section. Since the TOC section is already relocated, we
18757 don't need to mark it here. We used to skip the text section, but it
18758 should never be valid for relocated addresses to be placed in the text
18759 section. */
18760 if (TARGET_RELOCATABLE
18761 && in_section != toc_section
18762 && !recurse
18763 && !CONST_SCALAR_INT_P (x)
18764 && CONSTANT_P (x))
18766 char buf[256];
18768 recurse = 1;
18769 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
18770 fixuplabelno++;
18771 ASM_OUTPUT_LABEL (asm_out_file, buf);
18772 fprintf (asm_out_file, "\t.long\t(");
18773 output_addr_const (asm_out_file, x);
18774 fprintf (asm_out_file, ")@fixup\n");
18775 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
18776 ASM_OUTPUT_ALIGN (asm_out_file, 2);
18777 fprintf (asm_out_file, "\t.long\t");
18778 assemble_name (asm_out_file, buf);
18779 fprintf (asm_out_file, "\n\t.previous\n");
18780 recurse = 0;
18781 return true;
18783 /* Remove initial .'s to turn a -mcall-aixdesc function
18784 address into the address of the descriptor, not the function
18785 itself. */
18786 else if (GET_CODE (x) == SYMBOL_REF
18787 && XSTR (x, 0)[0] == '.'
18788 && DEFAULT_ABI == ABI_AIX)
18790 const char *name = XSTR (x, 0);
18791 while (*name == '.')
18792 name++;
18794 fprintf (asm_out_file, "\t.long\t%s\n", name);
18795 return true;
18798 #endif /* RELOCATABLE_NEEDS_FIXUP */
18799 return default_assemble_integer (x, size, aligned_p);
18802 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
18803 /* Emit an assembler directive to set symbol visibility for DECL to
18804 VISIBILITY_TYPE. */
18806 static void
18807 rs6000_assemble_visibility (tree decl, int vis)
18809 if (TARGET_XCOFF)
18810 return;
18812 /* Functions need to have their entry point symbol visibility set as
18813 well as their descriptor symbol visibility. */
18814 if (DEFAULT_ABI == ABI_AIX
18815 && DOT_SYMBOLS
18816 && TREE_CODE (decl) == FUNCTION_DECL)
18818 static const char * const visibility_types[] = {
18819 NULL, "internal", "hidden", "protected"
18822 const char *name, *type;
18824 name = ((* targetm.strip_name_encoding)
18825 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
18826 type = visibility_types[vis];
18828 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
18829 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
18831 else
18832 default_assemble_visibility (decl, vis);
18834 #endif
18836 enum rtx_code
18837 rs6000_reverse_condition (enum machine_mode mode, enum rtx_code code)
18839 /* Reversing an FP compare needs care -- an ordered compare
18840 becomes an unordered compare and vice versa. */
18841 if (mode == CCFPmode
18842 && (!flag_finite_math_only
18843 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
18844 || code == UNEQ || code == LTGT))
18845 return reverse_condition_maybe_unordered (code);
18846 else
18847 return reverse_condition (code);
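/* Example (illustrative, not in the original source): reversing GE
   in CCFPmode yields UNLT, so a NaN operand still lands on the
   reversed side; with flag_finite_math_only and an ordered code,
   plain reverse_condition gives LT instead.  */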
18850 /* Generate a compare for CODE. Return a brand-new rtx that
18851 represents the result of the compare. */
18853 static rtx
18854 rs6000_generate_compare (rtx cmp, enum machine_mode mode)
18856 enum machine_mode comp_mode;
18857 rtx compare_result;
18858 enum rtx_code code = GET_CODE (cmp);
18859 rtx op0 = XEXP (cmp, 0);
18860 rtx op1 = XEXP (cmp, 1);
18862 if (FLOAT_MODE_P (mode))
18863 comp_mode = CCFPmode;
18864 else if (code == GTU || code == LTU
18865 || code == GEU || code == LEU)
18866 comp_mode = CCUNSmode;
18867 else if ((code == EQ || code == NE)
18868 && unsigned_reg_p (op0)
18869 && (unsigned_reg_p (op1)
18870 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
18871 /* These are unsigned values; perhaps there will be a later
18872 ordering compare that can be shared with this one. */
18873 comp_mode = CCUNSmode;
18874 else
18875 comp_mode = CCmode;
18877 /* If we have an unsigned compare, make sure we don't have a signed value as
18878 an immediate. */
18879 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
18880 && INTVAL (op1) < 0)
18882 op0 = copy_rtx_if_shared (op0);
18883 op1 = force_reg (GET_MODE (op0), op1);
18884 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
18887 /* First, the compare. */
18888 compare_result = gen_reg_rtx (comp_mode);
18890 /* E500 FP compare instructions on the GPRs. Yuck! */
18891 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
18892 && FLOAT_MODE_P (mode))
18894 rtx cmp, or_result, compare_result2;
18895 enum machine_mode op_mode = GET_MODE (op0);
18896 bool reverse_p;
18898 if (op_mode == VOIDmode)
18899 op_mode = GET_MODE (op1);
18901 /* First reverse the condition codes that aren't directly supported. */
18902 switch (code)
18904 case NE:
18905 case UNLT:
18906 case UNLE:
18907 case UNGT:
18908 case UNGE:
18909 code = reverse_condition_maybe_unordered (code);
18910 reverse_p = true;
18911 break;
18913 case EQ:
18914 case LT:
18915 case LE:
18916 case GT:
18917 case GE:
18918 reverse_p = false;
18919 break;
18921 default:
18922 gcc_unreachable ();
18925 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
18926 This explains the following mess. */
18928 switch (code)
18930 case EQ:
18931 switch (op_mode)
18933 case SFmode:
18934 cmp = (flag_finite_math_only && !flag_trapping_math)
18935 ? gen_tstsfeq_gpr (compare_result, op0, op1)
18936 : gen_cmpsfeq_gpr (compare_result, op0, op1);
18937 break;
18939 case DFmode:
18940 cmp = (flag_finite_math_only && !flag_trapping_math)
18941 ? gen_tstdfeq_gpr (compare_result, op0, op1)
18942 : gen_cmpdfeq_gpr (compare_result, op0, op1);
18943 break;
18945 case TFmode:
18946 cmp = (flag_finite_math_only && !flag_trapping_math)
18947 ? gen_tsttfeq_gpr (compare_result, op0, op1)
18948 : gen_cmptfeq_gpr (compare_result, op0, op1);
18949 break;
18951 default:
18952 gcc_unreachable ();
18954 break;
18956 case GT:
18957 case GE:
18958 switch (op_mode)
18960 case SFmode:
18961 cmp = (flag_finite_math_only && !flag_trapping_math)
18962 ? gen_tstsfgt_gpr (compare_result, op0, op1)
18963 : gen_cmpsfgt_gpr (compare_result, op0, op1);
18964 break;
18966 case DFmode:
18967 cmp = (flag_finite_math_only && !flag_trapping_math)
18968 ? gen_tstdfgt_gpr (compare_result, op0, op1)
18969 : gen_cmpdfgt_gpr (compare_result, op0, op1);
18970 break;
18972 case TFmode:
18973 cmp = (flag_finite_math_only && !flag_trapping_math)
18974 ? gen_tsttfgt_gpr (compare_result, op0, op1)
18975 : gen_cmptfgt_gpr (compare_result, op0, op1);
18976 break;
18978 default:
18979 gcc_unreachable ();
18981 break;
18983 case LT:
18984 case LE:
18985 switch (op_mode)
18987 case SFmode:
18988 cmp = (flag_finite_math_only && !flag_trapping_math)
18989 ? gen_tstsflt_gpr (compare_result, op0, op1)
18990 : gen_cmpsflt_gpr (compare_result, op0, op1);
18991 break;
18993 case DFmode:
18994 cmp = (flag_finite_math_only && !flag_trapping_math)
18995 ? gen_tstdflt_gpr (compare_result, op0, op1)
18996 : gen_cmpdflt_gpr (compare_result, op0, op1);
18997 break;
18999 case TFmode:
19000 cmp = (flag_finite_math_only && !flag_trapping_math)
19001 ? gen_tsttflt_gpr (compare_result, op0, op1)
19002 : gen_cmptflt_gpr (compare_result, op0, op1);
19003 break;
19005 default:
19006 gcc_unreachable ();
19008 break;
19010 default:
19011 gcc_unreachable ();
19014 /* Synthesize LE and GE from LT/GT || EQ. */
19015 if (code == LE || code == GE)
19017 emit_insn (cmp);
19019 compare_result2 = gen_reg_rtx (CCFPmode);
19021 /* Do the EQ. */
19022 switch (op_mode)
19024 case SFmode:
19025 cmp = (flag_finite_math_only && !flag_trapping_math)
19026 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
19027 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
19028 break;
19030 case DFmode:
19031 cmp = (flag_finite_math_only && !flag_trapping_math)
19032 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
19033 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
19034 break;
19036 case TFmode:
19037 cmp = (flag_finite_math_only && !flag_trapping_math)
19038 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
19039 : gen_cmptfeq_gpr (compare_result2, op0, op1);
19040 break;
19042 default:
19043 gcc_unreachable ();
19046 emit_insn (cmp);
19048 /* OR them together. */
19049 or_result = gen_reg_rtx (CCFPmode);
19050 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
19051 compare_result2);
19052 compare_result = or_result;
19055 code = reverse_p ? NE : EQ;
19057 emit_insn (cmp);
19059 else
19061 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
19062 CLOBBERs to match cmptf_internal2 pattern. */
19063 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
19064 && GET_MODE (op0) == TFmode
19065 && !TARGET_IEEEQUAD
19066 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
19067 emit_insn (gen_rtx_PARALLEL (VOIDmode,
19068 gen_rtvec (10,
19069 gen_rtx_SET (VOIDmode,
19070 compare_result,
19071 gen_rtx_COMPARE (comp_mode, op0, op1)),
19072 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19073 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19074 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19075 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19076 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19077 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19078 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19079 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19080 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
19081 else if (GET_CODE (op1) == UNSPEC
19082 && XINT (op1, 1) == UNSPEC_SP_TEST)
19084 rtx op1b = XVECEXP (op1, 0, 0);
19085 comp_mode = CCEQmode;
19086 compare_result = gen_reg_rtx (CCEQmode);
19087 if (TARGET_64BIT)
19088 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
19089 else
19090 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
19092 else
19093 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
19094 gen_rtx_COMPARE (comp_mode, op0, op1)));
19097 /* Some kinds of FP comparisons need an OR operation;
19098 under flag_finite_math_only we don't bother. */
19099 if (FLOAT_MODE_P (mode)
19100 && !flag_finite_math_only
19101 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
19102 && (code == LE || code == GE
19103 || code == UNEQ || code == LTGT
19104 || code == UNGT || code == UNLT))
19106 enum rtx_code or1, or2;
19107 rtx or1_rtx, or2_rtx, compare2_rtx;
19108 rtx or_result = gen_reg_rtx (CCEQmode);
19110 switch (code)
19112 case LE: or1 = LT; or2 = EQ; break;
19113 case GE: or1 = GT; or2 = EQ; break;
19114 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
19115 case LTGT: or1 = LT; or2 = GT; break;
19116 case UNGT: or1 = UNORDERED; or2 = GT; break;
19117 case UNLT: or1 = UNORDERED; or2 = LT; break;
19118 default: gcc_unreachable ();
19120 validate_condition_mode (or1, comp_mode);
19121 validate_condition_mode (or2, comp_mode);
19122 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
19123 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
19124 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
19125 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
19126 const_true_rtx);
19127 emit_insn (gen_rtx_SET (VOIDmode, or_result, compare2_rtx));
19129 compare_result = or_result;
19130 code = EQ;
19133 validate_condition_mode (code, GET_MODE (compare_result));
19135 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
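/* Example of the CCEQ path above (illustrative, not in the original
   source): an ordered LE without flag_finite_math_only is computed
   as (LT || EQ) over the CR field that the compare just set, and the
   caller then tests the resulting CCEQ register with EQ.  */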
19139 /* Emit the RTL for an sISEL pattern. */
19141 void
19142 rs6000_emit_sISEL (enum machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
19144 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
19147 void
19148 rs6000_emit_sCOND (enum machine_mode mode, rtx operands[])
19150 rtx condition_rtx;
19151 enum machine_mode op_mode;
19152 enum rtx_code cond_code;
19153 rtx result = operands[0];
19155 if (TARGET_ISEL && (mode == SImode || mode == DImode))
19157 rs6000_emit_sISEL (mode, operands);
19158 return;
19161 condition_rtx = rs6000_generate_compare (operands[1], mode);
19162 cond_code = GET_CODE (condition_rtx);
19164 if (FLOAT_MODE_P (mode)
19165 && !TARGET_FPRS && TARGET_HARD_FLOAT)
19167 rtx t;
19169 PUT_MODE (condition_rtx, SImode);
19170 t = XEXP (condition_rtx, 0);
19172 gcc_assert (cond_code == NE || cond_code == EQ);
19174 if (cond_code == NE)
19175 emit_insn (gen_e500_flip_gt_bit (t, t));
19177 emit_insn (gen_move_from_CR_gt_bit (result, t));
19178 return;
19181 if (cond_code == NE
19182 || cond_code == GE || cond_code == LE
19183 || cond_code == GEU || cond_code == LEU
19184 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
19186 rtx not_result = gen_reg_rtx (CCEQmode);
19187 rtx not_op, rev_cond_rtx;
19188 enum machine_mode cc_mode;
19190 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
19192 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
19193 SImode, XEXP (condition_rtx, 0), const0_rtx);
19194 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
19195 emit_insn (gen_rtx_SET (VOIDmode, not_result, not_op));
19196 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
19199 op_mode = GET_MODE (XEXP (operands[1], 0));
19200 if (op_mode == VOIDmode)
19201 op_mode = GET_MODE (XEXP (operands[1], 1));
19203 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
19205 PUT_MODE (condition_rtx, DImode);
19206 convert_move (result, condition_rtx, 0);
19208 else
19210 PUT_MODE (condition_rtx, SImode);
19211 emit_insn (gen_rtx_SET (VOIDmode, result, condition_rtx));
19215 /* Emit a branch of kind CODE to location LOC. */
19217 void
19218 rs6000_emit_cbranch (enum machine_mode mode, rtx operands[])
19220 rtx condition_rtx, loc_ref;
19222 condition_rtx = rs6000_generate_compare (operands[0], mode);
19223 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
19224 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
19225 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
19226 loc_ref, pc_rtx)));
19229 /* Return the string to output a conditional branch to LABEL, which is
19230 the operand template of the label, or NULL if the branch is really a
19231 conditional return.
19233 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
19234 condition code register and its mode specifies what kind of
19235 comparison we made.
19237 REVERSED is nonzero if we should reverse the sense of the comparison.
19239 INSN is the insn. */
19241 char *
19242 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
19244 static char string[64];
19245 enum rtx_code code = GET_CODE (op);
19246 rtx cc_reg = XEXP (op, 0);
19247 enum machine_mode mode = GET_MODE (cc_reg);
19248 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
19249 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
19250 int really_reversed = reversed ^ need_longbranch;
19251 char *s = string;
19252 const char *ccode;
19253 const char *pred;
19254 rtx note;
19256 validate_condition_mode (code, mode);
19258 /* Work out which way this really branches. We could use
19259 reverse_condition_maybe_unordered here always but this
19260 makes the resulting assembler clearer. */
19261 if (really_reversed)
19263 /* Reversing an FP compare needs care -- an ordered compare
19264 becomes an unordered compare and vice versa. */
19265 if (mode == CCFPmode)
19266 code = reverse_condition_maybe_unordered (code);
19267 else
19268 code = reverse_condition (code);
19271 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
19273 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
19274 to the GT bit. */
19275 switch (code)
19277 case EQ:
19278 /* Opposite of GT. */
19279 code = GT;
19280 break;
19282 case NE:
19283 code = UNLE;
19284 break;
19286 default:
19287 gcc_unreachable ();
19291 switch (code)
19293 /* Not all of these are actually distinct opcodes, but
19294 we distinguish them for clarity of the resulting assembler. */
19295 case NE: case LTGT:
19296 ccode = "ne"; break;
19297 case EQ: case UNEQ:
19298 ccode = "eq"; break;
19299 case GE: case GEU:
19300 ccode = "ge"; break;
19301 case GT: case GTU: case UNGT:
19302 ccode = "gt"; break;
19303 case LE: case LEU:
19304 ccode = "le"; break;
19305 case LT: case LTU: case UNLT:
19306 ccode = "lt"; break;
19307 case UNORDERED: ccode = "un"; break;
19308 case ORDERED: ccode = "nu"; break;
19309 case UNGE: ccode = "nl"; break;
19310 case UNLE: ccode = "ng"; break;
19311 default:
19312 gcc_unreachable ();
19315 /* Maybe we have a guess as to how likely the branch is. */
19316 pred = "";
19317 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
19318 if (note != NULL_RTX)
19320 /* PROB is the difference from 50%. */
19321 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
19323 /* Only hint for highly probable/improbable branches on newer
19324 cpus as static prediction overrides processor dynamic
19325 prediction. For older cpus we may as well always hint, but
19326 assume not taken for branches that are very close to 50% as a
19327 mispredicted taken branch is more expensive than a
19328 mispredicted not-taken branch. */
19329 if (rs6000_always_hint
19330 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
19331 && br_prob_note_reliable_p (note)))
19333 if (abs (prob) > REG_BR_PROB_BASE / 20
19334 && ((prob > 0) ^ need_longbranch))
19335 pred = "+";
19336 else
19337 pred = "-";
19341 if (label == NULL)
19342 s += sprintf (s, "b%slr%s ", ccode, pred);
19343 else
19344 s += sprintf (s, "b%s%s ", ccode, pred);
19346 /* We need to escape any '%' characters in the reg_names string.
19347 Assume they'd only be the first character.... */
19348 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
19349 *s++ = '%';
19350 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
19352 if (label != NULL)
19354 /* If the branch distance was too far, we may have to use an
19355 unconditional branch to go the distance. */
19356 if (need_longbranch)
19357 s += sprintf (s, ",$+8\n\tb %s", label);
19358 else
19359 s += sprintf (s, ",%s", label);
19362 return string;
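/* Example output (illustrative, not in the original source): an EQ
   test on CR2, predicted taken and within branch range, produces
   something like "beq+ 2,L47"; when the target is out of range the
   sense is inverted around an unconditional branch, giving
   "bne 2,$+8" followed by "b L47".  */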
19365 /* Return the string to flip the GT bit on a CR. */
19366 char *
19367 output_e500_flip_gt_bit (rtx dst, rtx src)
19369 static char string[64];
19370 int a, b;
19372 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
19373 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
19375 /* GT bit. */
19376 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
19377 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
19379 sprintf (string, "crnot %d,%d", a, b);
19380 return string;
19383 /* Return insn for VSX or Altivec comparisons. */
19385 static rtx
19386 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
19388 rtx mask;
19389 enum machine_mode mode = GET_MODE (op0);
19391 switch (code)
19393 default:
19394 break;
19396 case GE:
19397 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19398 return NULL_RTX;
19400 case EQ:
19401 case GT:
19402 case GTU:
19403 case ORDERED:
19404 case UNORDERED:
19405 case UNEQ:
19406 case LTGT:
19407 mask = gen_reg_rtx (mode);
19408 emit_insn (gen_rtx_SET (VOIDmode,
19409 mask,
19410 gen_rtx_fmt_ee (code, mode, op0, op1)));
19411 return mask;
19414 return NULL_RTX;
19417 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
19418 DMODE is expected destination mode. This is a recursive function. */
19420 static rtx
19421 rs6000_emit_vector_compare (enum rtx_code rcode,
19422 rtx op0, rtx op1,
19423 enum machine_mode dmode)
19425 rtx mask;
19426 bool swap_operands = false;
19427 bool try_again = false;
19429 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
19430 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
19432 /* See if the comparison works as is. */
19433 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19434 if (mask)
19435 return mask;
19437 switch (rcode)
19439 case LT:
19440 rcode = GT;
19441 swap_operands = true;
19442 try_again = true;
19443 break;
19444 case LTU:
19445 rcode = GTU;
19446 swap_operands = true;
19447 try_again = true;
19448 break;
19449 case NE:
19450 case UNLE:
19451 case UNLT:
19452 case UNGE:
19453 case UNGT:
19454 /* Invert condition and try again.
19455 e.g., A != B becomes ~(A==B). */
19457 enum rtx_code rev_code;
19458 enum insn_code nor_code;
19459 rtx mask2;
19461 rev_code = reverse_condition_maybe_unordered (rcode);
19462 if (rev_code == UNKNOWN)
19463 return NULL_RTX;
19465 nor_code = optab_handler (one_cmpl_optab, dmode);
19466 if (nor_code == CODE_FOR_nothing)
19467 return NULL_RTX;
19469 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
19470 if (!mask2)
19471 return NULL_RTX;
19473 mask = gen_reg_rtx (dmode);
19474 emit_insn (GEN_FCN (nor_code) (mask, mask2));
19475 return mask;
19477 break;
19478 case GE:
19479 case GEU:
19480 case LE:
19481 case LEU:
19482 /* Try GT/GTU/LT/LTU OR EQ */
19484 rtx c_rtx, eq_rtx;
19485 enum insn_code ior_code;
19486 enum rtx_code new_code;
19488 switch (rcode)
19490 case GE:
19491 new_code = GT;
19492 break;
19494 case GEU:
19495 new_code = GTU;
19496 break;
19498 case LE:
19499 new_code = LT;
19500 break;
19502 case LEU:
19503 new_code = LTU;
19504 break;
19506 default:
19507 gcc_unreachable ();
19510 ior_code = optab_handler (ior_optab, dmode);
19511 if (ior_code == CODE_FOR_nothing)
19512 return NULL_RTX;
19514 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
19515 if (!c_rtx)
19516 return NULL_RTX;
19518 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
19519 if (!eq_rtx)
19520 return NULL_RTX;
19522 mask = gen_reg_rtx (dmode);
19523 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
19524 return mask;
19526 break;
19527 default:
19528 return NULL_RTX;
19531 if (try_again)
19533 if (swap_operands)
19535 rtx tmp;
19536 tmp = op0;
19537 op0 = op1;
19538 op1 = tmp;
19541 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19542 if (mask)
19543 return mask;
19546 /* You only get two chances. */
19547 return NULL_RTX;
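/* Example (illustrative, not in the original source): V4SImode
   "a <= b" has no direct compare, so the GE/GEU/LE/LEU arm emits LT
   (itself retried as GT with swapped operands) and EQ, then ORs the
   two masks; NE is handled by emitting EQ and inverting the mask.  */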
19550 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
19551 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
19552 operands for the relation operation COND. */
19554 int
19555 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
19556 rtx cond, rtx cc_op0, rtx cc_op1)
19558 enum machine_mode dest_mode = GET_MODE (dest);
19559 enum machine_mode mask_mode = GET_MODE (cc_op0);
19560 enum rtx_code rcode = GET_CODE (cond);
19561 enum machine_mode cc_mode = CCmode;
19562 rtx mask;
19563 rtx cond2;
19564 rtx tmp;
19565 bool invert_move = false;
19567 if (VECTOR_UNIT_NONE_P (dest_mode))
19568 return 0;
19570 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
19571 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
19573 switch (rcode)
19575 /* Swap operands if we can, and fall back to doing the operation as
19576 specified, and doing a NOR to invert the test. */
19577 case NE:
19578 case UNLE:
19579 case UNLT:
19580 case UNGE:
19581 case UNGT:
19582 /* Invert condition and try again.
19583 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
19584 invert_move = true;
19585 rcode = reverse_condition_maybe_unordered (rcode);
19586 if (rcode == UNKNOWN)
19587 return 0;
19588 break;
19590 /* Mark unsigned tests with CCUNSmode. */
19591 case GTU:
19592 case GEU:
19593 case LTU:
19594 case LEU:
19595 cc_mode = CCUNSmode;
19596 break;
19598 default:
19599 break;
19602 /* Get the vector mask for the given relational operations. */
19603 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
19605 if (!mask)
19606 return 0;
19608 if (invert_move)
19610 tmp = op_true;
19611 op_true = op_false;
19612 op_false = tmp;
19615 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
19616 CONST0_RTX (dest_mode));
19617 emit_insn (gen_rtx_SET (VOIDmode,
19618 dest,
19619 gen_rtx_IF_THEN_ELSE (dest_mode,
19620 cond2,
19621 op_true,
19622 op_false)));
19623 return 1;
19626 /* Emit a conditional move: move TRUE_COND to DEST if OP of the
19627 operands of the last comparison is nonzero/true, FALSE_COND if it
19628 is zero/false. Return 0 if the hardware has no such operation. */
19630 int
19631 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
19633 enum rtx_code code = GET_CODE (op);
19634 rtx op0 = XEXP (op, 0);
19635 rtx op1 = XEXP (op, 1);
19636 REAL_VALUE_TYPE c1;
19637 enum machine_mode compare_mode = GET_MODE (op0);
19638 enum machine_mode result_mode = GET_MODE (dest);
19639 rtx temp;
19640 bool is_against_zero;
19642 /* These modes should always match. */
19643 if (GET_MODE (op1) != compare_mode
19644 /* In the isel case however, we can use a compare immediate, so
19645 op1 may be a small constant. */
19646 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
19647 return 0;
19648 if (GET_MODE (true_cond) != result_mode)
19649 return 0;
19650 if (GET_MODE (false_cond) != result_mode)
19651 return 0;
19653 /* Don't allow using floating point comparisons for integer results for
19654 now. */
19655 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
19656 return 0;
19658 /* First, work out if the hardware can do this at all, or
19659 if it's too slow.... */
19660 if (!FLOAT_MODE_P (compare_mode))
19662 if (TARGET_ISEL)
19663 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
19664 return 0;
19666 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
19667 && SCALAR_FLOAT_MODE_P (compare_mode))
19668 return 0;
19670 is_against_zero = op1 == CONST0_RTX (compare_mode);
19672 /* A floating-point subtract might overflow, underflow, or produce
19673 an inexact result, thus changing the floating-point flags, so it
19674 can't be generated if we care about that. It's safe if one side
19675 of the construct is zero, since then no subtract will be
19676 generated. */
19677 if (SCALAR_FLOAT_MODE_P (compare_mode)
19678 && flag_trapping_math && ! is_against_zero)
19679 return 0;
19681 /* Eliminate half of the comparisons by switching operands, this
19682 makes the remaining code simpler. */
19683 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
19684 || code == LTGT || code == LT || code == UNLE)
19686 code = reverse_condition_maybe_unordered (code);
19687 temp = true_cond;
19688 true_cond = false_cond;
19689 false_cond = temp;
19692 /* UNEQ and LTGT take four instructions for a comparison with zero,
19693 so it'll probably be faster to use a branch here too. */
19694 if (code == UNEQ && HONOR_NANS (compare_mode))
19695 return 0;
19697 if (GET_CODE (op1) == CONST_DOUBLE)
19698 REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);
19700 /* We're going to try to implement comparisons by performing
19701 a subtract, then comparing against zero. Unfortunately,
19702 Inf - Inf is NaN which is not zero, and so if we don't
19703 know that the operand is finite and the comparison
19704 would treat EQ differently from UNORDERED, we can't do it. */
19705 if (HONOR_INFINITIES (compare_mode)
19706 && code != GT && code != UNGE
19707 && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
19708 /* Constructs of the form (a OP b ? a : b) are safe. */
19709 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
19710 || (! rtx_equal_p (op0, true_cond)
19711 && ! rtx_equal_p (op1, true_cond))))
19712 return 0;
19714 /* At this point we know we can use fsel. */
19716 /* Reduce the comparison to a comparison against zero. */
19717 if (! is_against_zero)
19719 temp = gen_reg_rtx (compare_mode);
19720 emit_insn (gen_rtx_SET (VOIDmode, temp,
19721 gen_rtx_MINUS (compare_mode, op0, op1)));
19722 op0 = temp;
19723 op1 = CONST0_RTX (compare_mode);
19726 /* If we don't care about NaNs we can reduce some of the comparisons
19727 down to faster ones. */
19728 if (! HONOR_NANS (compare_mode))
19729 switch (code)
19731 case GT:
19732 code = LE;
19733 temp = true_cond;
19734 true_cond = false_cond;
19735 false_cond = temp;
19736 break;
19737 case UNGE:
19738 code = GE;
19739 break;
19740 case UNEQ:
19741 code = EQ;
19742 break;
19743 default:
19744 break;
19747 /* Now, reduce everything down to a GE. */
19748 switch (code)
19750 case GE:
19751 break;
19753 case LE:
19754 temp = gen_reg_rtx (compare_mode);
19755 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
19756 op0 = temp;
19757 break;
19759 case ORDERED:
19760 temp = gen_reg_rtx (compare_mode);
19761 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_ABS (compare_mode, op0)));
19762 op0 = temp;
19763 break;
19765 case EQ:
19766 temp = gen_reg_rtx (compare_mode);
19767 emit_insn (gen_rtx_SET (VOIDmode, temp,
19768 gen_rtx_NEG (compare_mode,
19769 gen_rtx_ABS (compare_mode, op0))));
19770 op0 = temp;
19771 break;
19773 case UNGE:
19774 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
19775 temp = gen_reg_rtx (result_mode);
19776 emit_insn (gen_rtx_SET (VOIDmode, temp,
19777 gen_rtx_IF_THEN_ELSE (result_mode,
19778 gen_rtx_GE (VOIDmode,
19779 op0, op1),
19780 true_cond, false_cond)));
19781 false_cond = true_cond;
19782 true_cond = temp;
19784 temp = gen_reg_rtx (compare_mode);
19785 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
19786 op0 = temp;
19787 break;
19789 case GT:
19790 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
19791 temp = gen_reg_rtx (result_mode);
19792 emit_insn (gen_rtx_SET (VOIDmode, temp,
19793 gen_rtx_IF_THEN_ELSE (result_mode,
19794 gen_rtx_GE (VOIDmode,
19795 op0, op1),
19796 true_cond, false_cond)));
19797 true_cond = false_cond;
19798 false_cond = temp;
19800 temp = gen_reg_rtx (compare_mode);
19801 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
19802 op0 = temp;
19803 break;
19805 default:
19806 gcc_unreachable ();
19809 emit_insn (gen_rtx_SET (VOIDmode, dest,
19810 gen_rtx_IF_THEN_ELSE (result_mode,
19811 gen_rtx_GE (VOIDmode,
19812 op0, op1),
19813 true_cond, false_cond)));
19814 return 1;
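/* Example of the reduction above (illustrative, not in the original
   source): "x <= y ? a : b" is first rewritten as a compare of
   t = x - y against zero, then the LE case negates t, so the final
   fsel-style select tests y - x >= 0 -- true exactly when x <= y
   (NaNs aside).  */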
19817 /* Same as above, but for ints (isel). */
19819 static int
19820 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
19822 rtx condition_rtx, cr;
19823 enum machine_mode mode = GET_MODE (dest);
19824 enum rtx_code cond_code;
19825 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
19826 bool signedp;
19828 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
19829 return 0;
19831 /* We still have to do the compare, because isel doesn't do a
19832 compare; it just looks at the CRx bits set by a previous compare
19833 instruction. */
19834 condition_rtx = rs6000_generate_compare (op, mode);
19835 cond_code = GET_CODE (condition_rtx);
19836 cr = XEXP (condition_rtx, 0);
19837 signedp = GET_MODE (cr) == CCmode;
19839 isel_func = (mode == SImode
19840 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
19841 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
19843 switch (cond_code)
19845 case LT: case GT: case LTU: case GTU: case EQ:
19846 /* isel handles these directly. */
19847 break;
19849 default:
19850 /* We need to swap the sense of the comparison. */
19852 rtx t = true_cond;
19853 true_cond = false_cond;
19854 false_cond = t;
19855 PUT_CODE (condition_rtx, reverse_condition (cond_code));
19857 break;
19860 false_cond = force_reg (mode, false_cond);
19861 if (true_cond != const0_rtx)
19862 true_cond = force_reg (mode, true_cond);
19864 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
19866 return 1;
19869 const char *
19870 output_isel (rtx *operands)
19872 enum rtx_code code;
19874 code = GET_CODE (operands[1]);
19876 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
19878 gcc_assert (GET_CODE (operands[2]) == REG
19879 && GET_CODE (operands[3]) == REG);
19880 PUT_CODE (operands[1], reverse_condition (code));
19881 return "isel %0,%3,%2,%j1";
19884 return "isel %0,%2,%3,%j1";
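/* Standalone model (not part of rs6000.c): isel picks its first source when
   the tested CR bit is 1 and the second when it is 0.  The CR field only
   records LT, GT and EQ, so GE/LE/NE are handled by testing the opposite
   bit and swapping the two sources, which is exactly what the reversed
   "isel %0,%3,%2,%j1" template above does.  */
static long model_isel (int cr_bit, long a, long b)
{
  return cr_bit ? a : b;
}

/* a >= b ? t : f via the LT bit: reversed condition, swapped sources.  */
static long model_select_ge (long a, long b, long t, long f)
{
  int lt = a < b;
  return model_isel (lt, f, t);
}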
19887 void
19888 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
19890 enum machine_mode mode = GET_MODE (op0);
19891 enum rtx_code c;
19892 rtx target;
19894 /* VSX/altivec have direct min/max insns. */
19895 if ((code == SMAX || code == SMIN)
19896 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
19897 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
19899 emit_insn (gen_rtx_SET (VOIDmode,
19900 dest,
19901 gen_rtx_fmt_ee (code, mode, op0, op1)));
19902 return;
19905 if (code == SMAX || code == SMIN)
19906 c = GE;
19907 else
19908 c = GEU;
19910 if (code == SMAX || code == UMAX)
19911 target = emit_conditional_move (dest, c, op0, op1, mode,
19912 op0, op1, mode, 0);
19913 else
19914 target = emit_conditional_move (dest, c, op0, op1, mode,
19915 op1, op0, mode, 0);
19916 gcc_assert (target);
19917 if (target != dest)
19918 emit_move_insn (dest, target);
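/* Minimal sketch (standalone): the fallback above turns min/max into a
   single conditional move on GE (GEU for the unsigned pair), so one
   comparison code serves all four of SMAX/SMIN/UMAX/UMIN.  */
static long model_smax (long a, long b) { return a >= b ? a : b; }
static long model_smin (long a, long b) { return a >= b ? b : a; }
static unsigned long model_umax (unsigned long a, unsigned long b)
{
  return a >= b ? a : b;   /* same shape, GEU instead of GE */
}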
19921 /* A subroutine of the atomic operation splitters. Jump to LABEL if
19922 COND is true. Mark the jump as unlikely to be taken. */
19924 static void
19925 emit_unlikely_jump (rtx cond, rtx label)
19927 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
19928 rtx x;
19930 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
19931 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
19932 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
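/* Standalone note (illustration, not part of rs6000.c): REG_BR_PROB notes
   scale probabilities as integers out of REG_BR_PROB_BASE (10000 in GCC of
   this era), so REG_BR_PROB_BASE / 100 - 1 marks the branch as taken just
   under 1% of the time.  A hypothetical helper for the conversion:  */
static int model_prob_from_percent (int prob_base, int percent)
{
  /* model_prob_from_percent (10000, 1) - 1 == the very_unlikely value.  */
  return prob_base * percent / 100;
}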
19935 /* A subroutine of the atomic operation splitters. Emit a load-locked
19936 instruction in MODE. For QI/HImode, possibly use a pattern that includes
19937 the zero_extend operation. */
19939 static void
19940 emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
19942 rtx (*fn) (rtx, rtx) = NULL;
19944 switch (mode)
19946 case QImode:
19947 fn = gen_load_lockedqi;
19948 break;
19949 case HImode:
19950 fn = gen_load_lockedhi;
19951 break;
19952 case SImode:
19953 if (GET_MODE (mem) == QImode)
19954 fn = gen_load_lockedqi_si;
19955 else if (GET_MODE (mem) == HImode)
19956 fn = gen_load_lockedhi_si;
19957 else
19958 fn = gen_load_lockedsi;
19959 break;
19960 case DImode:
19961 fn = gen_load_lockeddi;
19962 break;
19963 case TImode:
19964 fn = gen_load_lockedti;
19965 break;
19966 default:
19967 gcc_unreachable ();
19969 emit_insn (fn (reg, mem));
19972 /* A subroutine of the atomic operation splitters. Emit a store-conditional
19973 instruction in MODE. */
19975 static void
19976 emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
19978 rtx (*fn) (rtx, rtx, rtx) = NULL;
19980 switch (mode)
19982 case QImode:
19983 fn = gen_store_conditionalqi;
19984 break;
19985 case HImode:
19986 fn = gen_store_conditionalhi;
19987 break;
19988 case SImode:
19989 fn = gen_store_conditionalsi;
19990 break;
19991 case DImode:
19992 fn = gen_store_conditionaldi;
19993 break;
19994 case TImode:
19995 fn = gen_store_conditionalti;
19996 break;
19997 default:
19998 gcc_unreachable ();
20001 /* Emit sync before stwcx. to address PPC405 Erratum. */
20002 if (PPC405_ERRATUM77)
20003 emit_insn (gen_hwsync ());
20005 emit_insn (fn (res, mem, val));
20008 /* Expand barriers before and after a load_locked/store_cond sequence. */
20010 static rtx
20011 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
20013 rtx addr = XEXP (mem, 0);
20014 int strict_p = (reload_in_progress || reload_completed);
20016 if (!legitimate_indirect_address_p (addr, strict_p)
20017 && !legitimate_indexed_address_p (addr, strict_p))
20019 addr = force_reg (Pmode, addr);
20020 mem = replace_equiv_address_nv (mem, addr);
20023 switch (model)
20025 case MEMMODEL_RELAXED:
20026 case MEMMODEL_CONSUME:
20027 case MEMMODEL_ACQUIRE:
20028 break;
20029 case MEMMODEL_RELEASE:
20030 case MEMMODEL_ACQ_REL:
20031 emit_insn (gen_lwsync ());
20032 break;
20033 case MEMMODEL_SEQ_CST:
20034 emit_insn (gen_hwsync ());
20035 break;
20036 default:
20037 gcc_unreachable ();
20039 return mem;
20042 static void
20043 rs6000_post_atomic_barrier (enum memmodel model)
20045 switch (model)
20047 case MEMMODEL_RELAXED:
20048 case MEMMODEL_CONSUME:
20049 case MEMMODEL_RELEASE:
20050 break;
20051 case MEMMODEL_ACQUIRE:
20052 case MEMMODEL_ACQ_REL:
20053 case MEMMODEL_SEQ_CST:
20054 emit_insn (gen_isync ());
20055 break;
20056 default:
20057 gcc_unreachable ();
20061 /* A subroutine of the various atomic expanders. For sub-word operations,
20062 we must adjust things to operate on SImode. Given the original MEM,
20063 return a new aligned MEM. Also build and return the quantities by
20064 which to shift and mask. */
20066 static rtx
20067 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
20069 rtx addr, align, shift, mask, mem;
20070 HOST_WIDE_INT shift_mask;
20071 enum machine_mode mode = GET_MODE (orig_mem);
20073 /* For smaller modes, we have to implement this via SImode. */
20074 shift_mask = (mode == QImode ? 0x18 : 0x10);
20076 addr = XEXP (orig_mem, 0);
20077 addr = force_reg (GET_MODE (addr), addr);
20079 /* Aligned memory containing the subword. Generate a new MEM. We
20080 do not want any of the existing MEM_ATTR data, as we're now
20081 accessing memory outside the original object. */
20082 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
20083 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20084 mem = gen_rtx_MEM (SImode, align);
20085 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
20086 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
20087 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
20089 /* Shift amount for subword relative to aligned word. */
20090 shift = gen_reg_rtx (SImode);
20091 addr = gen_lowpart (SImode, addr);
20092 emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
20093 if (BYTES_BIG_ENDIAN)
20094 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
20095 shift, 1, OPTAB_LIB_WIDEN);
20096 *pshift = shift;
20098 /* Mask for insertion. */
20099 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
20100 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
20101 *pmask = mask;
20103 return mem;
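/* Standalone sketch (not part of rs6000.c) of the address arithmetic the
   function above emits, assuming a naturally aligned little-endian access
   and size_in_bytes of 1 or 2.  The big-endian case additionally XORs the
   shift with shift_mask, as done above.  */
#include <stdint.h>

static void model_adjust_subword (uintptr_t addr, unsigned size_in_bytes,
                                  uintptr_t *aligned, unsigned *shift,
                                  uint32_t *mask)
{
  unsigned shift_mask = (size_in_bytes == 1 ? 0x18 : 0x10);
  *aligned = addr & ~(uintptr_t) 3;              /* the AND with -4 above */
  *shift = ((unsigned) addr << 3) & shift_mask;  /* the rlwinm above */
  *mask = ((1u << (8 * size_in_bytes)) - 1) << *shift;
}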
20106 /* A subroutine of the various atomic expanders. For sub-word operands,
20107 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
20109 static rtx
20110 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
20112 rtx x;
20114 x = gen_reg_rtx (SImode);
20115 emit_insn (gen_rtx_SET (VOIDmode, x,
20116 gen_rtx_AND (SImode,
20117 gen_rtx_NOT (SImode, mask),
20118 oldval)));
20120 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
20122 return x;
20125 /* A subroutine of the various atomic expanders. For sub-word operands,
20126 extract WIDE to NARROW via SHIFT. */
20128 static void
20129 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
20131 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
20132 wide, 1, OPTAB_LIB_WIDEN);
20133 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
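/* Standalone sketch: recovering the narrow result from the full word, as
   the function above does -- shift the field down to bit 0, then truncate
   to the narrow mode.  */
#include <stdint.h>

static uint8_t model_finish_subword_qi (uint32_t wide, unsigned shift)
{
  return (uint8_t) (wide >> shift);   /* LSHIFTRT, then gen_lowpart */
}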
20136 /* Expand an atomic compare and swap operation. */
20138 void
20139 rs6000_expand_atomic_compare_and_swap (rtx operands[])
20141 rtx boolval, retval, mem, oldval, newval, cond;
20142 rtx label1, label2, x, mask, shift;
20143 enum machine_mode mode, orig_mode;
20144 enum memmodel mod_s, mod_f;
20145 bool is_weak;
20147 boolval = operands[0];
20148 retval = operands[1];
20149 mem = operands[2];
20150 oldval = operands[3];
20151 newval = operands[4];
20152 is_weak = (INTVAL (operands[5]) != 0);
20153 mod_s = (enum memmodel) INTVAL (operands[6]);
20154 mod_f = (enum memmodel) INTVAL (operands[7]);
20155 orig_mode = mode = GET_MODE (mem);
20157 mask = shift = NULL_RTX;
20158 if (mode == QImode || mode == HImode)
20160 /* Before power8, we didn't have access to lbarx/lharx, so generate a
20161 lwarx and shift/mask sequence. With power8, we need to do the
20162 comparison in SImode, but the store is still done in QI/HImode. */
20163 oldval = convert_modes (SImode, mode, oldval, 1);
20165 if (!TARGET_SYNC_HI_QI)
20167 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20169 /* Shift and mask OLDVAL into position within the word. */
20170 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
20171 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20173 /* Shift and mask NEWVAL into position within the word. */
20174 newval = convert_modes (SImode, mode, newval, 1);
20175 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
20176 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20179 /* Prepare to adjust the return value. */
20180 retval = gen_reg_rtx (SImode);
20181 mode = SImode;
20183 else if (reg_overlap_mentioned_p (retval, oldval))
20184 oldval = copy_to_reg (oldval);
20186 mem = rs6000_pre_atomic_barrier (mem, mod_s);
20188 label1 = NULL_RTX;
20189 if (!is_weak)
20191 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20192 emit_label (XEXP (label1, 0));
20194 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20196 emit_load_locked (mode, retval, mem);
20198 x = retval;
20199 if (mask)
20201 x = expand_simple_binop (SImode, AND, retval, mask,
20202 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20205 cond = gen_reg_rtx (CCmode);
20206 /* If we have TImode, synthesize a comparison. */
20207 if (mode != TImode)
20208 x = gen_rtx_COMPARE (CCmode, x, oldval);
20209 else
20211 rtx xor1_result = gen_reg_rtx (DImode);
20212 rtx xor2_result = gen_reg_rtx (DImode);
20213 rtx or_result = gen_reg_rtx (DImode);
20214 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
20215 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
20216 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
20217 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
20219 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
20220 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
20221 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
20222 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
20225 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
20227 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20228 emit_unlikely_jump (x, label2);
20230 x = newval;
20231 if (mask)
20232 x = rs6000_mask_atomic_subword (retval, newval, mask);
20234 emit_store_conditional (orig_mode, cond, mem, x);
20236 if (!is_weak)
20238 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20239 emit_unlikely_jump (x, label1);
20242 if (mod_f != MEMMODEL_RELAXED)
20243 emit_label (XEXP (label2, 0));
20245 rs6000_post_atomic_barrier (mod_s);
20247 if (mod_f == MEMMODEL_RELAXED)
20248 emit_label (XEXP (label2, 0));
20250 if (shift)
20251 rs6000_finish_atomic_subword (operands[1], retval, shift);
20252 else if (mode != GET_MODE (operands[1]))
20253 convert_move (operands[1], retval, 1);
20255 /* In all cases, CR0 contains EQ on success, and NE on failure. */
20256 x = gen_rtx_EQ (SImode, cond, const0_rtx);
20257 emit_insn (gen_rtx_SET (VOIDmode, boolval, x));
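/* Standalone, single-threaded model (illustration only) of the loop shape
   emitted above.  model_ll/model_sc stand in for larx/stcx.; a real
   store-conditional can fail spuriously, which is why the strong variant
   loops back to label1 while the weak variant falls straight through.
   Note also that the TImode path above synthesizes the 128-bit equality
   test as ((hi1 ^ hi2) | (lo1 ^ lo2)) == 0, since no 128-bit compare
   instruction exists.  */
#include <stdbool.h>
#include <stdint.h>

static uint32_t model_ll (uint32_t *mem) { return *mem; }
static bool model_sc (uint32_t *mem, uint32_t val) { *mem = val; return true; }

static bool model_cas_strong (uint32_t *mem, uint32_t *expected,
                              uint32_t desired)
{
  uint32_t old;
  do
    {
      old = model_ll (mem);
      if (old != *expected)            /* the jump to label2 above */
        {
          *expected = old;
          return false;
        }
    }
  while (!model_sc (mem, desired));    /* the jump back to label1 above */
  return true;
}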
20260 /* Expand an atomic exchange operation. */
20262 void
20263 rs6000_expand_atomic_exchange (rtx operands[])
20265 rtx retval, mem, val, cond;
20266 enum machine_mode mode;
20267 enum memmodel model;
20268 rtx label, x, mask, shift;
20270 retval = operands[0];
20271 mem = operands[1];
20272 val = operands[2];
20273 model = (enum memmodel) INTVAL (operands[3]);
20274 mode = GET_MODE (mem);
20276 mask = shift = NULL_RTX;
20277 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
20279 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20281 /* Shift and mask VAL into position within the word. */
20282 val = convert_modes (SImode, mode, val, 1);
20283 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20284 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20286 /* Prepare to adjust the return value. */
20287 retval = gen_reg_rtx (SImode);
20288 mode = SImode;
20291 mem = rs6000_pre_atomic_barrier (mem, model);
20293 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20294 emit_label (XEXP (label, 0));
20296 emit_load_locked (mode, retval, mem);
20298 x = val;
20299 if (mask)
20300 x = rs6000_mask_atomic_subword (retval, val, mask);
20302 cond = gen_reg_rtx (CCmode);
20303 emit_store_conditional (mode, cond, mem, x);
20305 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20306 emit_unlikely_jump (x, label);
20308 rs6000_post_atomic_barrier (model);
20310 if (shift)
20311 rs6000_finish_atomic_subword (operands[0], retval, shift);
20314 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
20315 to perform. MEM is the memory on which to operate. VAL is the second
20316 operand of the binary operator. BEFORE and AFTER are optional locations to
20317 return the value of MEM either before or after the operation. MODEL_RTX
20318 is a CONST_INT containing the memory model to use. */
20320 void
20321 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
20322 rtx orig_before, rtx orig_after, rtx model_rtx)
20324 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
20325 enum machine_mode mode = GET_MODE (mem);
20326 enum machine_mode store_mode = mode;
20327 rtx label, x, cond, mask, shift;
20328 rtx before = orig_before, after = orig_after;
20330 mask = shift = NULL_RTX;
20331 /* On power8, we want to use SImode for the operation. On previous systems,
20332 do the operation on a full word and shift/mask to get the proper byte or
20333 halfword. */
20334 if (mode == QImode || mode == HImode)
20336 if (TARGET_SYNC_HI_QI)
20338 val = convert_modes (SImode, mode, val, 1);
20340 /* Prepare to adjust the return value. */
20341 before = gen_reg_rtx (SImode);
20342 if (after)
20343 after = gen_reg_rtx (SImode);
20344 mode = SImode;
20346 else
20348 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20350 /* Shift and mask VAL into position within the word. */
20351 val = convert_modes (SImode, mode, val, 1);
20352 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20353 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20355 switch (code)
20357 case IOR:
20358 case XOR:
20359 /* We've already zero-extended VAL. That is sufficient to
20360 make certain that it does not affect other bits. */
20361 mask = NULL;
20362 break;
20364 case AND:
20365 /* If we make certain that all of the other bits in VAL are
20366 set, that will be sufficient to not affect other bits. */
20367 x = gen_rtx_NOT (SImode, mask);
20368 x = gen_rtx_IOR (SImode, x, val);
20369 emit_insn (gen_rtx_SET (VOIDmode, val, x));
20370 mask = NULL;
20371 break;
20373 case NOT:
20374 case PLUS:
20375 case MINUS:
20376 /* These will all affect bits outside the field and need
20377 adjustment via MASK within the loop. */
20378 break;
20380 default:
20381 gcc_unreachable ();
20384 /* Prepare to adjust the return value. */
20385 before = gen_reg_rtx (SImode);
20386 if (after)
20387 after = gen_reg_rtx (SImode);
20388 store_mode = mode = SImode;
20392 mem = rs6000_pre_atomic_barrier (mem, model);
20394 label = gen_label_rtx ();
20395 emit_label (label);
20396 label = gen_rtx_LABEL_REF (VOIDmode, label);
20398 if (before == NULL_RTX)
20399 before = gen_reg_rtx (mode);
20401 emit_load_locked (mode, before, mem);
20403 if (code == NOT)
20405 x = expand_simple_binop (mode, AND, before, val,
20406 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20407 after = expand_simple_unop (mode, NOT, x, after, 1);
20409 else
20411 after = expand_simple_binop (mode, code, before, val,
20412 after, 1, OPTAB_LIB_WIDEN);
20415 x = after;
20416 if (mask)
20418 x = expand_simple_binop (SImode, AND, after, mask,
20419 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20420 x = rs6000_mask_atomic_subword (before, x, mask);
20422 else if (store_mode != mode)
20423 x = convert_modes (store_mode, mode, x, 1);
20425 cond = gen_reg_rtx (CCmode);
20426 emit_store_conditional (store_mode, cond, mem, x);
20428 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20429 emit_unlikely_jump (x, label);
20431 rs6000_post_atomic_barrier (model);
20433 if (shift)
20435 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
20436 then do the calculations in a SImode register. */
20437 if (orig_before)
20438 rs6000_finish_atomic_subword (orig_before, before, shift);
20439 if (orig_after)
20440 rs6000_finish_atomic_subword (orig_after, after, shift);
20442 else if (store_mode != mode)
20444 /* QImode/HImode on machines with lbarx/lharx where we do the native
20445 operation and then do the calculations in a SImode register. */
20446 if (orig_before)
20447 convert_move (orig_before, before, 1);
20448 if (orig_after)
20449 convert_move (orig_after, after, 1);
20451 else if (orig_after && after != orig_after)
20452 emit_move_insn (orig_after, after);
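/* Standalone sketch: why the AND case above ORs ~mask into VAL.  The
   pre-power8 read-modify-write happens on the whole 32-bit word, so bits
   outside the subword field must pass through unchanged: for IOR/XOR a
   zero-extended VAL already guarantees that, and for AND forcing the
   outside bits of VAL to 1 does.  PLUS/MINUS/NOT can carry or flip bits
   across the field boundary, so they keep the mask and re-merge inside
   the loop instead.  */
#include <stdint.h>

static uint32_t model_subword_and (uint32_t word, uint32_t val, uint32_t mask)
{
  /* val holds the operand already shifted into the field given by mask.  */
  return word & (val | ~mask);   /* bits outside the field survive */
}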
20455 /* Emit instructions to move SRC to DST. Called by splitters for
20456 multi-register moves. It will emit at most one instruction for
20457 each register that is accessed; that is, it won't emit li/lis pairs
20458 (or equivalent for 64-bit code). One of SRC or DST must be a hard
20459 register. */
20461 void
20462 rs6000_split_multireg_move (rtx dst, rtx src)
20464 /* The register number of the first register being moved. */
20465 int reg;
20466 /* The mode that is to be moved. */
20467 enum machine_mode mode;
20468 /* The mode that the move is being done in, and its size. */
20469 enum machine_mode reg_mode;
20470 int reg_mode_size;
20471 /* The number of registers that will be moved. */
20472 int nregs;
20474 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
20475 mode = GET_MODE (dst);
20476 nregs = hard_regno_nregs[reg][mode];
20477 if (FP_REGNO_P (reg))
20478 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
20479 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
20480 else if (ALTIVEC_REGNO_P (reg))
20481 reg_mode = V16QImode;
20482 else if (TARGET_E500_DOUBLE && mode == TFmode)
20483 reg_mode = DFmode;
20484 else
20485 reg_mode = word_mode;
20486 reg_mode_size = GET_MODE_SIZE (reg_mode);
20488 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
20490 /* TDmode residing in FP registers is special, since the ISA requires that
20491 the lower-numbered word of a register pair is always the most significant
20492 word, even in little-endian mode. This does not match the usual subreg
20493 semantics, so we cannot use simplify_gen_subreg in those cases. Access
20494 the appropriate constituent registers "by hand" in little-endian mode.
20496 Note we do not need to check for destructive overlap here since TDmode
20497 can only reside in even/odd register pairs. */
20498 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
20500 rtx p_src, p_dst;
20501 int i;
20503 for (i = 0; i < nregs; i++)
20505 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
20506 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
20507 else
20508 p_src = simplify_gen_subreg (reg_mode, src, mode,
20509 i * reg_mode_size);
20511 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
20512 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
20513 else
20514 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
20515 i * reg_mode_size);
20517 emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
20520 return;
20523 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
20525 /* Move register range backwards, if we might have destructive
20526 overlap. */
20527 int i;
20528 for (i = nregs - 1; i >= 0; i--)
20529 emit_insn (gen_rtx_SET (VOIDmode,
20530 simplify_gen_subreg (reg_mode, dst, mode,
20531 i * reg_mode_size),
20532 simplify_gen_subreg (reg_mode, src, mode,
20533 i * reg_mode_size)));
20535 else
20537 int i;
20538 int j = -1;
20539 bool used_update = false;
20540 rtx restore_basereg = NULL_RTX;
20542 if (MEM_P (src) && INT_REGNO_P (reg))
20544 rtx breg;
20546 if (GET_CODE (XEXP (src, 0)) == PRE_INC
20547 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
20549 rtx delta_rtx;
20550 breg = XEXP (XEXP (src, 0), 0);
20551 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
20552 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
20553 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
20554 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20555 src = replace_equiv_address (src, breg);
20557 else if (! rs6000_offsettable_memref_p (src, reg_mode))
20559 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
20561 rtx basereg = XEXP (XEXP (src, 0), 0);
20562 if (TARGET_UPDATE)
20564 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
20565 emit_insn (gen_rtx_SET (VOIDmode, ndst,
20566 gen_rtx_MEM (reg_mode, XEXP (src, 0))));
20567 used_update = true;
20569 else
20570 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20571 XEXP (XEXP (src, 0), 1)));
20572 src = replace_equiv_address (src, basereg);
20574 else
20576 rtx basereg = gen_rtx_REG (Pmode, reg);
20577 emit_insn (gen_rtx_SET (VOIDmode, basereg, XEXP (src, 0)));
20578 src = replace_equiv_address (src, basereg);
20582 breg = XEXP (src, 0);
20583 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
20584 breg = XEXP (breg, 0);
20586 /* If the base register we are using to address memory is
20587 also a destination reg, then change that register last. */
20588 if (REG_P (breg)
20589 && REGNO (breg) >= REGNO (dst)
20590 && REGNO (breg) < REGNO (dst) + nregs)
20591 j = REGNO (breg) - REGNO (dst);
20593 else if (MEM_P (dst) && INT_REGNO_P (reg))
20595 rtx breg;
20597 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
20598 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
20600 rtx delta_rtx;
20601 breg = XEXP (XEXP (dst, 0), 0);
20602 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
20603 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
20604 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
20606 /* We have to update the breg before doing the store.
20607 Use store with update, if available. */
20609 if (TARGET_UPDATE)
20611 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20612 emit_insn (TARGET_32BIT
20613 ? (TARGET_POWERPC64
20614 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
20615 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
20616 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
20617 used_update = true;
20619 else
20620 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20621 dst = replace_equiv_address (dst, breg);
20623 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
20624 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
20626 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
20628 rtx basereg = XEXP (XEXP (dst, 0), 0);
20629 if (TARGET_UPDATE)
20631 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20632 emit_insn (gen_rtx_SET (VOIDmode,
20633 gen_rtx_MEM (reg_mode, XEXP (dst, 0)), nsrc));
20634 used_update = true;
20636 else
20637 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20638 XEXP (XEXP (dst, 0), 1)));
20639 dst = replace_equiv_address (dst, basereg);
20641 else
20643 rtx basereg = XEXP (XEXP (dst, 0), 0);
20644 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
20645 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
20646 && REG_P (basereg)
20647 && REG_P (offsetreg)
20648 && REGNO (basereg) != REGNO (offsetreg));
20649 if (REGNO (basereg) == 0)
20651 rtx tmp = offsetreg;
20652 offsetreg = basereg;
20653 basereg = tmp;
20655 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
20656 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
20657 dst = replace_equiv_address (dst, basereg);
20660 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
20661 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
20664 for (i = 0; i < nregs; i++)
20666 /* Calculate index to next subword. */
20667 ++j;
20668 if (j == nregs)
20669 j = 0;
20671 /* If compiler already emitted move of first word by
20672 store with update, no need to do anything. */
20673 if (j == 0 && used_update)
20674 continue;
20676 emit_insn (gen_rtx_SET (VOIDmode,
20677 simplify_gen_subreg (reg_mode, dst, mode,
20678 j * reg_mode_size),
20679 simplify_gen_subreg (reg_mode, src, mode,
20680 j * reg_mode_size)));
20682 if (restore_basereg != NULL_RTX)
20683 emit_insn (restore_basereg);
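/* Minimal sketch of the overlap rule above: when the destination range
   starts above the source range, copying word 0 first would clobber a
   source word that is still needed, so the split walks backwards -- the
   same reasoning memmove uses for overlapping buffers.  */
static void model_multireg_move (long regs[], int dst, int src, int nregs)
{
  int i;
  if (src < dst)
    for (i = nregs - 1; i >= 0; i--)   /* backwards: regs[src + i] is read
                                          before regs[dst + i] overwrites it */
      regs[dst + i] = regs[src + i];
  else
    for (i = 0; i < nregs; i++)
      regs[dst + i] = regs[src + i];
}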
20688 /* This page contains routines that are used to determine what the
20689 function prologue and epilogue code will do and write them out. */
20691 static inline bool
20692 save_reg_p (int r)
20694 return !call_used_regs[r] && df_regs_ever_live_p (r);
20697 /* Return the first fixed-point register that is required to be
20698 saved. 32 if none. */
20700 int
20701 first_reg_to_save (void)
20703 int first_reg;
20705 /* Find lowest numbered live register. */
20706 for (first_reg = 13; first_reg <= 31; first_reg++)
20707 if (save_reg_p (first_reg))
20708 break;
20710 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
20711 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
20712 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
20713 || (TARGET_TOC && TARGET_MINIMAL_TOC))
20714 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20715 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
20717 #if TARGET_MACHO
20718 if (flag_pic
20719 && crtl->uses_pic_offset_table
20720 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
20721 return RS6000_PIC_OFFSET_TABLE_REGNUM;
20722 #endif
20724 return first_reg;
20727 /* Similar, for FP regs. */
20729 int
20730 first_fp_reg_to_save (void)
20732 int first_reg;
20734 /* Find lowest numbered live register. */
20735 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
20736 if (save_reg_p (first_reg))
20737 break;
20739 return first_reg;
20742 /* Similar, for AltiVec regs. */
20744 static int
20745 first_altivec_reg_to_save (void)
20747 int i;
20749 /* Stack frame remains as is unless we are in AltiVec ABI. */
20750 if (! TARGET_ALTIVEC_ABI)
20751 return LAST_ALTIVEC_REGNO + 1;
20753 /* On Darwin, the unwind routines are compiled without
20754 TARGET_ALTIVEC, and use save_world to save/restore the
20755 altivec registers when necessary. */
20756 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
20757 && ! TARGET_ALTIVEC)
20758 return FIRST_ALTIVEC_REGNO + 20;
20760 /* Find lowest numbered live register. */
20761 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
20762 if (save_reg_p (i))
20763 break;
20765 return i;
20768 /* Return a 32-bit mask of the AltiVec registers we need to set in
20769 VRSAVE. Bit n of the return value is 1 if Vn is live, with bits
20770 numbered PowerPC-style: the MSB of the 32-bit word is bit 0. */
20772 static unsigned int
20773 compute_vrsave_mask (void)
20775 unsigned int i, mask = 0;
20777 /* On Darwin, the unwind routines are compiled without
20778 TARGET_ALTIVEC, and use save_world to save/restore the
20779 call-saved altivec registers when necessary. */
20780 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
20781 && ! TARGET_ALTIVEC)
20782 mask |= 0xFFF;
20784 /* First, find out if we use _any_ altivec registers. */
20785 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
20786 if (df_regs_ever_live_p (i))
20787 mask |= ALTIVEC_REG_BIT (i);
20789 if (mask == 0)
20790 return mask;
20792 /* Next, remove the argument registers from the set. These must
20793 be in the VRSAVE mask set by the caller, so we don't need to add
20794 them in again. More importantly, the mask we compute here is
20795 used to generate CLOBBERs in the set_vrsave insn, and we do not
20796 wish the argument registers to die. */
20797 for (i = crtl->args.info.vregno - 1; i >= ALTIVEC_ARG_MIN_REG; --i)
20798 mask &= ~ALTIVEC_REG_BIT (i);
20800 /* Similarly, remove the return value from the set. */
20802 bool yes = false;
20803 diddle_return_value (is_altivec_return_reg, &yes);
20804 if (yes)
20805 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
20808 return mask;
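/* Standalone sketch of the mask layout documented above: bit n of VRSAVE
   is 1 if Vn is live, counting from the most significant bit, so V0 maps
   to the MSB of the word.  model_vr_bit is a hypothetical stand-in for
   the ALTIVEC_REG_BIT mapping used above.  */
#include <stdint.h>

static uint32_t model_vr_bit (unsigned vn)   /* vn in 0..31 */
{
  return (uint32_t) 0x80000000 >> vn;
}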
20811 /* For a very restricted set of circumstances, we can cut down the
20812 size of prologues/epilogues by calling our own save/restore-the-world
20813 routines. */
20815 static void
20816 compute_save_world_info (rs6000_stack_t *info_ptr)
20818 info_ptr->world_save_p = 1;
20819 info_ptr->world_save_p
20820 = (WORLD_SAVE_P (info_ptr)
20821 && DEFAULT_ABI == ABI_DARWIN
20822 && !cfun->has_nonlocal_label
20823 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
20824 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
20825 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
20826 && info_ptr->cr_save_p);
20828 /* This will not work in conjunction with sibcalls. Make sure there
20829 are none. (This check is expensive, but seldom executed.) */
20830 if (WORLD_SAVE_P (info_ptr))
20832 rtx_insn *insn;
20833 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
20834 if (CALL_P (insn) && SIBLING_CALL_P (insn))
20836 info_ptr->world_save_p = 0;
20837 break;
20841 if (WORLD_SAVE_P (info_ptr))
20843 /* Even if we're not touching VRsave, make sure there's room on the
20844 stack for it, if it looks like we're calling SAVE_WORLD, which
20845 will attempt to save it. */
20846 info_ptr->vrsave_size = 4;
20848 /* If we are going to save the world, we need to save the link register too. */
20849 info_ptr->lr_save_p = 1;
20851 /* "Save" the VRsave register too if we're saving the world. */
20852 if (info_ptr->vrsave_mask == 0)
20853 info_ptr->vrsave_mask = compute_vrsave_mask ();
20855 /* Because the Darwin register save/restore routines only handle
20856 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
20857 check. */
20858 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
20859 && (info_ptr->first_altivec_reg_save
20860 >= FIRST_SAVED_ALTIVEC_REGNO));
20862 return;
20866 static void
20867 is_altivec_return_reg (rtx reg, void *xyes)
20869 bool *yes = (bool *) xyes;
20870 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
20871 *yes = true;
20875 /* Look for user-defined global regs in the range FIRST to LAST-1.
20876 We should not restore these, and so cannot use lmw or out-of-line
20877 restore functions if there are any. We also can't save them
20878 (well, emit frame notes for them), because frame unwinding during
20879 exception handling will restore saved registers. */
20881 static bool
20882 global_regs_p (unsigned first, unsigned last)
20884 while (first < last)
20885 if (global_regs[first++])
20886 return true;
20887 return false;
20890 /* Determine the strategy for saving/restoring registers. */
20892 enum {
20893 SAVRES_MULTIPLE = 0x1,
20894 SAVE_INLINE_FPRS = 0x2,
20895 SAVE_INLINE_GPRS = 0x4,
20896 REST_INLINE_FPRS = 0x8,
20897 REST_INLINE_GPRS = 0x10,
20898 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
20899 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
20900 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
20901 SAVE_INLINE_VRS = 0x100,
20902 REST_INLINE_VRS = 0x200
20905 static int
20906 rs6000_savres_strategy (rs6000_stack_t *info,
20907 bool using_static_chain_p)
20909 int strategy = 0;
20910 bool lr_save_p;
20912 if (TARGET_MULTIPLE
20913 && !TARGET_POWERPC64
20914 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
20915 && info->first_gp_reg_save < 31
20916 && !global_regs_p (info->first_gp_reg_save, 32))
20917 strategy |= SAVRES_MULTIPLE;
20919 if (crtl->calls_eh_return
20920 || cfun->machine->ra_need_lr)
20921 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
20922 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
20923 | SAVE_INLINE_VRS | REST_INLINE_VRS);
20925 if (info->first_fp_reg_save == 64
20926 /* The out-of-line FP routines use double-precision stores;
20927 we can't use those routines if we don't have such stores. */
20928 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
20929 || global_regs_p (info->first_fp_reg_save, 64))
20930 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
20932 if (info->first_gp_reg_save == 32
20933 || (!(strategy & SAVRES_MULTIPLE)
20934 && global_regs_p (info->first_gp_reg_save, 32)))
20935 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
20937 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
20938 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
20939 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
20941 /* Define cutoff for using out-of-line functions to save registers. */
20942 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
20944 if (!optimize_size)
20946 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
20947 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
20948 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
20950 else
20952 /* Prefer out-of-line restore if it will exit. */
20953 if (info->first_fp_reg_save > 61)
20954 strategy |= SAVE_INLINE_FPRS;
20955 if (info->first_gp_reg_save > 29)
20957 if (info->first_fp_reg_save == 64)
20958 strategy |= SAVE_INLINE_GPRS;
20959 else
20960 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
20962 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
20963 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
20966 else if (DEFAULT_ABI == ABI_DARWIN)
20968 if (info->first_fp_reg_save > 60)
20969 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
20970 if (info->first_gp_reg_save > 29)
20971 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
20972 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
20974 else
20976 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
20977 if (info->first_fp_reg_save > 61)
20978 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
20979 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
20980 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
20983 /* Don't bother to try to save things out-of-line if r11 is occupied
20984 by the static chain. It would require too much fiddling and the
20985 static chain is rarely used anyway. FPRs are saved w.r.t the stack
20986 pointer on Darwin, and AIX uses r1 or r12. */
20987 if (using_static_chain_p
20988 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
20989 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
20990 | SAVE_INLINE_GPRS
20991 | SAVE_INLINE_VRS | REST_INLINE_VRS);
20993 /* We can only use the out-of-line routines to restore if we've
20994 saved all the registers from first_fp_reg_save in the prologue.
20995 Otherwise, we risk loading garbage. */
20996 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
20998 int i;
21000 for (i = info->first_fp_reg_save; i < 64; i++)
21001 if (!save_reg_p (i))
21003 strategy |= REST_INLINE_FPRS;
21004 break;
21008 /* If we are going to use store multiple, then don't even bother
21009 with the out-of-line routines, since the store-multiple
21010 instruction will always be smaller. */
21011 if ((strategy & SAVRES_MULTIPLE))
21012 strategy |= SAVE_INLINE_GPRS;
21014 /* info->lr_save_p isn't yet set if the only reason lr needs to be
21015 saved is an out-of-line save or restore. Set up the value for
21016 the next test (excluding out-of-line gpr restore). */
21017 lr_save_p = (info->lr_save_p
21018 || !(strategy & SAVE_INLINE_GPRS)
21019 || !(strategy & SAVE_INLINE_FPRS)
21020 || !(strategy & SAVE_INLINE_VRS)
21021 || !(strategy & REST_INLINE_FPRS)
21022 || !(strategy & REST_INLINE_VRS));
21024 /* The situation is more complicated with load multiple. We'd
21025 prefer to use the out-of-line routines for restores, since the
21026 "exit" out-of-line routines can handle the restore of LR and the
21027 frame teardown. However it doesn't make sense to use the
21028 out-of-line routine if that is the only reason we'd need to save
21029 LR, and we can't use the "exit" out-of-line gpr restore if we
21030 have saved some fprs; in those cases it is advantageous to use
21031 load multiple when available. */
21032 if ((strategy & SAVRES_MULTIPLE)
21033 && (!lr_save_p
21034 || info->first_fp_reg_save != 64))
21035 strategy |= REST_INLINE_GPRS;
21037 /* Saving CR interferes with the exit routines used on the SPE, so
21038 just punt here. */
21039 if (TARGET_SPE_ABI
21040 && info->spe_64bit_regs_used
21041 && info->cr_save_p)
21042 strategy |= REST_INLINE_GPRS;
21044 /* We can only use load multiple or the out-of-line routines to
21045 restore if we've used store multiple or out-of-line routines
21046 in the prologue, i.e. if we've saved all the registers from
21047 first_gp_reg_save. Otherwise, we risk loading garbage. */
21048 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
21049 == SAVE_INLINE_GPRS)
21051 int i;
21053 for (i = info->first_gp_reg_save; i < 32; i++)
21054 if (!save_reg_p (i))
21056 strategy |= REST_INLINE_GPRS;
21057 break;
21061 if (TARGET_ELF && TARGET_64BIT)
21063 if (!(strategy & SAVE_INLINE_FPRS))
21064 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21065 else if (!(strategy & SAVE_INLINE_GPRS)
21066 && info->first_fp_reg_save == 64)
21067 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
21069 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
21070 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
21072 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
21073 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21075 return strategy;
21078 /* Calculate the stack information for the current function. This is
21079 complicated by having two separate calling sequences, the AIX calling
21080 sequence and the V.4 calling sequence.
21082 AIX (and Darwin/Mac OS X) stack frames look like:
21083 32-bit 64-bit
21084 SP----> +---------------------------------------+
21085 | back chain to caller | 0 0
21086 +---------------------------------------+
21087 | saved CR | 4 8 (8-11)
21088 +---------------------------------------+
21089 | saved LR | 8 16
21090 +---------------------------------------+
21091 | reserved for compilers | 12 24
21092 +---------------------------------------+
21093 | reserved for binders | 16 32
21094 +---------------------------------------+
21095 | saved TOC pointer | 20 40
21096 +---------------------------------------+
21097 | Parameter save area (P) | 24 48
21098 +---------------------------------------+
21099 | Alloca space (A) | 24+P etc.
21100 +---------------------------------------+
21101 | Local variable space (L) | 24+P+A
21102 +---------------------------------------+
21103 | Float/int conversion temporary (X) | 24+P+A+L
21104 +---------------------------------------+
21105 | Save area for AltiVec registers (W) | 24+P+A+L+X
21106 +---------------------------------------+
21107 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
21108 +---------------------------------------+
21109 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
21110 +---------------------------------------+
21111 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
21112 +---------------------------------------+
21113 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
21114 +---------------------------------------+
21115 old SP->| back chain to caller's caller |
21116 +---------------------------------------+
21118 The required alignment for AIX configurations is two words (i.e., 8
21119 or 16 bytes).
21121 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
21123 SP----> +---------------------------------------+
21124 | Back chain to caller | 0
21125 +---------------------------------------+
21126 | Save area for CR | 8
21127 +---------------------------------------+
21128 | Saved LR | 16
21129 +---------------------------------------+
21130 | Saved TOC pointer | 24
21131 +---------------------------------------+
21132 | Parameter save area (P) | 32
21133 +---------------------------------------+
21134 | Alloca space (A) | 32+P
21135 +---------------------------------------+
21136 | Local variable space (L) | 32+P+A
21137 +---------------------------------------+
21138 | Save area for AltiVec registers (W) | 32+P+A+L
21139 +---------------------------------------+
21140 | AltiVec alignment padding (Y) | 32+P+A+L+W
21141 +---------------------------------------+
21142 | Save area for GP registers (G) | 32+P+A+L+W+Y
21143 +---------------------------------------+
21144 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
21145 +---------------------------------------+
21146 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
21147 +---------------------------------------+
21150 V.4 stack frames look like:
21152 SP----> +---------------------------------------+
21153 | back chain to caller | 0
21154 +---------------------------------------+
21155 | caller's saved LR | 4
21156 +---------------------------------------+
21157 | Parameter save area (P) | 8
21158 +---------------------------------------+
21159 | Alloca space (A) | 8+P
21160 +---------------------------------------+
21161 | Varargs save area (V) | 8+P+A
21162 +---------------------------------------+
21163 | Local variable space (L) | 8+P+A+V
21164 +---------------------------------------+
21165 | Float/int conversion temporary (X) | 8+P+A+V+L
21166 +---------------------------------------+
21167 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
21168 +---------------------------------------+
21169 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
21170 +---------------------------------------+
21171 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
21172 +---------------------------------------+
21173 | SPE: area for 64-bit GP registers |
21174 +---------------------------------------+
21175 | SPE alignment padding |
21176 +---------------------------------------+
21177 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
21178 +---------------------------------------+
21179 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
21180 +---------------------------------------+
21181 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
21182 +---------------------------------------+
21183 old SP->| back chain to caller's caller |
21184 +---------------------------------------+
21186 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
21187 given. (But note below and in sysv4.h that we require only 8 and
21188 may round up the size of our stack frame anyway. The historical
21189 reason is early versions of powerpc-linux which didn't properly
21190 align the stack at program startup. A happy side-effect is that
21191 -mno-eabi libraries can be used with -meabi programs.)
21193 The EABI configuration defaults to the V.4 layout. However,
21194 the stack alignment requirements may differ. If -mno-eabi is not
21195 given, the required stack alignment is 8 bytes; if -mno-eabi is
21196 given, the required alignment is 16 bytes. (But see V.4 comment
21197 above.) */
21199 #ifndef ABI_STACK_BOUNDARY
21200 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
21201 #endif
21203 static rs6000_stack_t *
21204 rs6000_stack_info (void)
21206 rs6000_stack_t *info_ptr = &stack_info;
21207 int reg_size = TARGET_32BIT ? 4 : 8;
21208 int ehrd_size;
21209 int ehcr_size;
21210 int save_align;
21211 int first_gp;
21212 HOST_WIDE_INT non_fixed_size;
21213 bool using_static_chain_p;
21215 if (reload_completed && info_ptr->reload_completed)
21216 return info_ptr;
21218 memset (info_ptr, 0, sizeof (*info_ptr));
21219 info_ptr->reload_completed = reload_completed;
21221 if (TARGET_SPE)
21223 /* Cache value so we don't rescan instruction chain over and over. */
21224 if (cfun->machine->insn_chain_scanned_p == 0)
21225 cfun->machine->insn_chain_scanned_p
21226 = spe_func_has_64bit_regs_p () + 1;
21227 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
21230 /* Select which calling sequence. */
21231 info_ptr->abi = DEFAULT_ABI;
21233 /* Calculate which registers need to be saved & save area size. */
21234 info_ptr->first_gp_reg_save = first_reg_to_save ();
21235 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
21236 even if it currently looks like we won't. Reload may need it to
21237 get at a constant; if so, it will have already created a constant
21238 pool entry for it. */
21239 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
21240 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
21241 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
21242 && crtl->uses_const_pool
21243 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
21244 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
21245 else
21246 first_gp = info_ptr->first_gp_reg_save;
21248 info_ptr->gp_size = reg_size * (32 - first_gp);
21250 /* For the SPE, we have an additional upper 32-bits on each GPR.
21251 Ideally we should save the entire 64-bits only when the upper
21252 half is used in SIMD instructions. Since we only record
21253 registers live (not the size they are used in), this proves
21254 difficult because we'd have to traverse the instruction chain at
21255 the right time, taking reload into account. This is a real pain,
21256 so we opt to save the GPRs in 64-bits always if even one register
21257 gets used in 64-bits. Otherwise, all the registers in the frame
21258 get saved in 32-bits.
21260 So, when we save all GPRs (except the SP) in 64-bits, the
21261 traditional GP save area will be empty. */
21262 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21263 info_ptr->gp_size = 0;
21265 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
21266 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
21268 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
21269 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
21270 - info_ptr->first_altivec_reg_save);
21272 /* Does this function call anything? */
21273 info_ptr->calls_p = (! crtl->is_leaf
21274 || cfun->machine->ra_needs_full_frame);
21276 /* Determine if we need to save the condition code registers. */
21277 if (df_regs_ever_live_p (CR2_REGNO)
21278 || df_regs_ever_live_p (CR3_REGNO)
21279 || df_regs_ever_live_p (CR4_REGNO))
21281 info_ptr->cr_save_p = 1;
21282 if (DEFAULT_ABI == ABI_V4)
21283 info_ptr->cr_size = reg_size;
21286 /* If the current function calls __builtin_eh_return, then we need
21287 to allocate stack space for registers that will hold data for
21288 the exception handler. */
21289 if (crtl->calls_eh_return)
21291 unsigned int i;
21292 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
21293 continue;
21295 /* SPE saves EH registers in 64-bits. */
21296 ehrd_size = i * (TARGET_SPE_ABI
21297 && info_ptr->spe_64bit_regs_used != 0
21298 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
21300 else
21301 ehrd_size = 0;
21303 /* In the ELFv2 ABI, we also need to allocate space for separate
21304 CR field save areas if the function calls __builtin_eh_return. */
21305 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
21307 /* This hard-codes that we have three call-saved CR fields. */
21308 ehcr_size = 3 * reg_size;
21309 /* We do *not* use the regular CR save mechanism. */
21310 info_ptr->cr_save_p = 0;
21312 else
21313 ehcr_size = 0;
21315 /* Determine various sizes. */
21316 info_ptr->reg_size = reg_size;
21317 info_ptr->fixed_size = RS6000_SAVE_AREA;
21318 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
21319 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
21320 TARGET_ALTIVEC ? 16 : 8);
21321 if (FRAME_GROWS_DOWNWARD)
21322 info_ptr->vars_size
21323 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
21324 + info_ptr->parm_size,
21325 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
21326 - (info_ptr->fixed_size + info_ptr->vars_size
21327 + info_ptr->parm_size);
21329 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21330 info_ptr->spe_gp_size = 8 * (32 - first_gp);
21331 else
21332 info_ptr->spe_gp_size = 0;
21334 if (TARGET_ALTIVEC_ABI)
21335 info_ptr->vrsave_mask = compute_vrsave_mask ();
21336 else
21337 info_ptr->vrsave_mask = 0;
21339 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
21340 info_ptr->vrsave_size = 4;
21341 else
21342 info_ptr->vrsave_size = 0;
21344 compute_save_world_info (info_ptr);
21346 /* Calculate the offsets. */
21347 switch (DEFAULT_ABI)
21349 case ABI_NONE:
21350 default:
21351 gcc_unreachable ();
21353 case ABI_AIX:
21354 case ABI_ELFv2:
21355 case ABI_DARWIN:
21356 info_ptr->fp_save_offset = - info_ptr->fp_size;
21357 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21359 if (TARGET_ALTIVEC_ABI)
21361 info_ptr->vrsave_save_offset
21362 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
21364 /* Align stack so vector save area is on a quadword boundary.
21365 The padding goes above the vectors. */
21366 if (info_ptr->altivec_size != 0)
21367 info_ptr->altivec_padding_size
21368 = info_ptr->vrsave_save_offset & 0xF;
21369 else
21370 info_ptr->altivec_padding_size = 0;
21372 info_ptr->altivec_save_offset
21373 = info_ptr->vrsave_save_offset
21374 - info_ptr->altivec_padding_size
21375 - info_ptr->altivec_size;
21376 gcc_assert (info_ptr->altivec_size == 0
21377 || info_ptr->altivec_save_offset % 16 == 0);
21379 /* Adjust for AltiVec case. */
21380 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
21382 else
21383 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
21385 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
21386 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
21387 info_ptr->lr_save_offset = 2*reg_size;
21388 break;
21390 case ABI_V4:
21391 info_ptr->fp_save_offset = - info_ptr->fp_size;
21392 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21393 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
21395 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21397 /* Align stack so SPE GPR save area is aligned on a
21398 double-word boundary. */
21399 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
21400 info_ptr->spe_padding_size
21401 = 8 - (-info_ptr->cr_save_offset % 8);
21402 else
21403 info_ptr->spe_padding_size = 0;
21405 info_ptr->spe_gp_save_offset
21406 = info_ptr->cr_save_offset
21407 - info_ptr->spe_padding_size
21408 - info_ptr->spe_gp_size;
21410 /* Adjust for SPE case. */
21411 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
21413 else if (TARGET_ALTIVEC_ABI)
21415 info_ptr->vrsave_save_offset
21416 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
21418 /* Align stack so vector save area is on a quadword boundary. */
21419 if (info_ptr->altivec_size != 0)
21420 info_ptr->altivec_padding_size
21421 = 16 - (-info_ptr->vrsave_save_offset % 16);
21422 else
21423 info_ptr->altivec_padding_size = 0;
21425 info_ptr->altivec_save_offset
21426 = info_ptr->vrsave_save_offset
21427 - info_ptr->altivec_padding_size
21428 - info_ptr->altivec_size;
21430 /* Adjust for AltiVec case. */
21431 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
21433 else
21434 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
21435 info_ptr->ehrd_offset -= ehrd_size;
21436 info_ptr->lr_save_offset = reg_size;
21437 break;
21440 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
21441 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
21442 + info_ptr->gp_size
21443 + info_ptr->altivec_size
21444 + info_ptr->altivec_padding_size
21445 + info_ptr->spe_gp_size
21446 + info_ptr->spe_padding_size
21447 + ehrd_size
21448 + ehcr_size
21449 + info_ptr->cr_size
21450 + info_ptr->vrsave_size,
21451 save_align);
21453 non_fixed_size = (info_ptr->vars_size
21454 + info_ptr->parm_size
21455 + info_ptr->save_size);
21457 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
21458 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
21460 /* Determine if we need to save the link register. */
21461 if (info_ptr->calls_p
21462 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21463 && crtl->profile
21464 && !TARGET_PROFILE_KERNEL)
21465 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
21466 #ifdef TARGET_RELOCATABLE
21467 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
21468 #endif
21469 || rs6000_ra_ever_killed ())
21470 info_ptr->lr_save_p = 1;
21472 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
21473 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
21474 && call_used_regs[STATIC_CHAIN_REGNUM]);
21475 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
21476 using_static_chain_p);
21478 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
21479 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
21480 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
21481 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
21482 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
21483 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
21484 info_ptr->lr_save_p = 1;
21486 if (info_ptr->lr_save_p)
21487 df_set_regs_ever_live (LR_REGNO, true);
21489 /* Determine if we need to allocate any stack frame:
21491 For AIX we need to push the stack if a frame pointer is needed
21492 (because the stack might be dynamically adjusted), if we are
21493 debugging, if we make calls, or if the sum of fp_save, gp_save,
21494 and local variables is more than the space needed to save all
21495 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
21496 + 18*8 = 288 (GPR13 reserved).
21498 For V.4 we don't have the stack cushion that AIX uses, but assume
21499 that the debugger can handle stackless frames. */
21501 if (info_ptr->calls_p)
21502 info_ptr->push_p = 1;
21504 else if (DEFAULT_ABI == ABI_V4)
21505 info_ptr->push_p = non_fixed_size != 0;
21507 else if (frame_pointer_needed)
21508 info_ptr->push_p = 1;
21510 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
21511 info_ptr->push_p = 1;
21513 else
21514 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
21516 /* Zero offsets if we're not saving those registers. */
21517 if (info_ptr->fp_size == 0)
21518 info_ptr->fp_save_offset = 0;
21520 if (info_ptr->gp_size == 0)
21521 info_ptr->gp_save_offset = 0;
21523 if (! TARGET_ALTIVEC_ABI || info_ptr->altivec_size == 0)
21524 info_ptr->altivec_save_offset = 0;
21526 /* Zero VRSAVE offset if not saved and restored. */
21527 if (! TARGET_ALTIVEC_VRSAVE || info_ptr->vrsave_mask == 0)
21528 info_ptr->vrsave_save_offset = 0;
21530 if (! TARGET_SPE_ABI
21531 || info_ptr->spe_64bit_regs_used == 0
21532 || info_ptr->spe_gp_size == 0)
21533 info_ptr->spe_gp_save_offset = 0;
21535 if (! info_ptr->lr_save_p)
21536 info_ptr->lr_save_offset = 0;
21538 if (! info_ptr->cr_save_p)
21539 info_ptr->cr_save_offset = 0;
21541 return info_ptr;
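/* Standalone sketch (hypothetical helpers, not the real RS6000_ALIGN
   macro) of the alignment arithmetic used throughout the function above:
   sizes are rounded up to a power-of-two boundary, and save-area offsets,
   which grow downwards from the frame top as negative numbers, get a
   padding term so the AltiVec area lands on a 16-byte boundary.  */
#include <stdint.h>

static intptr_t model_align_up (intptr_t n, intptr_t align)
{
  return (n + align - 1) & -align;   /* align must be a power of two */
}

static intptr_t model_altivec_padding (intptr_t vrsave_save_offset)
{
  /* Essentially the V.4 computation above, with the already-aligned case
     folded to 0; the AIX branch uses offset & 0xF, which gives the same
     value for two's-complement negative offsets.  */
  intptr_t rem = (-vrsave_save_offset) % 16;
  return rem ? 16 - rem : 0;
}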
21544 /* Return true if the current function uses any GPRs in 64-bit SIMD
21545 mode. */
21547 static bool
21548 spe_func_has_64bit_regs_p (void)
21550 rtx_insn *insns, *insn;
21552 /* Functions that save and restore all the call-saved registers will
21553 need to save/restore the registers in 64-bits. */
21554 if (crtl->calls_eh_return
21555 || cfun->calls_setjmp
21556 || crtl->has_nonlocal_goto)
21557 return true;
21559 insns = get_insns ();
21561 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
21563 if (INSN_P (insn))
21565 rtx i;
21567 /* FIXME: This should be implemented with attributes, e.g.
21569 (set_attr "spe64" "true") ... then
21570 if (get_spe64 (insn)) return true;
21572 It's the only reliable way to do the stuff below. */
21574 i = PATTERN (insn);
21575 if (GET_CODE (i) == SET)
21577 enum machine_mode mode = GET_MODE (SET_SRC (i));
21579 if (SPE_VECTOR_MODE (mode))
21580 return true;
21581 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
21582 return true;
21587 return false;
21590 static void
21591 debug_stack_info (rs6000_stack_t *info)
21593 const char *abi_string;
21595 if (! info)
21596 info = rs6000_stack_info ();
21598 fprintf (stderr, "\nStack information for function %s:\n",
21599 ((current_function_decl && DECL_NAME (current_function_decl))
21600 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
21601 : "<unknown>"));
21603 switch (info->abi)
21605 default: abi_string = "Unknown"; break;
21606 case ABI_NONE: abi_string = "NONE"; break;
21607 case ABI_AIX: abi_string = "AIX"; break;
21608 case ABI_ELFv2: abi_string = "ELFv2"; break;
21609 case ABI_DARWIN: abi_string = "Darwin"; break;
21610 case ABI_V4: abi_string = "V.4"; break;
21613 fprintf (stderr, "\tABI = %5s\n", abi_string);
21615 if (TARGET_ALTIVEC_ABI)
21616 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
21618 if (TARGET_SPE_ABI)
21619 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
21621 if (info->first_gp_reg_save != 32)
21622 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
21624 if (info->first_fp_reg_save != 64)
21625 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
21627 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
21628 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
21629 info->first_altivec_reg_save);
21631 if (info->lr_save_p)
21632 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
21634 if (info->cr_save_p)
21635 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
21637 if (info->vrsave_mask)
21638 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
21640 if (info->push_p)
21641 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
21643 if (info->calls_p)
21644 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
21646 if (info->gp_save_offset)
21647 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
21649 if (info->fp_save_offset)
21650 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
21652 if (info->altivec_save_offset)
21653 fprintf (stderr, "\taltivec_save_offset = %5d\n",
21654 info->altivec_save_offset);
21656 if (info->spe_gp_save_offset)
21657 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
21658 info->spe_gp_save_offset);
21660 if (info->vrsave_save_offset)
21661 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
21662 info->vrsave_save_offset);
21664 if (info->lr_save_offset)
21665 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
21667 if (info->cr_save_offset)
21668 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
21670 if (info->varargs_save_offset)
21671 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
21673 if (info->total_size)
21674 fprintf (stderr, "\ttotal_size = "HOST_WIDE_INT_PRINT_DEC"\n",
21675 info->total_size);
21677 if (info->vars_size)
21678 fprintf (stderr, "\tvars_size = "HOST_WIDE_INT_PRINT_DEC"\n",
21679 info->vars_size);
21681 if (info->parm_size)
21682 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
21684 if (info->fixed_size)
21685 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
21687 if (info->gp_size)
21688 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
21690 if (info->spe_gp_size)
21691 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
21693 if (info->fp_size)
21694 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
21696 if (info->altivec_size)
21697 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
21699 if (info->vrsave_size)
21700 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
21702 if (info->altivec_padding_size)
21703 fprintf (stderr, "\taltivec_padding_size= %5d\n",
21704 info->altivec_padding_size);
21706 if (info->spe_padding_size)
21707 fprintf (stderr, "\tspe_padding_size = %5d\n",
21708 info->spe_padding_size);
21710 if (info->cr_size)
21711 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
21713 if (info->save_size)
21714 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
21716 if (info->reg_size != 4)
21717 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
21719 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
21721 fprintf (stderr, "\n");
21724 rtx
21725 rs6000_return_addr (int count, rtx frame)
21727 /* Currently we don't optimize very well between prolog and body
21728 code, and for PIC code the result can actually be quite bad, so
21729 don't try to be too clever here. */
21730 if (count != 0
21731 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
21733 cfun->machine->ra_needs_full_frame = 1;
21735 return
21736 gen_rtx_MEM
21737 (Pmode,
21738 memory_address
21739 (Pmode,
21740 plus_constant (Pmode,
21741 copy_to_reg
21742 (gen_rtx_MEM (Pmode,
21743 memory_address (Pmode, frame))),
21744 RETURN_ADDRESS_OFFSET)));
21747 cfun->machine->ra_need_lr = 1;
21748 return get_hard_reg_initial_val (Pmode, LR_REGNO);
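/* Illustration: __builtin_return_address (0) normally takes the second
   path above and yields the incoming LR value directly; any COUNT != 0
   (or a PIC V.4/Darwin function) takes the first path, loading the back
   chain word addressed by FRAME and then the saved LR found at
   RETURN_ADDRESS_OFFSET from it.  */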
21751 /* Say whether a function is a candidate for sibcall handling or not. */
21753 static bool
21754 rs6000_function_ok_for_sibcall (tree decl, tree exp)
21756 tree fntype;
21758 if (decl)
21759 fntype = TREE_TYPE (decl);
21760 else
21761 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
21763 /* We can't do it if the called function has more vector parameters
21764 than the current function; there's nowhere to put the VRsave code. */
21765 if (TARGET_ALTIVEC_ABI
21766 && TARGET_ALTIVEC_VRSAVE
21767 && !(decl && decl == current_function_decl))
21769 function_args_iterator args_iter;
21770 tree type;
21771 int nvreg = 0;
21773 /* Functions with vector parameters are required to have a
21774 prototype, so the argument type info must be available
21775 here. */
21776 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
21777 if (TREE_CODE (type) == VECTOR_TYPE
21778 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
21779 nvreg++;
21781 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
21782 if (TREE_CODE (type) == VECTOR_TYPE
21783 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
21784 nvreg--;
21786 if (nvreg > 0)
21787 return false;
21790 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
21791 functions, because the callee may have a TOC pointer different
21792 from the caller's and there's no way to ensure we restore the TOC
21793 when we return. With the secure-plt SYSV ABI we can't make
21794 non-local calls when -fpic/-fPIC because the PLT call stubs use r30. */
21795 if (DEFAULT_ABI == ABI_DARWIN
21796 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21797 && decl
21798 && !DECL_EXTERNAL (decl)
21799 && (*targetm.binds_local_p) (decl))
21800 || (DEFAULT_ABI == ABI_V4
21801 && (!TARGET_SECURE_PLT
21802 || !flag_pic
21803 || (decl
21804 && (*targetm.binds_local_p) (decl)))))
21806 tree attr_list = TYPE_ATTRIBUTES (fntype);
21808 if (!lookup_attribute ("longcall", attr_list)
21809 || lookup_attribute ("shortcall", attr_list))
21810 return true;
21813 return false;
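/* As a (hypothetical) user-level example: a locally-binding callee
   declared

     void target (void) __attribute__ ((longcall));

   is rejected here even though it binds locally; only a "shortcall"
   attribute on the type overrides "longcall" and re-enables the
   sibcall optimization.  */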
21816 static int
21817 rs6000_ra_ever_killed (void)
21819 rtx_insn *top;
21820 rtx reg;
21821 rtx_insn *insn;
21823 if (cfun->is_thunk)
21824 return 0;
21826 if (cfun->machine->lr_save_state)
21827 return cfun->machine->lr_save_state - 1;
21829 /* regs_ever_live has LR marked as used if any sibcalls are present,
21830 but this should not force saving and restoring in the
21831 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
21832 clobbers LR, so that is inappropriate. */
21834 /* Also, the prologue can generate a store into LR that
21835 doesn't really count, like this:
21837 move LR->R0
21838 bcl to set PIC register
21839 move LR->R31
21840 move R0->LR
21842 When we're called from the epilogue, we need to avoid counting
21843 this as a store. */
21845 push_topmost_sequence ();
21846 top = get_insns ();
21847 pop_topmost_sequence ();
21848 reg = gen_rtx_REG (Pmode, LR_REGNO);
21850 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
21852 if (INSN_P (insn))
21854 if (CALL_P (insn))
21856 if (!SIBLING_CALL_P (insn))
21857 return 1;
21859 else if (find_regno_note (insn, REG_INC, LR_REGNO))
21860 return 1;
21861 else if (set_of (reg, insn) != NULL_RTX
21862 && !prologue_epilogue_contains (insn))
21863 return 1;
21866 return 0;
21869 /* Emit instructions needed to load the TOC register.
21870 This is only needed when TARGET_TOC and TARGET_MINIMAL_TOC are set
21871 and there is a constant pool, or for SVR4 -fpic. */
21873 void
21874 rs6000_emit_load_toc_table (int fromprolog)
21876 rtx dest;
21877 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
21879 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
21881 char buf[30];
21882 rtx lab, tmp1, tmp2, got;
21884 lab = gen_label_rtx ();
21885 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
21886 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
21887 if (flag_pic == 2)
21888 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
21889 else
21890 got = rs6000_got_sym ();
21891 tmp1 = tmp2 = dest;
21892 if (!fromprolog)
21894 tmp1 = gen_reg_rtx (Pmode);
21895 tmp2 = gen_reg_rtx (Pmode);
21897 emit_insn (gen_load_toc_v4_PIC_1 (lab));
21898 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
21899 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
21900 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
21902 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
21904 emit_insn (gen_load_toc_v4_pic_si ());
21905 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
21907 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
21909 char buf[30];
21910 rtx temp0 = (fromprolog
21911 ? gen_rtx_REG (Pmode, 0)
21912 : gen_reg_rtx (Pmode));
21914 if (fromprolog)
21916 rtx symF, symL;
21918 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
21919 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
21921 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
21922 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
21924 emit_insn (gen_load_toc_v4_PIC_1 (symF));
21925 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
21926 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
21928 else
21930 rtx tocsym, lab;
21932 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
21933 lab = gen_label_rtx ();
21934 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
21935 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
21936 if (TARGET_LINK_STACK)
21937 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
21938 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
21940 emit_insn (gen_addsi3 (dest, temp0, dest));
21942 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
21944 /* This is for AIX-style TOC code running in non-PIC ELF32. */
21945 char buf[30];
21946 rtx realsym;
21947 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
21948 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
21950 emit_insn (gen_elf_high (dest, realsym));
21951 emit_insn (gen_elf_low (dest, dest, realsym));
21953 else
21955 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
21957 if (TARGET_32BIT)
21958 emit_insn (gen_load_toc_aix_si (dest));
21959 else
21960 emit_insn (gen_load_toc_aix_di (dest));
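/* For orientation (a sketch; the precise insns come from the
   load_toc_* patterns in rs6000.md): the secure-plt V.4 -fpic case
   above typically assembles to something like

	bcl 20,31,.L1
     .L1:
	mflr 30
	addis 30,30,_GLOBAL_OFFSET_TABLE_-.L1@ha
	addi 30,30,_GLOBAL_OFFSET_TABLE_-.L1@l

   leaving the GOT pointer in r30.  */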
21964 /* Emit instructions to restore the link register after determining where
21965 its value has been stored. */
21967 void
21968 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
21970 rs6000_stack_t *info = rs6000_stack_info ();
21971 rtx operands[2];
21973 operands[0] = source;
21974 operands[1] = scratch;
21976 if (info->lr_save_p)
21978 rtx frame_rtx = stack_pointer_rtx;
21979 HOST_WIDE_INT sp_offset = 0;
21980 rtx tmp;
21982 if (frame_pointer_needed
21983 || cfun->calls_alloca
21984 || info->total_size > 32767)
21986 tmp = gen_frame_mem (Pmode, frame_rtx);
21987 emit_move_insn (operands[1], tmp);
21988 frame_rtx = operands[1];
21990 else if (info->push_p)
21991 sp_offset = info->total_size;
21993 tmp = plus_constant (Pmode, frame_rtx,
21994 info->lr_save_offset + sp_offset);
21995 tmp = gen_frame_mem (Pmode, tmp);
21996 emit_move_insn (tmp, operands[0]);
21998 else
21999 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
22001 /* Freeze lr_save_p. We've just emitted rtl that depends on the
22002 state of lr_save_p so any change from here on would be a bug. In
22003 particular, stop rs6000_ra_ever_killed from considering the SET
22004 of lr we may have added just above. */
22005 cfun->machine->lr_save_state = info->lr_save_p + 1;
22008 static GTY(()) alias_set_type set = -1;
22010 alias_set_type
22011 get_TOC_alias_set (void)
22013 if (set == -1)
22014 set = new_alias_set ();
22015 return set;
22018 /* This returns nonzero if the current function uses the TOC. This is
22019 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
22020 is generated by the ABI_V4 load_toc_* patterns. */
22021 #if TARGET_ELF
22022 static int
22023 uses_TOC (void)
22025 rtx_insn *insn;
22027 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22028 if (INSN_P (insn))
22030 rtx pat = PATTERN (insn);
22031 int i;
22033 if (GET_CODE (pat) == PARALLEL)
22034 for (i = 0; i < XVECLEN (pat, 0); i++)
22036 rtx sub = XVECEXP (pat, 0, i);
22037 if (GET_CODE (sub) == USE)
22039 sub = XEXP (sub, 0);
22040 if (GET_CODE (sub) == UNSPEC
22041 && XINT (sub, 1) == UNSPEC_TOC)
22042 return 1;
22046 return 0;
22048 #endif
22050 rtx
22051 create_TOC_reference (rtx symbol, rtx largetoc_reg)
22053 rtx tocrel, tocreg, hi;
22055 if (TARGET_DEBUG_ADDR)
22057 if (GET_CODE (symbol) == SYMBOL_REF)
22058 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
22059 XSTR (symbol, 0));
22060 else
22062 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
22063 GET_RTX_NAME (GET_CODE (symbol)));
22064 debug_rtx (symbol);
22068 if (!can_create_pseudo_p ())
22069 df_set_regs_ever_live (TOC_REGISTER, true);
22071 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
22072 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
22073 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
22074 return tocrel;
22076 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
22077 if (largetoc_reg != NULL)
22079 emit_move_insn (largetoc_reg, hi);
22080 hi = largetoc_reg;
22082 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
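/* Illustratively (the final assembly depends on the tocrel patterns
   elsewhere): with -mcmodel=small the UNSPEC_TOCREL returned here ends
   up as a single TOC-relative access such as

	ld 9,sym@toc(2)

   while for medium/large code models the HIGH/LO_SUM pair built above
   becomes

	addis 9,2,sym@toc@ha
	ld 10,sym@toc@l(9)

   with the addis kept in LARGETOC_REG, when the caller supplies one,
   so it can be shared.  */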
22085 /* Issue assembly directives that create a reference to the given DWARF
22086 FRAME_TABLE_LABEL from the current function section. */
22087 void
22088 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
22090 fprintf (asm_out_file, "\t.ref %s\n",
22091 (* targetm.strip_name_encoding) (frame_table_label));
22094 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
22095 and the change to the stack pointer. */
22097 static void
22098 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
22100 rtvec p;
22101 int i;
22102 rtx regs[3];
22104 i = 0;
22105 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22106 if (hard_frame_needed)
22107 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
22108 if (!(REGNO (fp) == STACK_POINTER_REGNUM
22109 || (hard_frame_needed
22110 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
22111 regs[i++] = fp;
22113 p = rtvec_alloc (i);
22114 while (--i >= 0)
22116 rtx mem = gen_frame_mem (BLKmode, regs[i]);
22117 RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, mem, const0_rtx);
22120 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
22123 /* Emit the correct code for allocating stack space, as insns.
22124 If COPY_REG, leave a copy of the old stack pointer (offset by
22125 COPY_OFF) in it. The generated code may use hard register 0 as a temporary. */
22127 static void
22128 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
22130 rtx_insn *insn;
22131 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22132 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
22133 rtx todec = gen_int_mode (-size, Pmode);
22134 rtx par, set, mem;
22136 if (INTVAL (todec) != -size)
22138 warning (0, "stack frame too large");
22139 emit_insn (gen_trap ());
22140 return;
22143 if (crtl->limit_stack)
22145 if (REG_P (stack_limit_rtx)
22146 && REGNO (stack_limit_rtx) > 1
22147 && REGNO (stack_limit_rtx) <= 31)
22149 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
22150 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22151 const0_rtx));
22153 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
22154 && TARGET_32BIT
22155 && DEFAULT_ABI == ABI_V4)
22157 rtx toload = gen_rtx_CONST (VOIDmode,
22158 gen_rtx_PLUS (Pmode,
22159 stack_limit_rtx,
22160 GEN_INT (size)));
22162 emit_insn (gen_elf_high (tmp_reg, toload));
22163 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
22164 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22165 const0_rtx));
22167 else
22168 warning (0, "stack limit expression is not supported");
22171 if (copy_reg)
22173 if (copy_off != 0)
22174 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
22175 else
22176 emit_move_insn (copy_reg, stack_reg);
22179 if (size > 32767)
22181 /* Need a note here so that try_split doesn't get confused. */
22182 if (get_last_insn () == NULL_RTX)
22183 emit_note (NOTE_INSN_DELETED);
22184 insn = emit_move_insn (tmp_reg, todec);
22185 try_split (PATTERN (insn), insn, 0);
22186 todec = tmp_reg;
22189 insn = emit_insn (TARGET_32BIT
22190 ? gen_movsi_update_stack (stack_reg, stack_reg,
22191 todec, stack_reg)
22192 : gen_movdi_di_update_stack (stack_reg, stack_reg,
22193 todec, stack_reg));
22194 /* Since we didn't use gen_frame_mem to generate the MEM, grab
22195 it now and set the alias set/attributes. The above gen_*_update
22196 calls will generate a PARALLEL with the MEM set being the first
22197 operation. */
22198 par = PATTERN (insn);
22199 gcc_assert (GET_CODE (par) == PARALLEL);
22200 set = XVECEXP (par, 0, 0);
22201 gcc_assert (GET_CODE (set) == SET);
22202 mem = SET_DEST (set);
22203 gcc_assert (MEM_P (mem));
22204 MEM_NOTRAP_P (mem) = 1;
22205 set_mem_alias_set (mem, get_frame_alias_set ());
22207 RTX_FRAME_RELATED_P (insn) = 1;
22208 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
22209 gen_rtx_SET (VOIDmode, stack_reg,
22210 gen_rtx_PLUS (Pmode, stack_reg,
22211 GEN_INT (-size))));
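/* The net effect, sketched for 32-bit (the 64-bit forms are analogous):
   a small frame allocates and stores the back chain atomically,

	stwu 1,-SIZE(1)

   while SIZE > 32767 first materializes the (split) constant in r0 and
   uses the indexed update form, roughly

	lis 0,hi16(-SIZE)
	ori 0,0,lo16(-SIZE)
	stwux 1,1,0

   so in both cases the back chain store and the r1 update are one insn.  */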
22214 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22216 #if PROBE_INTERVAL > 32768
22217 #error Cannot use indexed addressing mode for stack probing
22218 #endif
22220 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22221 inclusive. These are offsets from the current stack pointer. */
22223 static void
22224 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
22226 /* See if we have a constant small number of probes to generate. If so,
22227 that's the easy case. */
22228 if (first + size <= 32768)
22230 HOST_WIDE_INT i;
22232 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
22233 it exceeds SIZE. If only one probe is needed, this will not
22234 generate any code. Then probe at FIRST + SIZE. */
22235 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
22236 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22237 -(first + i)));
22239 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22240 -(first + size)));
22243 /* Otherwise, do the same as above, but in a loop. Note that we must be
22244 extra careful with variables wrapping around because we might be at
22245 the very top (or the very bottom) of the address space and we have
22246 to be able to handle this case properly; in particular, we use an
22247 equality test for the loop condition. */
22248 else
22250 HOST_WIDE_INT rounded_size;
22251 rtx r12 = gen_rtx_REG (Pmode, 12);
22252 rtx r0 = gen_rtx_REG (Pmode, 0);
22254 /* Sanity check for the addressing mode we're going to use. */
22255 gcc_assert (first <= 32768);
22257 /* Step 1: round SIZE to the previous multiple of the interval. */
22259 rounded_size = size & -PROBE_INTERVAL;
22262 /* Step 2: compute initial and final value of the loop counter. */
22264 /* TEST_ADDR = SP + FIRST. */
22265 emit_insn (gen_rtx_SET (VOIDmode, r12,
22266 plus_constant (Pmode, stack_pointer_rtx,
22267 -first)));
22269 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22270 if (rounded_size > 32768)
22272 emit_move_insn (r0, GEN_INT (-rounded_size));
22273 emit_insn (gen_rtx_SET (VOIDmode, r0,
22274 gen_rtx_PLUS (Pmode, r12, r0)));
22276 else
22277 emit_insn (gen_rtx_SET (VOIDmode, r0,
22278 plus_constant (Pmode, r12, -rounded_size)));
22281 /* Step 3: the loop
22283 while (TEST_ADDR != LAST_ADDR)
22284 {
22285 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
22286 probe at TEST_ADDR
22287 }
22289 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
22290 until it is equal to ROUNDED_SIZE. */
22292 if (TARGET_64BIT)
22293 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
22294 else
22295 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
22298 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22299 that SIZE is equal to ROUNDED_SIZE. */
22301 if (size != rounded_size)
22302 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
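/* Worked example, assuming PROBE_INTERVAL == 4096: for FIRST == 16384
   and SIZE == 10000 we have FIRST + SIZE <= 32768, so the first branch
   emits probes at sp-20480 and sp-24576 and a final probe at sp-26384.
   Anything larger falls into the loop form, with ROUNDED_SIZE ==
   SIZE & -4096 and a trailing probe only when SIZE is not a multiple
   of the interval.  */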
22306 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22307 absolute addresses. */
22309 const char *
22310 output_probe_stack_range (rtx reg1, rtx reg2)
22312 static int labelno = 0;
22313 char loop_lab[32], end_lab[32];
22314 rtx xops[2];
22316 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
22317 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
22319 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22321 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
22322 xops[0] = reg1;
22323 xops[1] = reg2;
22324 if (TARGET_64BIT)
22325 output_asm_insn ("cmpd 0,%0,%1", xops);
22326 else
22327 output_asm_insn ("cmpw 0,%0,%1", xops);
22329 fputs ("\tbeq 0,", asm_out_file);
22330 assemble_name_raw (asm_out_file, end_lab);
22331 fputc ('\n', asm_out_file);
22333 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22334 xops[1] = GEN_INT (-PROBE_INTERVAL);
22335 output_asm_insn ("addi %0,%0,%1", xops);
22337 /* Probe at TEST_ADDR and branch. */
22338 xops[1] = gen_rtx_REG (Pmode, 0);
22339 output_asm_insn ("stw %1,0(%0)", xops);
22340 fprintf (asm_out_file, "\tb ");
22341 assemble_name_raw (asm_out_file, loop_lab);
22342 fputc ('\n', asm_out_file);
22344 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
22346 return "";
22349 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
22350 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
22351 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
22352 deduce these equivalences by itself so it wasn't necessary to hold
22353 its hand so much. Don't be tempted to always supply d2_f_d_e with
22354 the actual cfa register, i.e. r31 when we are using a hard frame
22355 pointer. That fails when saving regs off r1, and sched moves the
22356 r31 setup past the reg saves. */
22358 static rtx
22359 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
22360 rtx reg2, rtx rreg, rtx split_reg)
22362 rtx real, temp;
22364 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
22366 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
22367 int i;
22369 gcc_checking_assert (val == 0);
22370 real = PATTERN (insn);
22371 if (GET_CODE (real) == PARALLEL)
22372 for (i = 0; i < XVECLEN (real, 0); i++)
22373 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22375 rtx set = XVECEXP (real, 0, i);
22377 RTX_FRAME_RELATED_P (set) = 1;
22379 RTX_FRAME_RELATED_P (insn) = 1;
22380 return insn;
22383 /* copy_rtx will not make unique copies of registers, so we need to
22384 ensure we don't have unwanted sharing here. */
22385 if (reg == reg2)
22386 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22388 if (reg == rreg)
22389 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22391 real = copy_rtx (PATTERN (insn));
22393 if (reg2 != NULL_RTX)
22394 real = replace_rtx (real, reg2, rreg);
22396 if (REGNO (reg) == STACK_POINTER_REGNUM)
22397 gcc_checking_assert (val == 0);
22398 else
22399 real = replace_rtx (real, reg,
22400 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
22401 STACK_POINTER_REGNUM),
22402 GEN_INT (val)));
22404 /* We expect that 'real' is either a SET or a PARALLEL containing
22405 SETs (and possibly other stuff). In a PARALLEL, all the SETs
22406 are important so they all have to be marked RTX_FRAME_RELATED_P. */
22408 if (GET_CODE (real) == SET)
22410 rtx set = real;
22412 temp = simplify_rtx (SET_SRC (set));
22413 if (temp)
22414 SET_SRC (set) = temp;
22415 temp = simplify_rtx (SET_DEST (set));
22416 if (temp)
22417 SET_DEST (set) = temp;
22418 if (GET_CODE (SET_DEST (set)) == MEM)
22420 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22421 if (temp)
22422 XEXP (SET_DEST (set), 0) = temp;
22425 else
22427 int i;
22429 gcc_assert (GET_CODE (real) == PARALLEL);
22430 for (i = 0; i < XVECLEN (real, 0); i++)
22431 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22433 rtx set = XVECEXP (real, 0, i);
22435 temp = simplify_rtx (SET_SRC (set));
22436 if (temp)
22437 SET_SRC (set) = temp;
22438 temp = simplify_rtx (SET_DEST (set));
22439 if (temp)
22440 SET_DEST (set) = temp;
22441 if (GET_CODE (SET_DEST (set)) == MEM)
22443 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22444 if (temp)
22445 XEXP (SET_DEST (set), 0) = temp;
22447 RTX_FRAME_RELATED_P (set) = 1;
22451 /* If a store insn has been split into multiple insns, the
22452 true source register is given by split_reg. */
22453 if (split_reg != NULL_RTX)
22454 real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
22456 RTX_FRAME_RELATED_P (insn) = 1;
22457 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
22459 return insn;
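/* Example of the rewriting above (illustrative register numbers): for a
   frame store

     (set (mem:DI (plus:DI (reg 31) (const_int 8))) (reg 28))

   called with REG = r31 and VAL = 64, the REG_FRAME_RELATED_EXPR note
   records

     (set (mem:DI (plus:DI (reg 1) (const_int 72))) (reg 28))

   after simplify_rtx folds (plus (plus (reg 1) 64) 8), so the unwinder
   sees an sp-relative save address.  */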
22462 /* Returns an insn that has a vrsave set operation with the
22463 appropriate CLOBBERs. */
22465 static rtx
22466 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
22468 int nclobs, i;
22469 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
22470 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
22472 clobs[0]
22473 = gen_rtx_SET (VOIDmode,
22474 vrsave,
22475 gen_rtx_UNSPEC_VOLATILE (SImode,
22476 gen_rtvec (2, reg, vrsave),
22477 UNSPECV_SET_VRSAVE));
22479 nclobs = 1;
22481 /* We need to clobber the registers in the mask so the scheduler
22482 does not move sets to VRSAVE before sets of AltiVec registers.
22484 However, if the function receives nonlocal gotos, reload will set
22485 all call saved registers live. We will end up with:
22487 (set (reg 999) (mem))
22488 (parallel [ (set (reg vrsave) (unspec blah))
22489 (clobber (reg 999))])
22491 The clobber will cause the store into reg 999 to be dead, and
22492 flow will attempt to delete an epilogue insn. In this case, we
22493 need an unspec use/set of the register. */
22495 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
22496 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22498 if (!epiloguep || call_used_regs [i])
22499 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
22500 gen_rtx_REG (V4SImode, i));
22501 else
22503 rtx reg = gen_rtx_REG (V4SImode, i);
22505 clobs[nclobs++]
22506 = gen_rtx_SET (VOIDmode,
22507 reg,
22508 gen_rtx_UNSPEC (V4SImode,
22509 gen_rtvec (1, reg), 27));
22513 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
22515 for (i = 0; i < nclobs; ++i)
22516 XVECEXP (insn, 0, i) = clobs[i];
22518 return insn;
22521 static rtx
22522 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
22524 rtx addr, mem;
22526 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
22527 mem = gen_frame_mem (GET_MODE (reg), addr);
22528 return gen_rtx_SET (VOIDmode, store ? mem : reg, store ? reg : mem);
22531 static rtx
22532 gen_frame_load (rtx reg, rtx frame_reg, int offset)
22534 return gen_frame_set (reg, frame_reg, offset, false);
22537 static rtx
22538 gen_frame_store (rtx reg, rtx frame_reg, int offset)
22540 return gen_frame_set (reg, frame_reg, offset, true);
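/* For instance (assuming a 64-bit target), gen_frame_store of r28 with
   FRAME_REG = r1 and OFFSET = 16 produces

     (set (mem:DI (plus:DI (reg:DI 1) (const_int 16))) (reg:DI 28))

   and gen_frame_load simply swaps source and destination.  */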
22543 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
22544 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
22546 static rtx
22547 emit_frame_save (rtx frame_reg, enum machine_mode mode,
22548 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
22550 rtx reg, insn;
22552 /* Assert we are not given a case that needs register indexed addressing. */
22553 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
22554 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22555 || (TARGET_E500_DOUBLE && mode == DFmode)
22556 || (TARGET_SPE_ABI
22557 && SPE_VECTOR_MODE (mode)
22558 && !SPE_CONST_OFFSET_OK (offset))));
22560 reg = gen_rtx_REG (mode, regno);
22561 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
22562 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
22563 NULL_RTX, NULL_RTX, NULL_RTX);
22566 /* Emit an offset memory reference suitable for a frame store, while
22567 converting to a valid addressing mode. */
22569 static rtx
22570 gen_frame_mem_offset (enum machine_mode mode, rtx reg, int offset)
22572 rtx int_rtx, offset_rtx;
22574 int_rtx = GEN_INT (offset);
22576 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
22577 || (TARGET_E500_DOUBLE && mode == DFmode))
22579 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
22580 emit_move_insn (offset_rtx, int_rtx);
22582 else
22583 offset_rtx = int_rtx;
22585 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
22588 #ifndef TARGET_FIX_AND_CONTINUE
22589 #define TARGET_FIX_AND_CONTINUE 0
22590 #endif
22592 /* It's really GPR 13 or 14, FPR 14, or VR 20; we need the smallest. */
22593 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
22594 #define LAST_SAVRES_REGISTER 31
22595 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
22597 enum {
22598 SAVRES_LR = 0x1,
22599 SAVRES_SAVE = 0x2,
22600 SAVRES_REG = 0x0c,
22601 SAVRES_GPR = 0,
22602 SAVRES_FPR = 4,
22603 SAVRES_VR = 8
22604 };
22606 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
22608 /* Temporary holding space for an out-of-line register save/restore
22609 routine name. */
22610 static char savres_routine_name[30];
22612 /* Return the name for an out-of-line register save/restore routine.
22613 REGNO is the first register handled; SEL selects the register class,
22614 save vs. restore, and whether LR is also handled. */
22615 static char *
22616 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
22618 const char *prefix = "";
22619 const char *suffix = "";
22621 /* Different targets are supposed to define
22622 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
22623 routine name could be defined with:
22625 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
22627 This is a nice idea in theory, but in reality, things are
22628 complicated in several ways:
22630 - ELF targets have save/restore routines for GPRs.
22632 - SPE targets use different prefixes for 32/64-bit registers, and
22633 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
22635 - PPC64 ELF targets have routines for save/restore of GPRs that
22636 differ in what they do with the link register, so having a set
22637 prefix doesn't work. (We only use one of the save routines at
22638 the moment, though.)
22640 - PPC32 ELF targets have "exit" versions of the restore routines
22641 that restore the link register and can save some extra space.
22642 These require an extra suffix. (There are also "tail" versions
22643 of the restore routines and "GOT" versions of the save routines,
22644 but we don't generate those at present. Same problems apply,
22645 though.)
22647 We deal with all this by synthesizing our own prefix/suffix and
22648 using that for the simple sprintf call shown above. */
22649 if (TARGET_SPE)
22651 /* No floating point saves on the SPE. */
22652 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
22654 if ((sel & SAVRES_SAVE))
22655 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
22656 else
22657 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
22659 if ((sel & SAVRES_LR))
22660 suffix = "_x";
22662 else if (DEFAULT_ABI == ABI_V4)
22664 if (TARGET_64BIT)
22665 goto aix_names;
22667 if ((sel & SAVRES_REG) == SAVRES_GPR)
22668 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
22669 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22670 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
22671 else if ((sel & SAVRES_REG) == SAVRES_VR)
22672 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
22673 else
22674 abort ();
22676 if ((sel & SAVRES_LR))
22677 suffix = "_x";
22679 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22681 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
22682 /* No out-of-line save/restore routines for GPRs on AIX. */
22683 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
22684 #endif
22686 aix_names:
22687 if ((sel & SAVRES_REG) == SAVRES_GPR)
22688 prefix = ((sel & SAVRES_SAVE)
22689 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
22690 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
22691 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22693 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
22694 if ((sel & SAVRES_LR))
22695 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
22696 else
22697 #endif
22699 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
22700 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
22703 else if ((sel & SAVRES_REG) == SAVRES_VR)
22704 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
22705 else
22706 abort ();
22709 if (DEFAULT_ABI == ABI_DARWIN)
22711 /* The Darwin approach is (slightly) different, in order to be
22712 compatible with code generated by the system toolchain. There is a
22713 single symbol for the start of save sequence, and the code here
22714 embeds an offset into that code on the basis of the first register
22715 to be saved. */
22716 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
22717 if ((sel & SAVRES_REG) == SAVRES_GPR)
22718 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
22719 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
22720 (regno - 13) * 4, prefix, regno);
22721 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22722 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
22723 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
22724 else if ((sel & SAVRES_REG) == SAVRES_VR)
22725 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
22726 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
22727 else
22728 abort ();
22730 else
22731 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
22733 return savres_routine_name;
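/* Some sample results of the rules above: a 32-bit SVR4 GPR restore
   starting at r29 with the "exit" LR variant yields "_restgpr_29_x";
   the AIX/ELFv2 GPR save that also stores LR yields "_savegpr0_29";
   and Darwin emits an offset into a single entry point, e.g.
   "*saveGPR+64 ; save r29-r31" for regno == 29.  */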
22736 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
22737 SEL selects the register class and the save/restore direction, as above. */
22739 static rtx
22740 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
22742 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
22743 ? info->first_gp_reg_save
22744 : (sel & SAVRES_REG) == SAVRES_FPR
22745 ? info->first_fp_reg_save - 32
22746 : (sel & SAVRES_REG) == SAVRES_VR
22747 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
22748 : -1);
22749 rtx sym;
22750 int select = sel;
22752 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
22753 versions of the gpr routines. */
22754 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
22755 && info->spe_64bit_regs_used)
22756 select ^= SAVRES_FPR ^ SAVRES_GPR;
22758 /* Don't generate bogus routine names. */
22759 gcc_assert (FIRST_SAVRES_REGISTER <= regno
22760 && regno <= LAST_SAVRES_REGISTER
22761 && select >= 0 && select <= 12);
22763 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
22765 if (sym == NULL)
22767 char *name;
22769 name = rs6000_savres_routine_name (info, regno, sel);
22771 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
22772 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
22773 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
22776 return sym;
22779 /* Emit a sequence of insns, including a stack tie if needed, for
22780 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
22781 reset the stack pointer, but move the base of the frame into
22782 reg UPDT_REGNO for use by out-of-line register restore routines. */
22784 static rtx
22785 rs6000_emit_stack_reset (rs6000_stack_t *info,
22786 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
22787 unsigned updt_regno)
22789 rtx updt_reg_rtx;
22791 /* This blockage is needed so that sched doesn't decide to move
22792 the sp change before the register restores. */
22793 if (DEFAULT_ABI == ABI_V4
22794 || (TARGET_SPE_ABI
22795 && info->spe_64bit_regs_used != 0
22796 && info->first_gp_reg_save != 32))
22797 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
22799 /* If we are restoring registers out-of-line, we will be using the
22800 "exit" variants of the restore routines, which will reset the
22801 stack for us. But we do need to point updt_reg into the
22802 right place for those routines. */
22803 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
22805 if (frame_off != 0)
22806 return emit_insn (gen_add3_insn (updt_reg_rtx,
22807 frame_reg_rtx, GEN_INT (frame_off)));
22808 else if (REGNO (frame_reg_rtx) != updt_regno)
22809 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
22811 return NULL_RTX;
22814 /* Return the register number used as a pointer by out-of-line
22815 save/restore functions. */
22817 static inline unsigned
22818 ptr_regno_for_savres (int sel)
22820 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22821 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
22822 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
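/* Summarizing the selection above: AIX/ELFv2 use r1 for the FPR
   routines and for any routine that also handles LR, r12 otherwise;
   Darwin uses r1 for FPRs and r11 for everything else; 32-bit SVR4
   always uses r11.  */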
22825 /* Construct a parallel rtx describing the effect of a call to an
22826 out-of-line register save/restore routine, and emit the insn
22827 or jump_insn as appropriate. */
22829 static rtx
22830 rs6000_emit_savres_rtx (rs6000_stack_t *info,
22831 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
22832 enum machine_mode reg_mode, int sel)
22834 int i;
22835 int offset, start_reg, end_reg, n_regs, use_reg;
22836 int reg_size = GET_MODE_SIZE (reg_mode);
22837 rtx sym;
22838 rtvec p;
22839 rtx par, insn;
22841 offset = 0;
22842 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
22843 ? info->first_gp_reg_save
22844 : (sel & SAVRES_REG) == SAVRES_FPR
22845 ? info->first_fp_reg_save
22846 : (sel & SAVRES_REG) == SAVRES_VR
22847 ? info->first_altivec_reg_save
22848 : -1);
22849 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
22850 ? 32
22851 : (sel & SAVRES_REG) == SAVRES_FPR
22852 ? 64
22853 : (sel & SAVRES_REG) == SAVRES_VR
22854 ? LAST_ALTIVEC_REGNO + 1
22855 : -1);
22856 n_regs = end_reg - start_reg;
22857 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
22858 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
22859 + n_regs);
22861 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
22862 RTVEC_ELT (p, offset++) = ret_rtx;
22864 RTVEC_ELT (p, offset++)
22865 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
22867 sym = rs6000_savres_routine_sym (info, sel);
22868 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
22870 use_reg = ptr_regno_for_savres (sel);
22871 if ((sel & SAVRES_REG) == SAVRES_VR)
22873 /* Vector regs are saved/restored using [reg+reg] addressing. */
22874 RTVEC_ELT (p, offset++)
22875 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
22876 RTVEC_ELT (p, offset++)
22877 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
22879 else
22880 RTVEC_ELT (p, offset++)
22881 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
22883 for (i = 0; i < end_reg - start_reg; i++)
22884 RTVEC_ELT (p, i + offset)
22885 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
22886 frame_reg_rtx, save_area_offset + reg_size * i,
22887 (sel & SAVRES_SAVE) != 0);
22889 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
22890 RTVEC_ELT (p, i + offset)
22891 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
22893 par = gen_rtx_PARALLEL (VOIDmode, p);
22895 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
22897 insn = emit_jump_insn (par);
22898 JUMP_LABEL (insn) = ret_rtx;
22900 else
22901 insn = emit_insn (par);
22902 return insn;
22905 /* Emit code to store CR fields that need to be saved into REG. */
22907 static void
22908 rs6000_emit_move_from_cr (rtx reg)
22910 /* Only the ELFv2 ABI allows storing only selected fields. */
22911 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
22913 int i, cr_reg[8], count = 0;
22915 /* Collect CR fields that must be saved. */
22916 for (i = 0; i < 8; i++)
22917 if (save_reg_p (CR0_REGNO + i))
22918 cr_reg[count++] = i;
22920 /* If it's just a single one, use mfcrf. */
22921 if (count == 1)
22923 rtvec p = rtvec_alloc (1);
22924 rtvec r = rtvec_alloc (2);
22925 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
22926 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
22927 RTVEC_ELT (p, 0)
22928 = gen_rtx_SET (VOIDmode, reg,
22929 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
22931 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
22932 return;
22935 /* ??? It might be better to handle count == 2 / 3 cases here
22936 as well, using logical operations to combine the values. */
22939 emit_insn (gen_movesi_from_cr (reg));
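/* Concretely (a sketch; the exact mnemonic comes from the matching
   move-from-CR patterns): with only CR2 live the ELFv2 path above
   allows a single-field move such as

	mfcrf 12,0x20

   whereas gen_movesi_from_cr produces a full

	mfcr 12

   forcing the unwinder to describe all of CR as saved.  */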
22942 /* Determine whether GPR REG is really used. */
22944 static bool
22945 rs6000_reg_live_or_pic_offset_p (int reg)
22947 /* If the function calls eh_return, claim that all registers that would
22948 otherwise be checked for liveness are used. This is required for the
22949 PIC offset register with -mminimal-toc on AIX, as it is advertised as
22950 "fixed" for register allocation purposes in this case. */
22952 return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
22953 && (!call_used_regs[reg]
22954 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
22955 && !TARGET_SINGLE_PIC_BASE
22956 && TARGET_TOC && TARGET_MINIMAL_TOC)))
22957 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
22958 && !TARGET_SINGLE_PIC_BASE
22959 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
22960 || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
22963 /* Emit function prologue as insns. */
22965 void
22966 rs6000_emit_prologue (void)
22968 rs6000_stack_t *info = rs6000_stack_info ();
22969 enum machine_mode reg_mode = Pmode;
22970 int reg_size = TARGET_32BIT ? 4 : 8;
22971 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22972 rtx frame_reg_rtx = sp_reg_rtx;
22973 unsigned int cr_save_regno;
22974 rtx cr_save_rtx = NULL_RTX;
22975 rtx insn;
22976 int strategy;
22977 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
22978 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
22979 && call_used_regs[STATIC_CHAIN_REGNUM]);
22980 /* Offset to top of frame for frame_reg and sp respectively. */
22981 HOST_WIDE_INT frame_off = 0;
22982 HOST_WIDE_INT sp_off = 0;
22984 #ifdef ENABLE_CHECKING
22985 /* Track and check usage of r0, r11, r12. */
22986 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
22987 #define START_USE(R) do \
22989 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
22990 reg_inuse |= 1 << (R); \
22991 } while (0)
22992 #define END_USE(R) do \
22994 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
22995 reg_inuse &= ~(1 << (R)); \
22996 } while (0)
22997 #define NOT_INUSE(R) do \
22999 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23000 } while (0)
23001 #else
23002 #define START_USE(R) do {} while (0)
23003 #define END_USE(R) do {} while (0)
23004 #define NOT_INUSE(R) do {} while (0)
23005 #endif
23007 if (DEFAULT_ABI == ABI_ELFv2)
23009 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
23011 /* With -mminimal-toc we may generate an extra use of r2 below. */
23012 if (!TARGET_SINGLE_PIC_BASE
23013 && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
23014 cfun->machine->r2_setup_needed = true;
23018 if (flag_stack_usage_info)
23019 current_function_static_stack_size = info->total_size;
23021 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
23023 HOST_WIDE_INT size = info->total_size;
23025 if (crtl->is_leaf && !cfun->calls_alloca)
23027 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
23028 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
23029 size - STACK_CHECK_PROTECT);
23031 else if (size > 0)
23032 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
23035 if (TARGET_FIX_AND_CONTINUE)
23037 /* gdb on darwin arranges to forward a function from the old
23038 address by modifying the first 5 instructions of the function
23039 to branch to the overriding function. This is necessary to
23040 permit function pointers that point to the old function to
23041 actually forward to the new function. */
23042 emit_insn (gen_nop ());
23043 emit_insn (gen_nop ());
23044 emit_insn (gen_nop ());
23045 emit_insn (gen_nop ());
23046 emit_insn (gen_nop ());
23049 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23051 reg_mode = V2SImode;
23052 reg_size = 8;
23055 /* Handle world saves specially here. */
23056 if (WORLD_SAVE_P (info))
23058 int i, j, sz;
23059 rtx treg;
23060 rtvec p;
23061 rtx reg0;
23063 /* save_world expects lr in r0. */
23064 reg0 = gen_rtx_REG (Pmode, 0);
23065 if (info->lr_save_p)
23067 insn = emit_move_insn (reg0,
23068 gen_rtx_REG (Pmode, LR_REGNO));
23069 RTX_FRAME_RELATED_P (insn) = 1;
23072 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
23073 assumptions about the offsets of various bits of the stack
23074 frame. */
23075 gcc_assert (info->gp_save_offset == -220
23076 && info->fp_save_offset == -144
23077 && info->lr_save_offset == 8
23078 && info->cr_save_offset == 4
23079 && info->push_p
23080 && info->lr_save_p
23081 && (!crtl->calls_eh_return
23082 || info->ehrd_offset == -432)
23083 && info->vrsave_save_offset == -224
23084 && info->altivec_save_offset == -416);
23086 treg = gen_rtx_REG (SImode, 11);
23087 emit_move_insn (treg, GEN_INT (-info->total_size));
23089 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
23090 in R11. It also clobbers R12, so beware! */
23092 /* Preserve CR2 for save_world prologues. */
23093 sz = 5;
23094 sz += 32 - info->first_gp_reg_save;
23095 sz += 64 - info->first_fp_reg_save;
23096 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
23097 p = rtvec_alloc (sz);
23098 j = 0;
23099 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
23100 gen_rtx_REG (SImode,
23101 LR_REGNO));
23102 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
23103 gen_rtx_SYMBOL_REF (Pmode,
23104 "*save_world"));
23105 /* We do floats first so that the instruction pattern matches
23106 properly. */
23107 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23108 RTVEC_ELT (p, j++)
23109 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23110 ? DFmode : SFmode,
23111 info->first_fp_reg_save + i),
23112 frame_reg_rtx,
23113 info->fp_save_offset + frame_off + 8 * i);
23114 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
23115 RTVEC_ELT (p, j++)
23116 = gen_frame_store (gen_rtx_REG (V4SImode,
23117 info->first_altivec_reg_save + i),
23118 frame_reg_rtx,
23119 info->altivec_save_offset + frame_off + 16 * i);
23120 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23121 RTVEC_ELT (p, j++)
23122 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23123 frame_reg_rtx,
23124 info->gp_save_offset + frame_off + reg_size * i);
23126 /* CR register traditionally saved as CR2. */
23127 RTVEC_ELT (p, j++)
23128 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
23129 frame_reg_rtx, info->cr_save_offset + frame_off);
23130 /* Explain the use of R0. */
23131 if (info->lr_save_p)
23132 RTVEC_ELT (p, j++)
23133 = gen_frame_store (reg0,
23134 frame_reg_rtx, info->lr_save_offset + frame_off);
23135 /* Explain what happens to the stack pointer. */
23137 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
23138 RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval);
23141 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23142 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23143 treg, GEN_INT (-info->total_size), NULL_RTX);
23144 sp_off = frame_off = info->total_size;
23147 strategy = info->savres_strategy;
23149 /* For V.4, update stack before we do any saving and set back pointer. */
23150 if (! WORLD_SAVE_P (info)
23151 && info->push_p
23152 && (DEFAULT_ABI == ABI_V4
23153 || crtl->calls_eh_return))
23155 bool need_r11 = (TARGET_SPE
23156 ? (!(strategy & SAVE_INLINE_GPRS)
23157 && info->spe_64bit_regs_used == 0)
23158 : (!(strategy & SAVE_INLINE_FPRS)
23159 || !(strategy & SAVE_INLINE_GPRS)
23160 || !(strategy & SAVE_INLINE_VRS)));
23161 int ptr_regno = -1;
23162 rtx ptr_reg = NULL_RTX;
23163 int ptr_off = 0;
23165 if (info->total_size < 32767)
23166 frame_off = info->total_size;
23167 else if (need_r11)
23168 ptr_regno = 11;
23169 else if (info->cr_save_p
23170 || info->lr_save_p
23171 || info->first_fp_reg_save < 64
23172 || info->first_gp_reg_save < 32
23173 || info->altivec_size != 0
23174 || info->vrsave_mask != 0
23175 || crtl->calls_eh_return)
23176 ptr_regno = 12;
23177 else
23179 /* The prologue won't be saving any regs so there is no need
23180 to set up a frame register to access any frame save area.
23181 We also won't be using frame_off anywhere below, but set
23182 the correct value anyway to protect against future
23183 changes to this function. */
23184 frame_off = info->total_size;
23186 if (ptr_regno != -1)
23188 /* Set up the frame offset to that needed by the first
23189 out-of-line save function. */
23190 START_USE (ptr_regno);
23191 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23192 frame_reg_rtx = ptr_reg;
23193 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
23194 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
23195 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
23196 ptr_off = info->gp_save_offset + info->gp_size;
23197 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
23198 ptr_off = info->altivec_save_offset + info->altivec_size;
23199 frame_off = -ptr_off;
23201 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23202 sp_off = info->total_size;
23203 if (frame_reg_rtx != sp_reg_rtx)
23204 rs6000_emit_stack_tie (frame_reg_rtx, false);
23207 /* If we use the link register, get it into r0. */
23208 if (!WORLD_SAVE_P (info) && info->lr_save_p)
23210 rtx addr, reg, mem;
23212 reg = gen_rtx_REG (Pmode, 0);
23213 START_USE (0);
23214 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
23215 RTX_FRAME_RELATED_P (insn) = 1;
23217 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
23218 | SAVE_NOINLINE_FPRS_SAVES_LR)))
23220 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23221 GEN_INT (info->lr_save_offset + frame_off));
23222 mem = gen_rtx_MEM (Pmode, addr);
23223 /* This should not be of rs6000_sr_alias_set, because of
23224 __builtin_return_address. */
23226 insn = emit_move_insn (mem, reg);
23227 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23228 NULL_RTX, NULL_RTX, NULL_RTX);
23229 END_USE (0);
23233 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
23234 r12 will be needed by the out-of-line gpr save. */
23235 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23236 && !(strategy & (SAVE_INLINE_GPRS
23237 | SAVE_NOINLINE_GPRS_SAVES_LR))
23238 ? 11 : 12);
23239 if (!WORLD_SAVE_P (info)
23240 && info->cr_save_p
23241 && REGNO (frame_reg_rtx) != cr_save_regno
23242 && !(using_static_chain_p && cr_save_regno == 11))
23244 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
23245 START_USE (cr_save_regno);
23246 rs6000_emit_move_from_cr (cr_save_rtx);
23249 /* Do any required saving of fpr's. If only one or two to save, do
23250 it ourselves. Otherwise, call function. */
23251 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
23253 int i;
23254 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23255 if (save_reg_p (info->first_fp_reg_save + i))
23256 emit_frame_save (frame_reg_rtx,
23257 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23258 ? DFmode : SFmode),
23259 info->first_fp_reg_save + i,
23260 info->fp_save_offset + frame_off + 8 * i,
23261 sp_off - frame_off);
23263 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
23265 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
23266 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
23267 unsigned ptr_regno = ptr_regno_for_savres (sel);
23268 rtx ptr_reg = frame_reg_rtx;
23270 if (REGNO (frame_reg_rtx) == ptr_regno)
23271 gcc_checking_assert (frame_off == 0);
23272 else
23274 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23275 NOT_INUSE (ptr_regno);
23276 emit_insn (gen_add3_insn (ptr_reg,
23277 frame_reg_rtx, GEN_INT (frame_off)));
23279 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23280 info->fp_save_offset,
23281 info->lr_save_offset,
23282 DFmode, sel);
23283 rs6000_frame_related (insn, ptr_reg, sp_off,
23284 NULL_RTX, NULL_RTX, NULL_RTX);
23285 if (lr)
23286 END_USE (0);
23289 /* Save GPRs. This is done as a PARALLEL if we are using
23290 the store-multiple instructions. */
23291 if (!WORLD_SAVE_P (info)
23292 && TARGET_SPE_ABI
23293 && info->spe_64bit_regs_used != 0
23294 && info->first_gp_reg_save != 32)
23296 int i;
23297 rtx spe_save_area_ptr;
23298 HOST_WIDE_INT save_off;
23299 int ool_adjust = 0;
23301 /* Determine whether we can address all of the registers that need
23302 to be saved with an offset from frame_reg_rtx that fits in
23303 the small const field for SPE memory instructions. */
23304 int spe_regs_addressable
23305 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
23306 + reg_size * (32 - info->first_gp_reg_save - 1))
23307 && (strategy & SAVE_INLINE_GPRS));
23309 if (spe_regs_addressable)
23311 spe_save_area_ptr = frame_reg_rtx;
23312 save_off = frame_off;
23314 else
23316 /* Make r11 point to the start of the SPE save area. We need
23317 to be careful here if r11 is holding the static chain. If
23318 it is, then temporarily save it in r0. */
23319 HOST_WIDE_INT offset;
23321 if (!(strategy & SAVE_INLINE_GPRS))
23322 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
23323 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
23324 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
23325 save_off = frame_off - offset;
23327 if (using_static_chain_p)
23329 rtx r0 = gen_rtx_REG (Pmode, 0);
23331 START_USE (0);
23332 gcc_assert (info->first_gp_reg_save > 11);
23334 emit_move_insn (r0, spe_save_area_ptr);
23336 else if (REGNO (frame_reg_rtx) != 11)
23337 START_USE (11);
23339 emit_insn (gen_addsi3 (spe_save_area_ptr,
23340 frame_reg_rtx, GEN_INT (offset)));
23341 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
23342 frame_off = -info->spe_gp_save_offset + ool_adjust;
23345 if ((strategy & SAVE_INLINE_GPRS))
23347 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23348 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23349 emit_frame_save (spe_save_area_ptr, reg_mode,
23350 info->first_gp_reg_save + i,
23351 (info->spe_gp_save_offset + save_off
23352 + reg_size * i),
23353 sp_off - save_off);
23355 else
23357 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
23358 info->spe_gp_save_offset + save_off,
23359 0, reg_mode,
23360 SAVRES_SAVE | SAVRES_GPR);
23362 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
23363 NULL_RTX, NULL_RTX, NULL_RTX);
23366 /* Move the static chain pointer back. */
23367 if (!spe_regs_addressable)
23369 if (using_static_chain_p)
23371 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
23372 END_USE (0);
23374 else if (REGNO (frame_reg_rtx) != 11)
23375 END_USE (11);
23378 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
23380 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
23381 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
23382 unsigned ptr_regno = ptr_regno_for_savres (sel);
23383 rtx ptr_reg = frame_reg_rtx;
23384 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
23385 int end_save = info->gp_save_offset + info->gp_size;
23386 int ptr_off;
23388 if (!ptr_set_up)
23389 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23391 /* Need to adjust r11 (r12) if we saved any FPRs. */
23392 if (end_save + frame_off != 0)
23394 rtx offset = GEN_INT (end_save + frame_off);
23396 if (ptr_set_up)
23397 frame_off = -end_save;
23398 else
23399 NOT_INUSE (ptr_regno);
23400 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23402 else if (!ptr_set_up)
23404 NOT_INUSE (ptr_regno);
23405 emit_move_insn (ptr_reg, frame_reg_rtx);
23407 ptr_off = -end_save;
23408 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23409 info->gp_save_offset + ptr_off,
23410 info->lr_save_offset + ptr_off,
23411 reg_mode, sel);
23412 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
23413 NULL_RTX, NULL_RTX, NULL_RTX);
23414 if (lr)
23415 END_USE (0);
23417 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
23419 rtvec p;
23420 int i;
23421 p = rtvec_alloc (32 - info->first_gp_reg_save);
23422 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23423 RTVEC_ELT (p, i)
23424 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23425 frame_reg_rtx,
23426 info->gp_save_offset + frame_off + reg_size * i);
23427 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23428 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23429 NULL_RTX, NULL_RTX, NULL_RTX);
23431 else if (!WORLD_SAVE_P (info))
23433 int i;
23434 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23435 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23436 emit_frame_save (frame_reg_rtx, reg_mode,
23437 info->first_gp_reg_save + i,
23438 info->gp_save_offset + frame_off + reg_size * i,
23439 sp_off - frame_off);
23442 if (crtl->calls_eh_return)
23444 unsigned int i;
23445 rtvec p;
23447 for (i = 0; ; ++i)
23449 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23450 if (regno == INVALID_REGNUM)
23451 break;
23454 p = rtvec_alloc (i);
23456 for (i = 0; ; ++i)
23458 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23459 if (regno == INVALID_REGNUM)
23460 break;
23462 insn
23463 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
23464 sp_reg_rtx,
23465 info->ehrd_offset + sp_off + reg_size * (int) i);
23466 RTVEC_ELT (p, i) = insn;
23467 RTX_FRAME_RELATED_P (insn) = 1;
23470 insn = emit_insn (gen_blockage ());
23471 RTX_FRAME_RELATED_P (insn) = 1;
23472 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
23475 /* In AIX ABI we need to make sure r2 is really saved. */
23476 if (TARGET_AIX && crtl->calls_eh_return)
23478 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
23479 rtx save_insn, join_insn, note;
23480 long toc_restore_insn;
23482 tmp_reg = gen_rtx_REG (Pmode, 11);
23483 tmp_reg_si = gen_rtx_REG (SImode, 11);
23484 if (using_static_chain_p)
23486 START_USE (0);
23487 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
23489 else
23490 START_USE (11);
23491 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
23492 /* Peek at instruction to which this function returns. If it's
23493 restoring r2, then we know we've already saved r2. We can't
23494 unconditionally save r2 because the value we have will already
23495 be updated if we arrived at this function via a plt call or
23496 toc adjusting stub. */
23497 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
23498 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
23499 + RS6000_TOC_SAVE_SLOT);
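/* A sketch of the encoding being matched here: 0x80410000 is
   "lwz r2,N(r1)" and 0xE8410000 is "ld r2,N(r1)", so adding
   RS6000_TOC_SAVE_SLOT produces the exact instruction image a
   TOC-restoring return site would contain.  The xor/compare below
   then tests the fetched word against that image: the xoris clears
   the expected upper half, leaving only the low 16 bits to compare. */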
23500 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
23501 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
23502 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
23503 validate_condition_mode (EQ, CCUNSmode);
23504 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
23505 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
23506 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
23507 toc_save_done = gen_label_rtx ();
23508 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
23509 gen_rtx_EQ (VOIDmode, compare_result,
23510 const0_rtx),
23511 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
23512 pc_rtx);
23513 jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
23514 JUMP_LABEL (jump) = toc_save_done;
23515 LABEL_NUSES (toc_save_done) += 1;
23517 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
23518 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
23519 sp_off - frame_off);
23521 emit_label (toc_save_done);
23523 /* ??? If we leave SAVE_INSN marked as saving R2, then we'll
23524 have a CFG that has different saves along different paths.
23525 Move the note to a dummy blockage insn, which describes that
23526 R2 is unconditionally saved after the label. */
23527 /* ??? An alternate representation might be a special insn pattern
23528 containing both the branch and the store. That might give the
23529 code that minimizes the number of DW_CFA_advance opcodes more
23530 freedom in placing the annotations. */
23531 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
23532 if (note)
23533 remove_note (save_insn, note);
23534 else
23535 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
23536 copy_rtx (PATTERN (save_insn)), NULL_RTX);
23537 RTX_FRAME_RELATED_P (save_insn) = 0;
23539 join_insn = emit_insn (gen_blockage ());
23540 REG_NOTES (join_insn) = note;
23541 RTX_FRAME_RELATED_P (join_insn) = 1;
23543 if (using_static_chain_p)
23545 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
23546 END_USE (0);
23548 else
23549 END_USE (11);
23552 /* Save CR if we use any that must be preserved. */
23553 if (!WORLD_SAVE_P (info) && info->cr_save_p)
23555 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23556 GEN_INT (info->cr_save_offset + frame_off));
23557 rtx mem = gen_frame_mem (SImode, addr);
23559 /* If we didn't copy cr before, do so now using r0. */
23560 if (cr_save_rtx == NULL_RTX)
23562 START_USE (0);
23563 cr_save_rtx = gen_rtx_REG (SImode, 0);
23564 rs6000_emit_move_from_cr (cr_save_rtx);
23567 /* Saving CR requires a two-instruction sequence: one instruction
23568 to move the CR to a general-purpose register, and a second
23569 instruction that stores the GPR to memory.
23571 We do not emit any DWARF CFI records for the first of these,
23572 because we cannot properly represent the fact that CR is saved in
23573 a register. One reason is that we cannot express that multiple
23574 CR fields are saved; another reason is that on 64-bit, the size
23575 of the CR register in DWARF (4 bytes) differs from the size of
23576 a general-purpose register.
23578 This means if any intervening instruction were to clobber one of
23579 the call-saved CR fields, we'd have incorrect CFI. To prevent
23580 this from happening, we mark the store to memory as a use of
23581 those CR fields, which prevents any such instruction from being
23582 scheduled in between the two instructions. */
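/* In sketch form (the register choice may differ), the emitted pair is:
	mfcr rN
	stw rN,cr_save_offset(frame_reg)
   with the USEs of the live CR fields attached to the store. */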
23583 rtx crsave_v[9];
23584 int n_crsave = 0;
23585 int i;
23587 crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
23588 for (i = 0; i < 8; i++)
23589 if (save_reg_p (CR0_REGNO + i))
23590 crsave_v[n_crsave++]
23591 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23593 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
23594 gen_rtvec_v (n_crsave, crsave_v)));
23595 END_USE (REGNO (cr_save_rtx));
23597 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
23598 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
23599 so we need to construct a frame expression manually. */
23600 RTX_FRAME_RELATED_P (insn) = 1;
23602 /* Update address to be stack-pointer relative, like
23603 rs6000_frame_related would do. */
23604 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
23605 GEN_INT (info->cr_save_offset + sp_off));
23606 mem = gen_frame_mem (SImode, addr);
23608 if (DEFAULT_ABI == ABI_ELFv2)
23610 /* In the ELFv2 ABI we generate separate CFI records for each
23611 CR field that was actually saved. They all point to the
23612 same 32-bit stack slot. */
23613 rtx crframe[8];
23614 int n_crframe = 0;
23616 for (i = 0; i < 8; i++)
23617 if (save_reg_p (CR0_REGNO + i))
23619 crframe[n_crframe]
23620 = gen_rtx_SET (VOIDmode, mem,
23621 gen_rtx_REG (SImode, CR0_REGNO + i));
23623 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
23624 n_crframe++;
23627 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23628 gen_rtx_PARALLEL (VOIDmode,
23629 gen_rtvec_v (n_crframe, crframe)));
23631 else
23633 /* In other ABIs, by convention, we use a single CR regnum to
23634 represent the fact that all call-saved CR fields are saved.
23635 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
23636 rtx set = gen_rtx_SET (VOIDmode, mem,
23637 gen_rtx_REG (SImode, CR2_REGNO));
23638 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
23642 /* In the ELFv2 ABI we need to save all call-saved CR fields into
23643 *separate* slots if the routine calls __builtin_eh_return, so
23644 that they can be independently restored by the unwinder. */
23645 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
23647 int i, cr_off = info->ehcr_offset;
23648 rtx crsave;
23650 /* ??? We might get better performance by using multiple mfocrf
23651 instructions. */
23652 crsave = gen_rtx_REG (SImode, 0);
23653 emit_insn (gen_movesi_from_cr (crsave));
23655 for (i = 0; i < 8; i++)
23656 if (!call_used_regs[CR0_REGNO + i])
23658 rtvec p = rtvec_alloc (2);
23659 RTVEC_ELT (p, 0)
23660 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
23661 RTVEC_ELT (p, 1)
23662 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23664 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23666 RTX_FRAME_RELATED_P (insn) = 1;
23667 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23668 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
23669 sp_reg_rtx, cr_off + sp_off));
23671 cr_off += reg_size;
23675 /* Update stack and set back pointer unless this is V.4,
23676 for which it was done previously. */
23677 if (!WORLD_SAVE_P (info) && info->push_p
23678 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
23680 rtx ptr_reg = NULL;
23681 int ptr_off = 0;
23683 /* If saving altivec regs we need to be able to address all save
23684 locations using a 16-bit offset. */
23685 if ((strategy & SAVE_INLINE_VRS) == 0
23686 || (info->altivec_size != 0
23687 && (info->altivec_save_offset + info->altivec_size - 16
23688 + info->total_size - frame_off) > 32767)
23689 || (info->vrsave_size != 0
23690 && (info->vrsave_save_offset
23691 + info->total_size - frame_off) > 32767))
23693 int sel = SAVRES_SAVE | SAVRES_VR;
23694 unsigned ptr_regno = ptr_regno_for_savres (sel);
23696 if (using_static_chain_p
23697 && ptr_regno == STATIC_CHAIN_REGNUM)
23698 ptr_regno = 12;
23699 if (REGNO (frame_reg_rtx) != ptr_regno)
23700 START_USE (ptr_regno);
23701 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23702 frame_reg_rtx = ptr_reg;
23703 ptr_off = info->altivec_save_offset + info->altivec_size;
23704 frame_off = -ptr_off;
23706 else if (REGNO (frame_reg_rtx) == 1)
23707 frame_off = info->total_size;
23708 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23709 sp_off = info->total_size;
23710 if (frame_reg_rtx != sp_reg_rtx)
23711 rs6000_emit_stack_tie (frame_reg_rtx, false);
23714 /* Set frame pointer, if needed. */
23715 if (frame_pointer_needed)
23717 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
23718 sp_reg_rtx);
23719 RTX_FRAME_RELATED_P (insn) = 1;
23722 /* Save AltiVec registers if needed. Save here because the red zone does
23723 not always include AltiVec registers. */
23724 if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
23725 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
23727 int end_save = info->altivec_save_offset + info->altivec_size;
23728 int ptr_off;
23729 /* Oddly, the vector save/restore functions point r0 at the end
23730 of the save area, then use r11 or r12 to load offsets for
23731 [reg+reg] addressing. */
23732 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
23733 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
23734 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
23736 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
23737 NOT_INUSE (0);
23738 if (end_save + frame_off != 0)
23740 rtx offset = GEN_INT (end_save + frame_off);
23742 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23744 else
23745 emit_move_insn (ptr_reg, frame_reg_rtx);
23747 ptr_off = -end_save;
23748 insn = rs6000_emit_savres_rtx (info, scratch_reg,
23749 info->altivec_save_offset + ptr_off,
23750 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
23751 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
23752 NULL_RTX, NULL_RTX, NULL_RTX);
23753 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
23755 /* The oddity mentioned above clobbered our frame reg. */
23756 emit_move_insn (frame_reg_rtx, ptr_reg);
23757 frame_off = ptr_off;
23760 else if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
23761 && info->altivec_size != 0)
23763 int i;
23765 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
23766 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
23768 rtx areg, savereg, mem, split_reg;
23769 int offset;
23771 offset = (info->altivec_save_offset + frame_off
23772 + 16 * (i - info->first_altivec_reg_save));
23774 savereg = gen_rtx_REG (V4SImode, i);
23776 NOT_INUSE (0);
23777 areg = gen_rtx_REG (Pmode, 0);
23778 emit_move_insn (areg, GEN_INT (offset));
23780 /* AltiVec addressing mode is [reg+reg]. */
23781 mem = gen_frame_mem (V4SImode,
23782 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
23784 insn = emit_move_insn (mem, savereg);
23786 /* When we split a VSX store into two insns, we need to make
23787 sure the DWARF info knows which register we are storing.
23788 Pass it in to be used on the appropriate note. */
23789 if (!BYTES_BIG_ENDIAN
23790 && GET_CODE (PATTERN (insn)) == SET
23791 && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
23792 split_reg = savereg;
23793 else
23794 split_reg = NULL_RTX;
23796 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23797 areg, GEN_INT (offset), split_reg);
23801 /* VRSAVE is a bit vector representing which AltiVec registers
23802 are used. The OS uses this to determine which vector
23803 registers to save on a context switch. We need to save
23804 VRSAVE on the stack frame, add whatever AltiVec registers we
23805 used in this function, and do the corresponding magic in the
23806 epilogue. */
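/* A rough sketch of the sequence emitted below (the GPR chosen
   varies; see the save_regno logic):
	mfspr rN,VRSAVE
	stw rN,vrsave_save_offset(frame_reg)
	ori/oris rN,rN,vrsave_mask
	mtspr VRSAVE,rN  */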
23808 if (!WORLD_SAVE_P (info)
23809 && TARGET_ALTIVEC
23810 && TARGET_ALTIVEC_VRSAVE
23811 && info->vrsave_mask != 0)
23813 rtx reg, vrsave;
23814 int offset;
23815 int save_regno;
23817 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
23818 be using r12 as frame_reg_rtx and r11 as the static chain
23819 pointer for nested functions. */
23820 save_regno = 12;
23821 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23822 && !using_static_chain_p)
23823 save_regno = 11;
23824 else if (REGNO (frame_reg_rtx) == 12)
23826 save_regno = 11;
23827 if (using_static_chain_p)
23828 save_regno = 0;
23831 NOT_INUSE (save_regno);
23832 reg = gen_rtx_REG (SImode, save_regno);
23833 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
23834 if (TARGET_MACHO)
23835 emit_insn (gen_get_vrsave_internal (reg));
23836 else
23837 emit_insn (gen_rtx_SET (VOIDmode, reg, vrsave));
23839 /* Save VRSAVE. */
23840 offset = info->vrsave_save_offset + frame_off;
23841 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
23843 /* Include the registers in the mask. */
23844 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
23846 insn = emit_insn (generate_set_vrsave (reg, info, 0));
23849 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
23850 if (!TARGET_SINGLE_PIC_BASE
23851 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
23852 || (DEFAULT_ABI == ABI_V4
23853 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
23854 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
23856 /* If emit_load_toc_table will use the link register, we need to save
23857 it. We use R12 for this purpose because emit_load_toc_table
23858 can use register 0. This allows us to use a plain 'blr' to return
23859 from the procedure more often. */
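/* In effect: mflr 12 ... (TOC load) ... mtlr 12. */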
23860 int save_LR_around_toc_setup = (TARGET_ELF
23861 && DEFAULT_ABI == ABI_V4
23862 && flag_pic
23863 && ! info->lr_save_p
23864 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
23865 if (save_LR_around_toc_setup)
23867 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
23868 rtx tmp = gen_rtx_REG (Pmode, 12);
23870 insn = emit_move_insn (tmp, lr);
23871 RTX_FRAME_RELATED_P (insn) = 1;
23873 rs6000_emit_load_toc_table (TRUE);
23875 insn = emit_move_insn (lr, tmp);
23876 add_reg_note (insn, REG_CFA_RESTORE, lr);
23877 RTX_FRAME_RELATED_P (insn) = 1;
23879 else
23880 rs6000_emit_load_toc_table (TRUE);
23883 #if TARGET_MACHO
23884 if (!TARGET_SINGLE_PIC_BASE
23885 && DEFAULT_ABI == ABI_DARWIN
23886 && flag_pic && crtl->uses_pic_offset_table)
23888 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
23889 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
23891 /* Save and restore LR locally around this call (in R0). */
23892 if (!info->lr_save_p)
23893 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
23895 emit_insn (gen_load_macho_picbase (src));
23897 emit_move_insn (gen_rtx_REG (Pmode,
23898 RS6000_PIC_OFFSET_TABLE_REGNUM),
23899 lr);
23901 if (!info->lr_save_p)
23902 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
23904 #endif
23906 /* If we need to, save the TOC register after doing the stack setup.
23907 Do not emit eh frame info for this save. The unwinder wants info,
23908 conceptually attached to instructions in this function, about
23909 register values in the caller of this function. This R2 may have
23910 already been changed from the value in the caller.
23911 We don't attempt to write accurate DWARF EH frame info for R2
23912 because code emitted by gcc for a (non-pointer) function call
23913 doesn't save and restore R2. Instead, R2 is managed out-of-line
23914 by a linker generated plt call stub when the function resides in
23915 a shared library. This behaviour is costly to describe in DWARF,
23916 both in terms of the size of DWARF info and the time taken in the
23917 unwinder to interpret it. R2 changes, apart from the
23918 calls_eh_return case earlier in this function, are handled by
23919 linux-unwind.h frob_update_context. */
23920 if (rs6000_save_toc_in_prologue_p ())
23922 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
23923 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
23927 /* Write function prologue. */
23929 static void
23930 rs6000_output_function_prologue (FILE *file,
23931 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
23933 rs6000_stack_t *info = rs6000_stack_info ();
23935 if (TARGET_DEBUG_STACK)
23936 debug_stack_info (info);
23938 /* Write .extern for any function we will call to save and restore
23939 fp values. */
23940 if (info->first_fp_reg_save < 64
23941 && !TARGET_MACHO
23942 && !TARGET_ELF)
23944 char *name;
23945 int regno = info->first_fp_reg_save - 32;
23947 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
23949 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
23950 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
23951 name = rs6000_savres_routine_name (info, regno, sel);
23952 fprintf (file, "\t.extern %s\n", name);
23954 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
23956 bool lr = (info->savres_strategy
23957 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
23958 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
23959 name = rs6000_savres_routine_name (info, regno, sel);
23960 fprintf (file, "\t.extern %s\n", name);
23964 /* ELFv2 ABI r2 setup code and local entry point. This must follow
23965 immediately after the global entry point label. */
23966 if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
23968 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
23970 fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
23971 fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
23973 fputs ("\t.localentry\t", file);
23974 assemble_name (file, name);
23975 fputs (",.-", file);
23976 assemble_name (file, name);
23977 fputs ("\n", file);
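/* The .localentry directive records the distance between the global
   and local entry points in the symbol's st_other field, as the
   ELFv2 ABI requires. */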
23980 /* Output -mprofile-kernel code. This needs to be done here instead of
23981 in output_function_profile since it must go after the ELFv2 ABI
23982 local entry point. */
23983 if (TARGET_PROFILE_KERNEL && crtl->profile)
23985 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
23986 gcc_assert (!TARGET_32BIT);
23988 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
23989 asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
23991 /* In the ELFv2 ABI we have no compiler stack word. It must be
23992 the responsibility of _mcount to preserve the static chain
23993 register if required. */
23994 if (DEFAULT_ABI != ABI_ELFv2
23995 && cfun->static_chain_decl != NULL)
23997 asm_fprintf (file, "\tstd %s,24(%s)\n",
23998 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
23999 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24000 asm_fprintf (file, "\tld %s,24(%s)\n",
24001 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24003 else
24004 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
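/* With a static chain, the block above emits roughly:
	mflr 0
	std 0,16(1)
	std 11,24(1)
	bl <RS6000_MCOUNT>
	ld 11,24(1)
   where r11 is STATIC_CHAIN_REGNUM; without one, only the
   mflr/std/bl sequence is emitted. */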
24007 rs6000_pic_labelno++;
24010 /* Non-zero if vmx regs are restored before the frame pop, zero if
24011 we restore after the pop when possible. */
24012 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
24014 /* Restoring cr is a two-step process: loading a reg from the frame
24015 save, then moving the reg to cr. For ABI_V4 we must let the
24016 unwinder know that the stack location is no longer valid at or
24017 before the stack deallocation, but we can't emit a cfa_restore for
24018 cr at the stack deallocation like we do for other registers.
24019 The trouble is that it is possible for the move to cr to be
24020 scheduled after the stack deallocation. So say exactly where cr
24021 is located on each of the two insns. */
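/* I.e. the restore is, in sketch form:
	lwz rT,cr_save_offset(frame_reg)
	mtcrf MASK,rT
   and the two helpers below attach the CFI notes to whichever of the
   two insns needs them. */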
24023 static rtx
24024 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
24026 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
24027 rtx reg = gen_rtx_REG (SImode, regno);
24028 rtx_insn *insn = emit_move_insn (reg, mem);
24030 if (!exit_func && DEFAULT_ABI == ABI_V4)
24032 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24033 rtx set = gen_rtx_SET (VOIDmode, reg, cr);
24035 add_reg_note (insn, REG_CFA_REGISTER, set);
24036 RTX_FRAME_RELATED_P (insn) = 1;
24038 return reg;
24041 /* Reload CR from REG. */
24043 static void
24044 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
24046 int count = 0;
24047 int i;
24049 if (using_mfcr_multiple)
24051 for (i = 0; i < 8; i++)
24052 if (save_reg_p (CR0_REGNO + i))
24053 count++;
24054 gcc_assert (count);
24057 if (using_mfcr_multiple && count > 1)
24059 rtx_insn *insn;
24060 rtvec p;
24061 int ndx;
24063 p = rtvec_alloc (count);
24065 ndx = 0;
24066 for (i = 0; i < 8; i++)
24067 if (save_reg_p (CR0_REGNO + i))
24069 rtvec r = rtvec_alloc (2);
24070 RTVEC_ELT (r, 0) = reg;
24071 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
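/* 1 << (7 - i) is the mtcrf field mask selecting CR field i;
   CR0 corresponds to the most significant mask bit, 0x80. */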
24072 RTVEC_ELT (p, ndx) =
24073 gen_rtx_SET (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i),
24074 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
24075 ndx++;
24077 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24078 gcc_assert (ndx == count);
24080 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24081 CR field separately. */
24082 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24084 for (i = 0; i < 8; i++)
24085 if (save_reg_p (CR0_REGNO + i))
24086 add_reg_note (insn, REG_CFA_RESTORE,
24087 gen_rtx_REG (SImode, CR0_REGNO + i));
24089 RTX_FRAME_RELATED_P (insn) = 1;
24092 else
24093 for (i = 0; i < 8; i++)
24094 if (save_reg_p (CR0_REGNO + i))
24096 rtx insn = emit_insn (gen_movsi_to_cr_one
24097 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24099 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24100 CR field separately, attached to the insn that in fact
24101 restores this particular CR field. */
24102 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24104 add_reg_note (insn, REG_CFA_RESTORE,
24105 gen_rtx_REG (SImode, CR0_REGNO + i));
24107 RTX_FRAME_RELATED_P (insn) = 1;
24111 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
24112 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
24113 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24115 rtx_insn *insn = get_last_insn ();
24116 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24118 add_reg_note (insn, REG_CFA_RESTORE, cr);
24119 RTX_FRAME_RELATED_P (insn) = 1;
24123 /* Like cr, the move to lr instruction can be scheduled after the
24124 stack deallocation, but unlike cr, its stack frame save is still
24125 valid. So we only need to emit the cfa_restore on the correct
24126 instruction. */
24128 static void
24129 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
24131 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
24132 rtx reg = gen_rtx_REG (Pmode, regno);
24134 emit_move_insn (reg, mem);
24137 static void
24138 restore_saved_lr (int regno, bool exit_func)
24140 rtx reg = gen_rtx_REG (Pmode, regno);
24141 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24142 rtx_insn *insn = emit_move_insn (lr, reg);
24144 if (!exit_func && flag_shrink_wrap)
24146 add_reg_note (insn, REG_CFA_RESTORE, lr);
24147 RTX_FRAME_RELATED_P (insn) = 1;
24151 static rtx
24152 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
24154 if (DEFAULT_ABI == ABI_ELFv2)
24156 int i;
24157 for (i = 0; i < 8; i++)
24158 if (save_reg_p (CR0_REGNO + i))
24160 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
24161 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
24162 cfa_restores);
24165 else if (info->cr_save_p)
24166 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24167 gen_rtx_REG (SImode, CR2_REGNO),
24168 cfa_restores);
24170 if (info->lr_save_p)
24171 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24172 gen_rtx_REG (Pmode, LR_REGNO),
24173 cfa_restores);
24174 return cfa_restores;
24177 /* Return true if OFFSET from the stack pointer can be clobbered by signals.
24178 V.4 doesn't have any stack cushion; the AIX ABIs have 220 or 288 bytes
24179 below the stack pointer that are not clobbered by signals. */
24181 static inline bool
24182 offset_below_red_zone_p (HOST_WIDE_INT offset)
24184 return offset < (DEFAULT_ABI == ABI_V4
24185 ? 0
24186 : TARGET_32BIT ? -220 : -288);
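/* E.g. on 64-bit non-V.4 ABIs an offset below -288 is past the red
   zone and may be clobbered, so this returns true; on V.4 any offset
   below the stack pointer is unsafe. */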
24189 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
24191 static void
24192 emit_cfa_restores (rtx cfa_restores)
24194 rtx_insn *insn = get_last_insn ();
24195 rtx *loc = &REG_NOTES (insn);
24197 while (*loc)
24198 loc = &XEXP (*loc, 1);
24199 *loc = cfa_restores;
24200 RTX_FRAME_RELATED_P (insn) = 1;
24203 /* Emit function epilogue as insns. */
24205 void
24206 rs6000_emit_epilogue (int sibcall)
24208 rs6000_stack_t *info;
24209 int restoring_GPRs_inline;
24210 int restoring_FPRs_inline;
24211 int using_load_multiple;
24212 int using_mtcr_multiple;
24213 int use_backchain_to_restore_sp;
24214 int restore_lr;
24215 int strategy;
24216 HOST_WIDE_INT frame_off = 0;
24217 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
24218 rtx frame_reg_rtx = sp_reg_rtx;
24219 rtx cfa_restores = NULL_RTX;
24220 rtx insn;
24221 rtx cr_save_reg = NULL_RTX;
24222 enum machine_mode reg_mode = Pmode;
24223 int reg_size = TARGET_32BIT ? 4 : 8;
24224 int i;
24225 bool exit_func;
24226 unsigned ptr_regno;
24228 info = rs6000_stack_info ();
24230 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24232 reg_mode = V2SImode;
24233 reg_size = 8;
24236 strategy = info->savres_strategy;
24237 using_load_multiple = strategy & SAVRES_MULTIPLE;
24238 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
24239 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
24240 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
24241 || rs6000_cpu == PROCESSOR_PPC603
24242 || rs6000_cpu == PROCESSOR_PPC750
24243 || optimize_size);
24244 /* Restore via the backchain when we have a large frame, since this
24245 is more efficient than an addis, addi pair. The second condition
24246 here will not trigger at the moment; we don't actually need a
24247 frame pointer for alloca, but the generic parts of the compiler
24248 give us one anyway. */
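/* The backchain word at 0(r1) holds the caller's stack pointer, so a
   single load such as "ld r11,0(r1)" recovers it whatever the frame
   size. */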
24249 use_backchain_to_restore_sp = (info->total_size > 32767 - info->lr_save_offset
24250 || (cfun->calls_alloca
24251 && !frame_pointer_needed));
24252 restore_lr = (info->lr_save_p
24253 && (restoring_FPRs_inline
24254 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
24255 && (restoring_GPRs_inline
24256 || info->first_fp_reg_save < 64));
24258 if (WORLD_SAVE_P (info))
24260 int i, j;
24261 char rname[30];
24262 const char *alloc_rname;
24263 rtvec p;
24265 /* eh_rest_world_r10 will return to the location saved in the LR
24266 stack slot (which is not likely to be our caller).
24267 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
24268 rest_world is similar, except any R10 parameter is ignored.
24269 The exception-handling stuff that was here in 2.95 is no
24270 longer necessary. */
24272 p = rtvec_alloc (9
24274 + 32 - info->first_gp_reg_save
24275 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
24276 + 63 + 1 - info->first_fp_reg_save);
24278 strcpy (rname, ((crtl->calls_eh_return) ?
24279 "*eh_rest_world_r10" : "*rest_world"));
24280 alloc_rname = ggc_strdup (rname);
24282 j = 0;
24283 RTVEC_ELT (p, j++) = ret_rtx;
24284 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
24285 gen_rtx_REG (Pmode,
24286 LR_REGNO));
24287 RTVEC_ELT (p, j++)
24288 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
24289 /* The instruction pattern requires a clobber here;
24290 it is shared with the restVEC helper. */
24291 RTVEC_ELT (p, j++)
24292 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
24295 /* CR register traditionally saved as CR2. */
24296 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
24297 RTVEC_ELT (p, j++)
24298 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
24299 if (flag_shrink_wrap)
24301 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24302 gen_rtx_REG (Pmode, LR_REGNO),
24303 cfa_restores);
24304 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24308 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24310 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
24311 RTVEC_ELT (p, j++)
24312 = gen_frame_load (reg,
24313 frame_reg_rtx, info->gp_save_offset + reg_size * i);
24314 if (flag_shrink_wrap)
24315 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24317 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
24319 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
24320 RTVEC_ELT (p, j++)
24321 = gen_frame_load (reg,
24322 frame_reg_rtx, info->altivec_save_offset + 16 * i);
24323 if (flag_shrink_wrap)
24324 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24326 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
24328 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24329 ? DFmode : SFmode),
24330 info->first_fp_reg_save + i);
24331 RTVEC_ELT (p, j++)
24332 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
24333 if (flag_shrink_wrap)
24334 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24336 RTVEC_ELT (p, j++)
24337 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
24338 RTVEC_ELT (p, j++)
24339 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
24340 RTVEC_ELT (p, j++)
24341 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
24342 RTVEC_ELT (p, j++)
24343 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
24344 RTVEC_ELT (p, j++)
24345 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
24346 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
24348 if (flag_shrink_wrap)
24350 REG_NOTES (insn) = cfa_restores;
24351 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24352 RTX_FRAME_RELATED_P (insn) = 1;
24354 return;
24357 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
24358 if (info->push_p)
24359 frame_off = info->total_size;
24361 /* Restore AltiVec registers if we must do so before adjusting the
24362 stack. */
24363 if (TARGET_ALTIVEC_ABI
24364 && info->altivec_size != 0
24365 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24366 || (DEFAULT_ABI != ABI_V4
24367 && offset_below_red_zone_p (info->altivec_save_offset))))
24369 int i;
24370 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24372 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24373 if (use_backchain_to_restore_sp)
24375 int frame_regno = 11;
24377 if ((strategy & REST_INLINE_VRS) == 0)
24379 /* Of r11 and r12, select the one not clobbered by an
24380 out-of-line restore function for the frame register. */
24381 frame_regno = 11 + 12 - scratch_regno;
24383 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
24384 emit_move_insn (frame_reg_rtx,
24385 gen_rtx_MEM (Pmode, sp_reg_rtx));
24386 frame_off = 0;
24388 else if (frame_pointer_needed)
24389 frame_reg_rtx = hard_frame_pointer_rtx;
24391 if ((strategy & REST_INLINE_VRS) == 0)
24393 int end_save = info->altivec_save_offset + info->altivec_size;
24394 int ptr_off;
24395 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24396 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24398 if (end_save + frame_off != 0)
24400 rtx offset = GEN_INT (end_save + frame_off);
24402 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24404 else
24405 emit_move_insn (ptr_reg, frame_reg_rtx);
24407 ptr_off = -end_save;
24408 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24409 info->altivec_save_offset + ptr_off,
24410 0, V4SImode, SAVRES_VR);
24412 else
24414 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24415 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24417 rtx addr, areg, mem, reg;
24419 areg = gen_rtx_REG (Pmode, 0);
24420 emit_move_insn
24421 (areg, GEN_INT (info->altivec_save_offset
24422 + frame_off
24423 + 16 * (i - info->first_altivec_reg_save)));
24425 /* AltiVec addressing mode is [reg+reg]. */
24426 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24427 mem = gen_frame_mem (V4SImode, addr);
24429 reg = gen_rtx_REG (V4SImode, i);
24430 emit_move_insn (reg, mem);
24434 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24435 if (((strategy & REST_INLINE_VRS) == 0
24436 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24437 && (flag_shrink_wrap
24438 || (offset_below_red_zone_p
24439 (info->altivec_save_offset
24440 + 16 * (i - info->first_altivec_reg_save)))))
24442 rtx reg = gen_rtx_REG (V4SImode, i);
24443 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24447 /* Restore VRSAVE if we must do so before adjusting the stack. */
24448 if (TARGET_ALTIVEC
24449 && TARGET_ALTIVEC_VRSAVE
24450 && info->vrsave_mask != 0
24451 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24452 || (DEFAULT_ABI != ABI_V4
24453 && offset_below_red_zone_p (info->vrsave_save_offset))))
24455 rtx reg;
24457 if (frame_reg_rtx == sp_reg_rtx)
24459 if (use_backchain_to_restore_sp)
24461 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24462 emit_move_insn (frame_reg_rtx,
24463 gen_rtx_MEM (Pmode, sp_reg_rtx));
24464 frame_off = 0;
24466 else if (frame_pointer_needed)
24467 frame_reg_rtx = hard_frame_pointer_rtx;
24470 reg = gen_rtx_REG (SImode, 12);
24471 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24472 info->vrsave_save_offset + frame_off));
24474 emit_insn (generate_set_vrsave (reg, info, 1));
24477 insn = NULL_RTX;
24478 /* If we have a large stack frame, restore the old stack pointer
24479 using the backchain. */
24480 if (use_backchain_to_restore_sp)
24482 if (frame_reg_rtx == sp_reg_rtx)
24484 /* Under V.4, don't reset the stack pointer until after we're done
24485 loading the saved registers. */
24486 if (DEFAULT_ABI == ABI_V4)
24487 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24489 insn = emit_move_insn (frame_reg_rtx,
24490 gen_rtx_MEM (Pmode, sp_reg_rtx));
24491 frame_off = 0;
24493 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24494 && DEFAULT_ABI == ABI_V4)
24495 /* frame_reg_rtx has been set up by the altivec restore. */
24497 else
24499 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
24500 frame_reg_rtx = sp_reg_rtx;
24503 /* If we have a frame pointer, we can restore the old stack pointer
24504 from it. */
24505 else if (frame_pointer_needed)
24507 frame_reg_rtx = sp_reg_rtx;
24508 if (DEFAULT_ABI == ABI_V4)
24509 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24510 /* Prevent reordering memory accesses against stack pointer restore. */
24511 else if (cfun->calls_alloca
24512 || offset_below_red_zone_p (-info->total_size))
24513 rs6000_emit_stack_tie (frame_reg_rtx, true);
24515 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
24516 GEN_INT (info->total_size)));
24517 frame_off = 0;
24519 else if (info->push_p
24520 && DEFAULT_ABI != ABI_V4
24521 && !crtl->calls_eh_return)
24523 /* Prevent reordering memory accesses against stack pointer restore. */
24524 if (cfun->calls_alloca
24525 || offset_below_red_zone_p (-info->total_size))
24526 rs6000_emit_stack_tie (frame_reg_rtx, false);
24527 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
24528 GEN_INT (info->total_size)));
24529 frame_off = 0;
24531 if (insn && frame_reg_rtx == sp_reg_rtx)
24533 if (cfa_restores)
24535 REG_NOTES (insn) = cfa_restores;
24536 cfa_restores = NULL_RTX;
24538 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24539 RTX_FRAME_RELATED_P (insn) = 1;
24542 /* Restore AltiVec registers if we have not done so already. */
24543 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24544 && TARGET_ALTIVEC_ABI
24545 && info->altivec_size != 0
24546 && (DEFAULT_ABI == ABI_V4
24547 || !offset_below_red_zone_p (info->altivec_save_offset)))
24549 int i;
24551 if ((strategy & REST_INLINE_VRS) == 0)
24553 int end_save = info->altivec_save_offset + info->altivec_size;
24554 int ptr_off;
24555 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24556 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24557 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24559 if (end_save + frame_off != 0)
24561 rtx offset = GEN_INT (end_save + frame_off);
24563 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24565 else
24566 emit_move_insn (ptr_reg, frame_reg_rtx);
24568 ptr_off = -end_save;
24569 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24570 info->altivec_save_offset + ptr_off,
24571 0, V4SImode, SAVRES_VR);
24572 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24574 /* Frame reg was clobbered by out-of-line save. Restore it
24575 from ptr_reg, and if we are calling out-of-line gpr or
24576 fpr restore, set up the correct pointer and offset. */
24577 unsigned newptr_regno = 1;
24578 if (!restoring_GPRs_inline)
24580 bool lr = info->gp_save_offset + info->gp_size == 0;
24581 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24582 newptr_regno = ptr_regno_for_savres (sel);
24583 end_save = info->gp_save_offset + info->gp_size;
24585 else if (!restoring_FPRs_inline)
24587 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
24588 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24589 newptr_regno = ptr_regno_for_savres (sel);
24590 end_save = info->gp_save_offset + info->gp_size;
24593 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
24594 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
24596 if (end_save + ptr_off != 0)
24598 rtx offset = GEN_INT (end_save + ptr_off);
24600 frame_off = -end_save;
24601 emit_insn (gen_add3_insn (frame_reg_rtx, ptr_reg, offset));
24603 else
24605 frame_off = ptr_off;
24606 emit_move_insn (frame_reg_rtx, ptr_reg);
24610 else
24612 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24613 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24615 rtx addr, areg, mem, reg;
24617 areg = gen_rtx_REG (Pmode, 0);
24618 emit_move_insn
24619 (areg, GEN_INT (info->altivec_save_offset
24620 + frame_off
24621 + 16 * (i - info->first_altivec_reg_save)));
24623 /* AltiVec addressing mode is [reg+reg]. */
24624 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24625 mem = gen_frame_mem (V4SImode, addr);
24627 reg = gen_rtx_REG (V4SImode, i);
24628 emit_move_insn (reg, mem);
24632 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24633 if (((strategy & REST_INLINE_VRS) == 0
24634 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24635 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24637 rtx reg = gen_rtx_REG (V4SImode, i);
24638 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24642 /* Restore VRSAVE if we have not done so already. */
24643 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24644 && TARGET_ALTIVEC
24645 && TARGET_ALTIVEC_VRSAVE
24646 && info->vrsave_mask != 0
24647 && (DEFAULT_ABI == ABI_V4
24648 || !offset_below_red_zone_p (info->vrsave_save_offset)))
24650 rtx reg;
24652 reg = gen_rtx_REG (SImode, 12);
24653 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24654 info->vrsave_save_offset + frame_off));
24656 emit_insn (generate_set_vrsave (reg, info, 1));
24659 /* If we exit by an out-of-line restore function on ABI_V4 then that
24660 function will deallocate the stack, so we don't need to worry
24661 about the unwinder restoring cr from an invalid stack frame
24662 location. */
24663 exit_func = (!restoring_FPRs_inline
24664 || (!restoring_GPRs_inline
24665 && info->first_fp_reg_save == 64));
24667 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
24668 *separate* slots if the routine calls __builtin_eh_return, so
24669 that they can be independently restored by the unwinder. */
24670 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24672 int i, cr_off = info->ehcr_offset;
24674 for (i = 0; i < 8; i++)
24675 if (!call_used_regs[CR0_REGNO + i])
24677 rtx reg = gen_rtx_REG (SImode, 0);
24678 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24679 cr_off + frame_off));
24681 insn = emit_insn (gen_movsi_to_cr_one
24682 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24684 if (!exit_func && flag_shrink_wrap)
24686 add_reg_note (insn, REG_CFA_RESTORE,
24687 gen_rtx_REG (SImode, CR0_REGNO + i));
24689 RTX_FRAME_RELATED_P (insn) = 1;
24692 cr_off += reg_size;
24696 /* Get the old lr if we saved it. If we are restoring registers
24697 out-of-line, then the out-of-line routines can do this for us. */
24698 if (restore_lr && restoring_GPRs_inline)
24699 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
24701 /* Get the old cr if we saved it. */
24702 if (info->cr_save_p)
24704 unsigned cr_save_regno = 12;
24706 if (!restoring_GPRs_inline)
24708 /* Ensure we don't use the register used by the out-of-line
24709 gpr register restore below. */
24710 bool lr = info->gp_save_offset + info->gp_size == 0;
24711 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24712 int gpr_ptr_regno = ptr_regno_for_savres (sel);
24714 if (gpr_ptr_regno == 12)
24715 cr_save_regno = 11;
24716 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
24718 else if (REGNO (frame_reg_rtx) == 12)
24719 cr_save_regno = 11;
24721 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
24722 info->cr_save_offset + frame_off,
24723 exit_func);
24726 /* Set LR here to try to overlap restores below. */
24727 if (restore_lr && restoring_GPRs_inline)
24728 restore_saved_lr (0, exit_func);
24730 /* Load exception handler data registers, if needed. */
24731 if (crtl->calls_eh_return)
24733 unsigned int i, regno;
24735 if (TARGET_AIX)
24737 rtx reg = gen_rtx_REG (reg_mode, 2);
24738 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24739 frame_off + RS6000_TOC_SAVE_SLOT));
24742 for (i = 0; ; ++i)
24744 rtx mem;
24746 regno = EH_RETURN_DATA_REGNO (i);
24747 if (regno == INVALID_REGNUM)
24748 break;
24750 /* Note: possible use of r0 here to address SPE regs. */
24751 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
24752 info->ehrd_offset + frame_off
24753 + reg_size * (int) i);
24755 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
24759 /* Restore GPRs. This is done as a PARALLEL if we are using
24760 the load-multiple instructions. */
24761 if (TARGET_SPE_ABI
24762 && info->spe_64bit_regs_used
24763 && info->first_gp_reg_save != 32)
24765 /* Determine whether we can address all of the registers that need
24766 to be saved with an offset from frame_reg_rtx that fits in
24767 the small const field for SPE memory instructions. */
24768 int spe_regs_addressable
24769 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
24770 + reg_size * (32 - info->first_gp_reg_save - 1))
24771 && restoring_GPRs_inline);
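/* SPE_CONST_OFFSET_OK accepts only offsets that fit the evldd/evstdd
   immediate: a 5-bit field scaled by 8, i.e. multiples of 8 from 0
   to 248. */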
24773 if (!spe_regs_addressable)
24775 int ool_adjust = 0;
24776 rtx old_frame_reg_rtx = frame_reg_rtx;
24777 /* Make r11 point to the start of the SPE save area. We worried about
24778 not clobbering it when we were saving registers in the prologue.
24779 There's no need to worry here because the static chain is passed
24780 anew to every function. */
24782 if (!restoring_GPRs_inline)
24783 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
24784 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24785 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
24786 GEN_INT (info->spe_gp_save_offset
24787 + frame_off
24788 - ool_adjust)));
24789 /* Keep the invariant that frame_reg_rtx + frame_off points
24790 at the top of the stack frame. */
24791 frame_off = -info->spe_gp_save_offset + ool_adjust;
24794 if (restoring_GPRs_inline)
24796 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
24798 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24799 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
24801 rtx offset, addr, mem, reg;
24803 /* We're doing all this to ensure that the immediate offset
24804 fits into the immediate field of 'evldd'. */
24805 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
24807 offset = GEN_INT (spe_offset + reg_size * i);
24808 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
24809 mem = gen_rtx_MEM (V2SImode, addr);
24810 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
24812 emit_move_insn (reg, mem);
24815 else
24816 rs6000_emit_savres_rtx (info, frame_reg_rtx,
24817 info->spe_gp_save_offset + frame_off,
24818 info->lr_save_offset + frame_off,
24819 reg_mode,
24820 SAVRES_GPR | SAVRES_LR);
24822 else if (!restoring_GPRs_inline)
24824 /* We are jumping to an out-of-line function. */
24825 rtx ptr_reg;
24826 int end_save = info->gp_save_offset + info->gp_size;
24827 bool can_use_exit = end_save == 0;
24828 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
24829 int ptr_off;
24831 /* Emit stack reset code if we need it. */
24832 ptr_regno = ptr_regno_for_savres (sel);
24833 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
24834 if (can_use_exit)
24835 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
24836 else if (end_save + frame_off != 0)
24837 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
24838 GEN_INT (end_save + frame_off)));
24839 else if (REGNO (frame_reg_rtx) != ptr_regno)
24840 emit_move_insn (ptr_reg, frame_reg_rtx);
24841 if (REGNO (frame_reg_rtx) == ptr_regno)
24842 frame_off = -end_save;
24844 if (can_use_exit && info->cr_save_p)
24845 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
24847 ptr_off = -end_save;
24848 rs6000_emit_savres_rtx (info, ptr_reg,
24849 info->gp_save_offset + ptr_off,
24850 info->lr_save_offset + ptr_off,
24851 reg_mode, sel);
24853 else if (using_load_multiple)
24855 rtvec p;
24856 p = rtvec_alloc (32 - info->first_gp_reg_save);
24857 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24858 RTVEC_ELT (p, i)
24859 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
24860 frame_reg_rtx,
24861 info->gp_save_offset + frame_off + reg_size * i);
24862 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24864 else
24866 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24867 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
24868 emit_insn (gen_frame_load
24869 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
24870 frame_reg_rtx,
24871 info->gp_save_offset + frame_off + reg_size * i));
24874 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
24876 /* If the frame pointer was used then we can't delay emitting
24877 a REG_CFA_DEF_CFA note. This must happen on the insn that
24878 restores the frame pointer, r31. We may have already emitted
24879 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
24880 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
24881 be harmless if emitted. */
24882 if (frame_pointer_needed)
24884 insn = get_last_insn ();
24885 add_reg_note (insn, REG_CFA_DEF_CFA,
24886 plus_constant (Pmode, frame_reg_rtx, frame_off));
24887 RTX_FRAME_RELATED_P (insn) = 1;
24890 /* Set up cfa_restores. We always need these when
24891 shrink-wrapping. If not shrink-wrapping then we only need
24892 the cfa_restore when the stack location is no longer valid.
24893 The cfa_restores must be emitted on or before the insn that
24894 invalidates the stack, and of course must not be emitted
24895 before the insn that actually does the restore. The latter
24896 is why it is a bad idea to emit the cfa_restores as a group
24897 on the last instruction here that actually does a restore:
24898 That insn may be reordered with respect to others doing
24899 restores. */
24900 if (flag_shrink_wrap
24901 && !restoring_GPRs_inline
24902 && info->first_fp_reg_save == 64)
24903 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
24905 for (i = info->first_gp_reg_save; i < 32; i++)
24906 if (!restoring_GPRs_inline
24907 || using_load_multiple
24908 || rs6000_reg_live_or_pic_offset_p (i))
24910 rtx reg = gen_rtx_REG (reg_mode, i);
24912 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24916 if (!restoring_GPRs_inline
24917 && info->first_fp_reg_save == 64)
24919 /* We are jumping to an out-of-line function. */
24920 if (cfa_restores)
24921 emit_cfa_restores (cfa_restores);
24922 return;
24925 if (restore_lr && !restoring_GPRs_inline)
24927 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
24928 restore_saved_lr (0, exit_func);
24931 /* Restore fpr's if we need to do it without calling a function. */
24932 if (restoring_FPRs_inline)
24933 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
24934 if (save_reg_p (info->first_fp_reg_save + i))
24936 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24937 ? DFmode : SFmode),
24938 info->first_fp_reg_save + i);
24939 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24940 info->fp_save_offset + frame_off + 8 * i));
24941 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
24942 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24945 /* If we saved cr, restore it here. Just those that were used. */
24946 if (info->cr_save_p)
24947 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
24949 /* If this is V.4, unwind the stack pointer after all of the loads
24950 have been done, or set up r11 if we are restoring fp out of line. */
24951 ptr_regno = 1;
24952 if (!restoring_FPRs_inline)
24954 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
24955 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24956 ptr_regno = ptr_regno_for_savres (sel);
24959 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
24960 if (REGNO (frame_reg_rtx) == ptr_regno)
24961 frame_off = 0;
24963 if (insn && restoring_FPRs_inline)
24965 if (cfa_restores)
24967 REG_NOTES (insn) = cfa_restores;
24968 cfa_restores = NULL_RTX;
24970 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24971 RTX_FRAME_RELATED_P (insn) = 1;
24974 if (crtl->calls_eh_return)
24976 rtx sa = EH_RETURN_STACKADJ_RTX;
24977 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
24980 if (!sibcall)
24982 rtvec p;
24983 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
24984 if (! restoring_FPRs_inline)
24986 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
24987 RTVEC_ELT (p, 0) = ret_rtx;
24989 else
24991 if (cfa_restores)
24993 /* We can't hang the cfa_restores off a simple return,
24994 since the shrink-wrap code sometimes uses an existing
24995 return. This means there might be a path from
24996 pre-prologue code to this return, and dwarf2cfi code
24997 wants the eh_frame unwinder state to be the same on
24998 all paths to any point. So we need to emit the
24999 cfa_restores before the return. For -m64 we really
25000 don't need epilogue cfa_restores at all, except for
25001 this irritating dwarf2cfi-with-shrink-wrap
25002 requirement; the stack red zone means eh_frame info
25003 from the prologue telling the unwinder to restore
25004 from the stack is perfectly good right to the end of
25005 the function. */
25006 emit_insn (gen_blockage ());
25007 emit_cfa_restores (cfa_restores);
25008 cfa_restores = NULL_RTX;
25010 p = rtvec_alloc (2);
25011 RTVEC_ELT (p, 0) = simple_return_rtx;
25014 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
25015 ? gen_rtx_USE (VOIDmode,
25016 gen_rtx_REG (Pmode, LR_REGNO))
25017 : gen_rtx_CLOBBER (VOIDmode,
25018 gen_rtx_REG (Pmode, LR_REGNO)));
25020 /* If we have to restore more than two FP registers, branch to the
25021 restore function. It will return to our caller. */
25022 if (! restoring_FPRs_inline)
25024 int i;
25025 int reg;
25026 rtx sym;
25028 if (flag_shrink_wrap)
25029 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25031 sym = rs6000_savres_routine_sym (info,
25032 SAVRES_FPR | (lr ? SAVRES_LR : 0));
25033 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
25034 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
25035 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
25037 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25039 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
25041 RTVEC_ELT (p, i + 4)
25042 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
25043 if (flag_shrink_wrap)
25044 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
25045 cfa_restores);
25049 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
25052 if (cfa_restores)
25054 if (sibcall)
25055 /* Ensure the cfa_restores are hung off an insn that won't
25056 be reordered above other restores. */
25057 emit_insn (gen_blockage ());
25059 emit_cfa_restores (cfa_restores);
25063 /* Write function epilogue. */
25065 static void
25066 rs6000_output_function_epilogue (FILE *file,
25067 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
25069 #if TARGET_MACHO
25070 macho_branch_islands ();
25071 /* Mach-O doesn't support labels at the end of objects, so if
25072 it looks like we might want one, insert a NOP. */
25074 rtx_insn *insn = get_last_insn ();
25075 rtx_insn *deleted_debug_label = NULL;
25076 while (insn
25077 && NOTE_P (insn)
25078 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
25080 /* Don't insert a nop for NOTE_INSN_DELETED_DEBUG_LABEL
25081 notes only, instead set their CODE_LABEL_NUMBER to -1,
25082 otherwise there would be code generation differences
25083 between -g and -g0. */
25084 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25085 deleted_debug_label = insn;
25086 insn = PREV_INSN (insn);
25088 if (insn
25089 && (LABEL_P (insn)
25090 || (NOTE_P (insn)
25091 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
25092 fputs ("\tnop\n", file);
25093 else if (deleted_debug_label)
25094 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
25095 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25096 CODE_LABEL_NUMBER (insn) = -1;
25098 #endif
25100 /* Output a traceback table here. See /usr/include/sys/debug.h for info
25101 on its format.
25103 We don't output a traceback table if -finhibit-size-directive was
25104 used. The documentation for -finhibit-size-directive reads
25105 ``don't output a @code{.size} assembler directive, or anything
25106 else that would cause trouble if the function is split in the
25107 middle, and the two halves are placed at locations far apart in
25108 memory.'' The traceback table has this property, since it
25109 includes the offset from the start of the function to the
25110 traceback table itself.
25112 System V.4 PowerPC (and the embedded ABI derived from it) uses a
25113 different traceback table. */
25114 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25115 && ! flag_inhibit_size_directive
25116 && rs6000_traceback != traceback_none && !cfun->is_thunk)
25118 const char *fname = NULL;
25119 const char *language_string = lang_hooks.name;
25120 int fixed_parms = 0, float_parms = 0, parm_info = 0;
25121 int i;
25122 int optional_tbtab;
25123 rs6000_stack_t *info = rs6000_stack_info ();
25125 if (rs6000_traceback == traceback_full)
25126 optional_tbtab = 1;
25127 else if (rs6000_traceback == traceback_part)
25128 optional_tbtab = 0;
25129 else
25130 optional_tbtab = !optimize_size && !TARGET_ELF;
25132 if (optional_tbtab)
25134 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25135 while (*fname == '.') /* V.4 encodes . in the name */
25136 fname++;
25138 /* Need label immediately before tbtab, so we can compute
25139 its offset from the function start. */
25140 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25141 ASM_OUTPUT_LABEL (file, fname);
25144 /* The .tbtab pseudo-op can only be used for the first eight
25145 expressions, since it can't handle the possibly variable
25146 length fields that follow. However, if you omit the optional
25147 fields, the assembler outputs zeros for all optional fields
25148 anyway, giving each variable-length field its minimum length
25149 (as defined in sys/debug.h). Thus we cannot use the .tbtab
25150 pseudo-op at all. */
25152 /* An all-zero word flags the start of the tbtab, for debuggers
25153 that have to find it by searching forward from the entry
25154 point or from the current pc. */
25155 fputs ("\t.long 0\n", file);
25157 /* Tbtab format type. Use format type 0. */
25158 fputs ("\t.byte 0,", file);
25160 /* Language type. Unfortunately, there does not seem to be any
25161 official way to discover the language being compiled, so we
25162 use language_string.
25163 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
25164 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
25165 a number, so for now use 9. LTO and Go aren't assigned numbers
25166 either, so for now use 0. */
25167 if (! strcmp (language_string, "GNU C")
25168 || ! strcmp (language_string, "GNU GIMPLE")
25169 || ! strcmp (language_string, "GNU Go"))
25170 i = 0;
25171 else if (! strcmp (language_string, "GNU F77")
25172 || ! strcmp (language_string, "GNU Fortran"))
25173 i = 1;
25174 else if (! strcmp (language_string, "GNU Pascal"))
25175 i = 2;
25176 else if (! strcmp (language_string, "GNU Ada"))
25177 i = 3;
25178 else if (! strcmp (language_string, "GNU C++")
25179 || ! strcmp (language_string, "GNU Objective-C++"))
25180 i = 9;
25181 else if (! strcmp (language_string, "GNU Java"))
25182 i = 13;
25183 else if (! strcmp (language_string, "GNU Objective-C"))
25184 i = 14;
25185 else
25186 gcc_unreachable ();
25187 fprintf (file, "%d,", i);
25189 /* 8 single bit fields: global linkage (not set for C extern linkage,
25190 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
25191 from start of procedure stored in tbtab, internal function, function
25192 has controlled storage, function has no toc, function uses fp,
25193 function logs/aborts fp operations. */
25194 /* Assume that fp operations are used if any fp reg must be saved. */
25195 fprintf (file, "%d,",
25196 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
25198 /* 6 bitfields: function is interrupt handler, name present in
25199 proc table, function calls alloca, on condition directives
25200 (controls stack walks, 3 bits), saves condition reg, saves
25201 link reg. */
25202 /* The `function calls alloca' bit seems to be set whenever reg 31 is
25203 set up as a frame pointer, even when there is no alloca call. */
25204 fprintf (file, "%d,",
25205 ((optional_tbtab << 6)
25206 | ((optional_tbtab & frame_pointer_needed) << 5)
25207 | (info->cr_save_p << 1)
25208 | (info->lr_save_p)));
25210 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
25211 (6 bits). */
25212 fprintf (file, "%d,",
25213 (info->push_p << 7) | (64 - info->first_fp_reg_save));
25215 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
25216 fprintf (file, "%d,", (32 - first_reg_to_save ()));
25218 if (optional_tbtab)
25220 /* Compute the parameter info from the function decl argument
25221 list. */
25222 tree decl;
25223 int next_parm_info_bit = 31;
25225 for (decl = DECL_ARGUMENTS (current_function_decl);
25226 decl; decl = DECL_CHAIN (decl))
25228 rtx parameter = DECL_INCOMING_RTL (decl);
25229 enum machine_mode mode = GET_MODE (parameter);
25231 if (GET_CODE (parameter) == REG)
25233 if (SCALAR_FLOAT_MODE_P (mode))
25235 int bits;
25237 float_parms++;
25239 switch (mode)
25241 case SFmode:
25242 case SDmode:
25243 bits = 0x2;
25244 break;
25246 case DFmode:
25247 case DDmode:
25248 case TFmode:
25249 case TDmode:
25250 bits = 0x3;
25251 break;
25253 default:
25254 gcc_unreachable ();
25257 /* If only one bit will fit, don't or in this entry. */
25258 if (next_parm_info_bit > 0)
25259 parm_info |= (bits << (next_parm_info_bit - 1));
25260 next_parm_info_bit -= 2;
25262 else
25264 fixed_parms += ((GET_MODE_SIZE (mode)
25265 + (UNITS_PER_WORD - 1))
25266 / UNITS_PER_WORD);
25267 next_parm_info_bit -= 1;
25273 /* Number of fixed point parameters. */
25274 /* This is actually the number of words of fixed point parameters; thus
25275 an 8-byte struct counts as 2, and the maximum value is 8. */
25276 fprintf (file, "%d,", fixed_parms);
25278 /* 2 bitfields: number of floating point parameters (7 bits), parameters
25279 all on stack. */
25280 /* This is actually the number of fp registers that hold parameters;
25281 and thus the maximum value is 13. */
25282 /* Set parameters on stack bit if parameters are not in their original
25283 registers, regardless of whether they are on the stack? Xlc
25284 seems to set the bit when not optimizing. */
25285 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
25287 if (! optional_tbtab)
25288 return;
25290 /* Optional fields follow. Some are variable length. */
25292 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
25293 11 double float. */
25294 /* There is an entry for each parameter in a register, in the order that
25295 they occur in the parameter list. Any intervening arguments on the
25296 stack are ignored. If the list overflows a long (max possible length
25297 34 bits) then completely leave off all elements that don't fit. */
25298 /* Only emit this long if there was at least one parameter. */
25299 if (fixed_parms || float_parms)
25300 fprintf (file, "\t.long %d\n", parm_info);
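/* A worked example, for illustration only (not in the original
   source): for a function whose incoming register arguments are
   (int a, double d, float s), the loop above leaves fixed_parms = 1
   and float_parms = 2, and packs parm_info from bit 31 downwards:
   bit 31 = 0 for the int, bits 30-29 = 11 for the double, and
   bits 28-27 = 10 for the float, i.e.
   parm_info = (0x3 << 29) | (0x2 << 27) = 0x70000000, which the
   fprintf above emits as ".long 1879048192". */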
25302 /* Offset from start of code to tb table. */
25303 fputs ("\t.long ", file);
25304 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25305 RS6000_OUTPUT_BASENAME (file, fname);
25306 putc ('-', file);
25307 rs6000_output_function_entry (file, fname);
25308 putc ('\n', file);
25310 /* Interrupt handler mask. */
25311 /* Omit this long, since we never set the interrupt handler bit
25312 above. */
25314 /* Number of CTL (controlled storage) anchors. */
25315 /* Omit this long, since the has_ctl bit is never set above. */
25317 /* Displacement into stack of each CTL anchor. */
25318 /* Omit this list of longs, because there are no CTL anchors. */
25320 /* Length of function name. */
25321 if (*fname == '*')
25322 ++fname;
25323 fprintf (file, "\t.short %d\n", (int) strlen (fname));
25325 /* Function name. */
25326 assemble_string (fname, strlen (fname));
25328 /* Register for alloca automatic storage; this is always reg 31.
25329 Only emit this if the alloca bit was set above. */
25330 if (frame_pointer_needed)
25331 fputs ("\t.byte 31\n", file);
25333 fputs ("\t.align 2\n", file);
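/* An illustrative sketch of the output, not taken from the original
   source: on 32-bit AIX, for a small unoptimized C function "foo"
   that saves LR and GPRs 30-31, takes one integer argument, and
   needs no stack push, FP saves, or frame pointer, the code above
   would emit roughly:

       LT..foo:
               .long 0                  # all-zero tbtab start marker
               .byte 0,0,32,65,0,2,1,1  # format, language, bit
                                        # fields, FPR/GPR counts,
                                        # parameter counts
               .long 0                  # parm_info: one fixed word
               .long LT..foo-.foo       # offset from entry to tbtab
               .short 3                 # length of "foo"
               .byte "foo"              # function name
               .align 2

   Exact label spellings and byte values depend on the target macros
   and on what the function actually saves. */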
25337 /* A C compound statement that outputs the assembler code for a thunk
25338 function, used to implement C++ virtual function calls with
25339 multiple inheritance. The thunk acts as a wrapper around a virtual
25340 function, adjusting the implicit object parameter before handing
25341 control off to the real function.
25343 First, emit code to add the integer DELTA to the location that
25344 contains the incoming first argument. Assume that this argument
25345 contains a pointer, and is the one used to pass the `this' pointer
25346 in C++. This is the incoming argument *before* the function
25347 prologue, e.g. `%o0' on a sparc. The addition must preserve the
25348 values of all other incoming arguments.
25350 After the addition, emit code to jump to FUNCTION, which is a
25351 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
25352 not touch the return address. Hence returning from FUNCTION will
25353 return to whoever called the current `thunk'.
25355 The effect must be as if FUNCTION had been called directly with the
25356 adjusted first argument. This macro is responsible for emitting
25357 all of the code for a thunk function; output_function_prologue()
25358 and output_function_epilogue() are not invoked.
25360 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
25361 been extracted from it.) It might possibly be useful on some
25362 targets, but probably not.
25364 If you do not define this macro, the target-independent code in the
25365 C++ frontend will generate a less efficient heavyweight thunk that
25366 calls FUNCTION instead of jumping to it. The generic approach does
25367 not support varargs. */
25369 static void
25370 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
25371 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
25372 tree function)
25374 rtx this_rtx, funexp;
25375 rtx_insn *insn;
25377 reload_completed = 1;
25378 epilogue_completed = 1;
25380 /* Mark the end of the (empty) prologue. */
25381 emit_note (NOTE_INSN_PROLOGUE_END);
25383 /* Find the "this" pointer. If the function returns a structure,
25384 the structure return pointer is in r3. */
25385 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
25386 this_rtx = gen_rtx_REG (Pmode, 4);
25387 else
25388 this_rtx = gen_rtx_REG (Pmode, 3);
25390 /* Apply the constant offset, if required. */
25391 if (delta)
25392 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
25394 /* Apply the offset from the vtable, if required. */
25395 if (vcall_offset)
25397 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
25398 rtx tmp = gen_rtx_REG (Pmode, 12);
25400 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
25401 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
25403 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
25404 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
25406 else
25408 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
25410 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
25412 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
25415 /* Generate a tail call to the target function. */
25416 if (!TREE_USED (function))
25418 assemble_external (function);
25419 TREE_USED (function) = 1;
25421 funexp = XEXP (DECL_RTL (function), 0);
25422 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
25424 #if TARGET_MACHO
25425 if (MACHOPIC_INDIRECT)
25426 funexp = machopic_indirect_call_target (funexp);
25427 #endif
25429 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
25430 generate sibcall RTL explicitly. */
25431 insn = emit_call_insn (
25432 gen_rtx_PARALLEL (VOIDmode,
25433 gen_rtvec (4,
25434 gen_rtx_CALL (VOIDmode,
25435 funexp, const0_rtx),
25436 gen_rtx_USE (VOIDmode, const0_rtx),
25437 gen_rtx_USE (VOIDmode,
25438 gen_rtx_REG (SImode,
25439 LR_REGNO)),
25440 simple_return_rtx)));
25441 SIBLING_CALL_P (insn) = 1;
25442 emit_barrier ();
25444 /* Ensure we have a global entry point for the thunk. ??? We could
25445 avoid that if the target routine doesn't need a global entry point,
25446 but we do not know whether this is the case at this point. */
25447 if (DEFAULT_ABI == ABI_ELFv2)
25448 cfun->machine->r2_setup_needed = true;
25450 /* Run just enough of rest_of_compilation to get the insns emitted.
25451 There's not really enough bulk here to make other passes such as
25452 instruction scheduling worth while. Note that use_thunk calls
25453 assemble_start_function and assemble_end_function. */
25454 insn = get_insns ();
25455 shorten_branches (insn);
25456 final_start_function (insn, file, 1);
25457 final (insn, file, 1);
25458 final_end_function ();
25460 reload_completed = 0;
25461 epilogue_completed = 0;
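/* For illustration only (not in the original source), the RTL
   emitted above is equivalent to this C sketch, where DELTA,
   VCALL_OFFSET, and TARGET stand for the delta, vcall_offset, and
   function arguments:

       void
       thunk (char *this_ptr)
       {
         char *p = this_ptr + DELTA;    /* constant adjustment */
         if (VCALL_OFFSET)
           /* Load the vtable pointer, fetch the offset stored
              VCALL_OFFSET bytes into the vtable, and apply it.  */
           p += *(long *) (*(char **) p + VCALL_OFFSET);
         TARGET (p);                    /* sibcall, not a call */
       }
   */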
25464 /* A quick summary of the various types of 'constant-pool tables'
25465 under PowerPC:
25467 Target      Flags                Name             One table per
25468 AIX         (none)               AIX TOC          object file
25469 AIX         -mfull-toc           AIX TOC          object file
25470 AIX         -mminimal-toc        AIX minimal TOC  translation unit
25471 SVR4/EABI   (none)               SVR4 SDATA       object file
25472 SVR4/EABI   -fpic                SVR4 pic         object file
25473 SVR4/EABI   -fPIC                SVR4 PIC         translation unit
25474 SVR4/EABI   -mrelocatable        EABI TOC         function
25475 SVR4/EABI   -maix                AIX TOC          object file
25476 SVR4/EABI   -maix -mminimal-toc
25477                                  AIX minimal TOC  translation unit
25479 Name             Reg.  Set by   entries       contains:
25480                                 made by   addrs?   fp?      sum?
25482 AIX TOC          2     crt0     as        Y        option   option
25483 AIX minimal TOC  30    prolog   gcc       Y        Y        option
25484 SVR4 SDATA       13    crt0     gcc       N        Y        N
25485 SVR4 pic         30    prolog   ld        Y        not yet  N
25486 SVR4 PIC         30    prolog   gcc       Y        option   option
25487 EABI TOC         30    prolog   gcc       Y        option   option
25491 /* Hash functions for the hash table. */
25493 static unsigned
25494 rs6000_hash_constant (rtx k)
25496 enum rtx_code code = GET_CODE (k);
25497 enum machine_mode mode = GET_MODE (k);
25498 unsigned result = (code << 3) ^ mode;
25499 const char *format;
25500 int flen, fidx;
25502 format = GET_RTX_FORMAT (code);
25503 flen = strlen (format);
25504 fidx = 0;
25506 switch (code)
25508 case LABEL_REF:
25509 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
25511 case CONST_WIDE_INT:
25513 int i;
25514 flen = CONST_WIDE_INT_NUNITS (k);
25515 for (i = 0; i < flen; i++)
25516 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
25517 return result;
25520 case CONST_DOUBLE:
25521 if (mode != VOIDmode)
25522 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
25523 flen = 2;
25524 break;
25526 case CODE_LABEL:
25527 fidx = 3;
25528 break;
25530 default:
25531 break;
25534 for (; fidx < flen; fidx++)
25535 switch (format[fidx])
25537 case 's':
25539 unsigned i, len;
25540 const char *str = XSTR (k, fidx);
25541 len = strlen (str);
25542 result = result * 613 + len;
25543 for (i = 0; i < len; i++)
25544 result = result * 613 + (unsigned) str[i];
25545 break;
25547 case 'u':
25548 case 'e':
25549 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
25550 break;
25551 case 'i':
25552 case 'n':
25553 result = result * 613 + (unsigned) XINT (k, fidx);
25554 break;
25555 case 'w':
25556 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
25557 result = result * 613 + (unsigned) XWINT (k, fidx);
25558 else
25560 size_t i;
25561 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
25562 result = result * 613 + (unsigned) (XWINT (k, fidx)
25563 >> CHAR_BIT * i);
25565 break;
25566 case '0':
25567 break;
25568 default:
25569 gcc_unreachable ();
25572 return result;
25575 hashval_t
25576 toc_hasher::hash (toc_hash_struct *thc)
25578 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
25581 /* Compare H1 and H2 for equivalence. */
25583 bool
25584 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
25586 rtx r1 = h1->key;
25587 rtx r2 = h2->key;
25589 if (h1->key_mode != h2->key_mode)
25590 return 0;
25592 return rtx_equal_p (r1, r2);
25595 /* These are the names given by the C++ front-end to vtables, and
25596 vtable-like objects. Ideally, this logic should not be here;
25597 instead, there should be some programmatic way of inquiring as
25598 to whether or not an object is a vtable. */
25600 #define VTABLE_NAME_P(NAME) \
25601 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
25602 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
25603 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
25604 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
25605 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
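/* For illustration (not in the original source): under the Itanium
   C++ ABI these prefixes mangle, e.g., "vtable for Base" as
   "_ZTV4Base", the VTT as "_ZTT4Base", typeinfo as "_ZTI4Base", and
   construction vtables as "_ZTC..."; "_vt." is the old GNU v2
   vtable mangling. */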
25607 #ifdef NO_DOLLAR_IN_LABEL
25608 /* Return a GGC-allocated character string translating dollar signs in
25609 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
25611 const char *
25612 rs6000_xcoff_strip_dollar (const char *name)
25614 char *strip, *p;
25615 const char *q;
25616 size_t len;
25618 q = (const char *) strchr (name, '$');
25620 if (q == 0 || q == name)
25621 return name;
25623 len = strlen (name);
25624 strip = XALLOCAVEC (char, len + 1);
25625 strcpy (strip, name);
25626 p = strip + (q - name);
25627 while (p)
25629 *p = '_';
25630 p = strchr (p + 1, '$');
25633 return ggc_alloc_string (strip, len);
25635 #endif
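/* Example, for illustration only: rs6000_xcoff_strip_dollar
   ("foo$bar$baz") returns "foo_bar_baz"; a name containing no '$',
   or one whose first character is '$', is returned unchanged. */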
25637 void
25638 rs6000_output_symbol_ref (FILE *file, rtx x)
25640 /* Currently C++ toc references to vtables can be emitted before it
25641 is decided whether the vtable is public or private. If this is
25642 the case, then the linker will eventually complain that there is
25643 a reference to an unknown section. Thus, for vtables only,
25644 we emit the TOC reference to reference the symbol and not the
25645 section. */
25646 const char *name = XSTR (x, 0);
25648 if (VTABLE_NAME_P (name))
25650 RS6000_OUTPUT_BASENAME (file, name);
25652 else
25653 assemble_name (file, name);
25656 /* Output a TOC entry. We derive the entry name from what is being
25657 written. */
25659 void
25660 output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode)
25662 char buf[256];
25663 const char *name = buf;
25664 rtx base = x;
25665 HOST_WIDE_INT offset = 0;
25667 gcc_assert (!TARGET_NO_TOC);
25669 /* When the linker won't eliminate them, don't output duplicate
25670 TOC entries (this happens on AIX if there is any kind of TOC,
25671 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
25672 CODE_LABELs. */
25673 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
25675 struct toc_hash_struct *h;
25677 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
25678 time because GGC is not initialized at that point. */
25679 if (toc_hash_table == NULL)
25680 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
25682 h = ggc_alloc<toc_hash_struct> ();
25683 h->key = x;
25684 h->key_mode = mode;
25685 h->labelno = labelno;
25687 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
25688 if (*found == NULL)
25689 *found = h;
25690 else /* This is indeed a duplicate.
25691 Set this label equal to that label. */
25693 fputs ("\t.set ", file);
25694 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
25695 fprintf (file, "%d,", labelno);
25696 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
25697 fprintf (file, "%d\n", ((*found)->labelno));
25699 #ifdef HAVE_AS_TLS
25700 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
25701 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
25702 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
25704 fputs ("\t.set ", file);
25705 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
25706 fprintf (file, "%d,", labelno);
25707 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
25708 fprintf (file, "%d\n", ((*found)->labelno));
25710 #endif
25711 return;
25715 /* If we're going to put a double constant in the TOC, make sure it's
25716 aligned properly when strict alignment is on. */
25717 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
25718 && STRICT_ALIGNMENT
25719 && GET_MODE_BITSIZE (mode) >= 64
25720 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
25721 ASM_OUTPUT_ALIGN (file, 3);
25724 (*targetm.asm_out.internal_label) (file, "LC", labelno);
25726 /* Handle FP constants specially. Note that if we have a minimal
25727 TOC, things we put here aren't actually in the TOC, so we can allow
25728 FP constants. */
25729 if (GET_CODE (x) == CONST_DOUBLE
25730 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode))
25732 REAL_VALUE_TYPE rv;
25733 long k[4];
25735 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
25736 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
25737 REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
25738 else
25739 REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
25741 if (TARGET_64BIT)
25743 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25744 fputs (DOUBLE_INT_ASM_OP, file);
25745 else
25746 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
25747 k[0] & 0xffffffff, k[1] & 0xffffffff,
25748 k[2] & 0xffffffff, k[3] & 0xffffffff);
25749 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
25750 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
25751 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
25752 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
25753 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
25754 return;
25756 else
25758 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25759 fputs ("\t.long ", file);
25760 else
25761 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
25762 k[0] & 0xffffffff, k[1] & 0xffffffff,
25763 k[2] & 0xffffffff, k[3] & 0xffffffff);
25764 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
25765 k[0] & 0xffffffff, k[1] & 0xffffffff,
25766 k[2] & 0xffffffff, k[3] & 0xffffffff);
25767 return;
25770 else if (GET_CODE (x) == CONST_DOUBLE
25771 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
25773 REAL_VALUE_TYPE rv;
25774 long k[2];
25776 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
25778 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
25779 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
25780 else
25781 REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
25783 if (TARGET_64BIT)
25785 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25786 fputs (DOUBLE_INT_ASM_OP, file);
25787 else
25788 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
25789 k[0] & 0xffffffff, k[1] & 0xffffffff);
25790 fprintf (file, "0x%lx%08lx\n",
25791 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
25792 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
25793 return;
25795 else
25797 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25798 fputs ("\t.long ", file);
25799 else
25800 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
25801 k[0] & 0xffffffff, k[1] & 0xffffffff);
25802 fprintf (file, "0x%lx,0x%lx\n",
25803 k[0] & 0xffffffff, k[1] & 0xffffffff);
25804 return;
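/* An illustrative example (not in the original source): for the
   DFmode constant 1.0, whose target image is 0x3FF0000000000000
   (k[0] = 0x3ff00000, k[1] = 0 when big-endian), 64-bit AIX without
   -mminimal-toc emits roughly

       .tc FD_3ff00000_0[TC],0x3ff0000000000000

   while 64-bit ELF emits only the value, after DOUBLE_INT_ASM_OP
   (".quad" on this port). */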
25807 else if (GET_CODE (x) == CONST_DOUBLE
25808 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
25810 REAL_VALUE_TYPE rv;
25811 long l;
25813 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
25814 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
25815 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
25816 else
25817 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
25819 if (TARGET_64BIT)
25821 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25822 fputs (DOUBLE_INT_ASM_OP, file);
25823 else
25824 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
25825 if (WORDS_BIG_ENDIAN)
25826 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
25827 else
25828 fprintf (file, "0x%lx\n", l & 0xffffffff);
25829 return;
25831 else
25833 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25834 fputs ("\t.long ", file);
25835 else
25836 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
25837 fprintf (file, "0x%lx\n", l & 0xffffffff);
25838 return;
25841 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
25843 unsigned HOST_WIDE_INT low;
25844 HOST_WIDE_INT high;
25846 low = INTVAL (x) & 0xffffffff;
25847 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
25849 /* TOC entries are always Pmode-sized, so when big-endian
25850 smaller integer constants in the TOC need to be padded.
25851 (This is still a win over putting the constants in
25852 a separate constant pool, because then we'd have
25853 to have both a TOC entry _and_ the actual constant.)
25855 For a 32-bit target, CONST_INT values are loaded and shifted
25856 entirely within `low' and can be stored in one TOC entry. */
25858 /* It would be easy to make this work, but it doesn't now. */
25859 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
25861 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
25863 low |= high << 32;
25864 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
25865 high = (HOST_WIDE_INT) low >> 32;
25866 low &= 0xffffffff;
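/* A worked example, for illustration only: a HImode CONST_INT of
   value 0x1234 in a 64-bit big-endian TOC takes the branch above;
   low = 0x1234 is shifted left by POINTER_SIZE - 16 = 48 bits,
   leaving high = 0x12340000 and low = 0, so the entry is emitted
   below as the left-justified 64-bit word 0x1234000000000000. */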
25869 if (TARGET_64BIT)
25871 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25872 fputs (DOUBLE_INT_ASM_OP, file);
25873 else
25874 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
25875 (long) high & 0xffffffff, (long) low & 0xffffffff);
25876 fprintf (file, "0x%lx%08lx\n",
25877 (long) high & 0xffffffff, (long) low & 0xffffffff);
25878 return;
25880 else
25882 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
25884 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25885 fputs ("\t.long ", file);
25886 else
25887 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
25888 (long) high & 0xffffffff, (long) low & 0xffffffff);
25889 fprintf (file, "0x%lx,0x%lx\n",
25890 (long) high & 0xffffffff, (long) low & 0xffffffff);
25892 else
25894 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25895 fputs ("\t.long ", file);
25896 else
25897 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
25898 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
25900 return;
25904 if (GET_CODE (x) == CONST)
25906 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
25907 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
25909 base = XEXP (XEXP (x, 0), 0);
25910 offset = INTVAL (XEXP (XEXP (x, 0), 1));
25913 switch (GET_CODE (base))
25915 case SYMBOL_REF:
25916 name = XSTR (base, 0);
25917 break;
25919 case LABEL_REF:
25920 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
25921 CODE_LABEL_NUMBER (XEXP (base, 0)));
25922 break;
25924 case CODE_LABEL:
25925 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
25926 break;
25928 default:
25929 gcc_unreachable ();
25932 if (TARGET_ELF || TARGET_MINIMAL_TOC)
25933 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
25934 else
25936 fputs ("\t.tc ", file);
25937 RS6000_OUTPUT_BASENAME (file, name);
25939 if (offset < 0)
25940 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
25941 else if (offset)
25942 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
25944 /* Mark large TOC symbols on AIX with [TE] so they are mapped
25945 after other TOC symbols, reducing overflow of small TOC access
25946 to [TC] symbols. */
25947 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
25948 ? "[TE]," : "[TC],", file);
25951 /* Currently C++ toc references to vtables can be emitted before it
25952 is decided whether the vtable is public or private. If this is
25953 the case, then the linker will eventually complain that there is
25954 a TOC reference to an unknown section. Thus, for vtables only,
25955 we emit the TOC reference to reference the symbol and not the
25956 section. */
25957 if (VTABLE_NAME_P (name))
25959 RS6000_OUTPUT_BASENAME (file, name);
25960 if (offset < 0)
25961 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
25962 else if (offset > 0)
25963 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
25965 else
25966 output_addr_const (file, x);
25968 #if HAVE_AS_TLS
25969 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF
25970 && SYMBOL_REF_TLS_MODEL (base) != 0)
25972 if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
25973 fputs ("@le", file);
25974 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_INITIAL_EXEC)
25975 fputs ("@ie", file);
25976 /* Use global-dynamic for local-dynamic. */
25977 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_GLOBAL_DYNAMIC
25978 || SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_DYNAMIC)
25980 putc ('\n', file);
25981 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
25982 fputs ("\t.tc .", file);
25983 RS6000_OUTPUT_BASENAME (file, name);
25984 fputs ("[TC],", file);
25985 output_addr_const (file, x);
25986 fputs ("@m", file);
25989 #endif
25991 putc ('\n', file);
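/* An illustrative example (not in the original source): for the
   constant (const (plus (symbol_ref "foo") (const_int 16))), the
   code above emits roughly ".tc foo.P16[TC],foo+16" on AIX with the
   small TOC model, and just the value "foo+16" after ".long" or
   DOUBLE_INT_ASM_OP on ELF or with -mminimal-toc. */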
25994 /* Output an assembler pseudo-op to write an ASCII string of N characters
25995 starting at P to FILE.
25997 On the RS/6000, we have to do this using the .byte operation and
25998 write out special characters outside the quoted string.
25999 Also, the assembler is broken; very long strings are truncated,
26000 so we must artificially break them up early. */
26002 void
26003 output_ascii (FILE *file, const char *p, int n)
26005 char c;
26006 int i, count_string;
26007 const char *for_string = "\t.byte \"";
26008 const char *for_decimal = "\t.byte ";
26009 const char *to_close = NULL;
26011 count_string = 0;
26012 for (i = 0; i < n; i++)
26014 c = *p++;
26015 if (c >= ' ' && c < 0177)
26017 if (for_string)
26018 fputs (for_string, file);
26019 putc (c, file);
26021 /* Write two quotes to get one. */
26022 if (c == '"')
26024 putc (c, file);
26025 ++count_string;
26028 for_string = NULL;
26029 for_decimal = "\"\n\t.byte ";
26030 to_close = "\"\n";
26031 ++count_string;
26033 if (count_string >= 512)
26035 fputs (to_close, file);
26037 for_string = "\t.byte \"";
26038 for_decimal = "\t.byte ";
26039 to_close = NULL;
26040 count_string = 0;
26043 else
26045 if (for_decimal)
26046 fputs (for_decimal, file);
26047 fprintf (file, "%d", c);
26049 for_string = "\n\t.byte \"";
26050 for_decimal = ", ";
26051 to_close = "\n";
26052 count_string = 0;
26056 /* Now close the string if we have written one. Then end the line. */
26057 if (to_close)
26058 fputs (to_close, file);
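/* Example, for illustration only: output_ascii (file, "ab\"c\n", 5)
   emits

       .byte "ab""c"
       .byte 10

   i.e. printable characters are grouped into a quoted string (with
   embedded quotes doubled) and unprintable ones are written as
   decimal byte values. */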
26061 /* Generate a unique section name for FILENAME for a section type
26062 represented by SECTION_DESC. Output goes into BUF.
26064 SECTION_DESC can be any string, as long as it is different for each
26065 possible section type.
26067 We name the section in the same manner as xlc. The name begins with an
26068 underscore followed by the filename (after stripping any leading directory
26069 names) with the last period replaced by the string SECTION_DESC. If
26070 FILENAME does not contain a period, SECTION_DESC is appended to the end of
26071 the name. */
26073 void
26074 rs6000_gen_section_name (char **buf, const char *filename,
26075 const char *section_desc)
26077 const char *q, *after_last_slash, *last_period = 0;
26078 char *p;
26079 int len;
26081 after_last_slash = filename;
26082 for (q = filename; *q; q++)
26084 if (*q == '/')
26085 after_last_slash = q + 1;
26086 else if (*q == '.')
26087 last_period = q;
26090 len = strlen (after_last_slash) + strlen (section_desc) + 2;
26091 *buf = (char *) xmalloc (len);
26093 p = *buf;
26094 *p++ = '_';
26096 for (q = after_last_slash; *q; q++)
26098 if (q == last_period)
26100 strcpy (p, section_desc);
26101 p += strlen (section_desc);
26102 break;
26105 else if (ISALNUM (*q))
26106 *p++ = *q;
26109 if (last_period == 0)
26110 strcpy (p, section_desc);
26111 else
26112 *p = '\0';
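/* Example, for illustration only: rs6000_gen_section_name (&buf,
   "src/foo.c", "data") sets *buf to "_foodata"; the directory
   prefix is dropped and the last period, together with anything
   after it, is replaced by SECTION_DESC. */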
26115 /* Emit profile function. */
26117 void
26118 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
26120 /* Non-standard profiling for kernels, which just saves LR then calls
26121 _mcount without worrying about arg saves. The idea is to change
26122 the function prologue as little as possible as it isn't easy to
26123 account for arg save/restore code added just for _mcount. */
26124 if (TARGET_PROFILE_KERNEL)
26125 return;
26127 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26129 #ifndef NO_PROFILE_COUNTERS
26130 # define NO_PROFILE_COUNTERS 0
26131 #endif
26132 if (NO_PROFILE_COUNTERS)
26133 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26134 LCT_NORMAL, VOIDmode, 0);
26135 else
26137 char buf[30];
26138 const char *label_name;
26139 rtx fun;
26141 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26142 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
26143 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
26145 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26146 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
26149 else if (DEFAULT_ABI == ABI_DARWIN)
26151 const char *mcount_name = RS6000_MCOUNT;
26152 int caller_addr_regno = LR_REGNO;
26154 /* Be conservative and always set this, at least for now. */
26155 crtl->uses_pic_offset_table = 1;
26157 #if TARGET_MACHO
26158 /* For PIC code, set up a stub and collect the caller's address
26159 from r0, which is where the prologue puts it. */
26160 if (MACHOPIC_INDIRECT
26161 && crtl->uses_pic_offset_table)
26162 caller_addr_regno = 0;
26163 #endif
26164 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
26165 LCT_NORMAL, VOIDmode, 1,
26166 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
26170 /* Write function profiler code. */
26172 void
26173 output_function_profiler (FILE *file, int labelno)
26175 char buf[100];
26177 switch (DEFAULT_ABI)
26179 default:
26180 gcc_unreachable ();
26182 case ABI_V4:
26183 if (!TARGET_32BIT)
26185 warning (0, "no profiling of 64-bit code for this ABI");
26186 return;
26188 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26189 fprintf (file, "\tmflr %s\n", reg_names[0]);
26190 if (NO_PROFILE_COUNTERS)
26192 asm_fprintf (file, "\tstw %s,4(%s)\n",
26193 reg_names[0], reg_names[1]);
26195 else if (TARGET_SECURE_PLT && flag_pic)
26197 if (TARGET_LINK_STACK)
26199 char name[32];
26200 get_ppc476_thunk_name (name);
26201 asm_fprintf (file, "\tbl %s\n", name);
26203 else
26204 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
26205 asm_fprintf (file, "\tstw %s,4(%s)\n",
26206 reg_names[0], reg_names[1]);
26207 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26208 asm_fprintf (file, "\taddis %s,%s,",
26209 reg_names[12], reg_names[12]);
26210 assemble_name (file, buf);
26211 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
26212 assemble_name (file, buf);
26213 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
26215 else if (flag_pic == 1)
26217 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
26218 asm_fprintf (file, "\tstw %s,4(%s)\n",
26219 reg_names[0], reg_names[1]);
26220 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26221 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
26222 assemble_name (file, buf);
26223 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
26225 else if (flag_pic > 1)
26227 asm_fprintf (file, "\tstw %s,4(%s)\n",
26228 reg_names[0], reg_names[1]);
26229 /* Now, we need to get the address of the label. */
26230 if (TARGET_LINK_STACK)
26232 char name[32];
26233 get_ppc476_thunk_name (name);
26234 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
26235 assemble_name (file, buf);
26236 fputs ("-.\n1:", file);
26237 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26238 asm_fprintf (file, "\taddi %s,%s,4\n",
26239 reg_names[11], reg_names[11]);
26241 else
26243 fputs ("\tbcl 20,31,1f\n\t.long ", file);
26244 assemble_name (file, buf);
26245 fputs ("-.\n1:", file);
26246 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26248 asm_fprintf (file, "\tlwz %s,0(%s)\n",
26249 reg_names[0], reg_names[11]);
26250 asm_fprintf (file, "\tadd %s,%s,%s\n",
26251 reg_names[0], reg_names[0], reg_names[11]);
26253 else
26255 asm_fprintf (file, "\tlis %s,", reg_names[12]);
26256 assemble_name (file, buf);
26257 fputs ("@ha\n", file);
26258 asm_fprintf (file, "\tstw %s,4(%s)\n",
26259 reg_names[0], reg_names[1]);
26260 asm_fprintf (file, "\tla %s,", reg_names[0]);
26261 assemble_name (file, buf);
26262 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
26265 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
26266 fprintf (file, "\tbl %s%s\n",
26267 RS6000_MCOUNT, flag_pic ? "@plt" : "");
26268 break;
26270 case ABI_AIX:
26271 case ABI_ELFv2:
26272 case ABI_DARWIN:
26273 /* Don't do anything, done in output_profile_hook (). */
26274 break;
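/* An illustrative example (not in the original source): for ABI_V4
   without PIC and with profile counters in use, the sequence
   emitted above is roughly

       mflr 0
       lis 12,.LP0@ha
       stw 0,4(1)
       la 0,.LP0@l(12)
       bl _mcount

   i.e. the address of the per-call-site counter label is passed to
   _mcount in r0, after the link register has been saved at 4(r1). */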
26280 /* The following variable holds the last issued insn. */
26282 static rtx last_scheduled_insn;
26284 /* The following variable helps to balance issuing of load and
26285 store instructions. */
26287 static int load_store_pendulum;
26289 /* Power4 load update and store update instructions are cracked into a
26290 load or store and an integer insn which are executed in the same cycle.
26291 Branches have their own dispatch slot which does not count against the
26292 GCC issue rate, but it changes the program flow so there are no other
26293 instructions to issue in this cycle. */
26295 static int
26296 rs6000_variable_issue_1 (rtx_insn *insn, int more)
26298 last_scheduled_insn = insn;
26299 if (GET_CODE (PATTERN (insn)) == USE
26300 || GET_CODE (PATTERN (insn)) == CLOBBER)
26302 cached_can_issue_more = more;
26303 return cached_can_issue_more;
26306 if (insn_terminates_group_p (insn, current_group))
26308 cached_can_issue_more = 0;
26309 return cached_can_issue_more;
26312 /* If the insn has no reservation but we reach here anyway, leave MORE unchanged. */
26313 if (recog_memoized (insn) < 0)
26314 return more;
26316 if (rs6000_sched_groups)
26318 if (is_microcoded_insn (insn))
26319 cached_can_issue_more = 0;
26320 else if (is_cracked_insn (insn))
26321 cached_can_issue_more = more > 2 ? more - 2 : 0;
26322 else
26323 cached_can_issue_more = more - 1;
26325 return cached_can_issue_more;
26328 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
26329 return 0;
26331 cached_can_issue_more = more - 1;
26332 return cached_can_issue_more;
26335 static int
26336 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
26338 int r = rs6000_variable_issue_1 (insn, more);
26339 if (verbose)
26340 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
26341 return r;
26344 /* Adjust the cost of a scheduling dependency. Return the new cost of
26345 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
26347 static int
26348 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26350 enum attr_type attr_type;
26352 if (! recog_memoized (insn))
26353 return 0;
26355 switch (REG_NOTE_KIND (link))
26357 case REG_DEP_TRUE:
26359 /* Data dependency; DEP_INSN writes a register that INSN reads
26360 some cycles later. */
26362 /* Separate a load from a narrower, dependent store. */
26363 if (rs6000_sched_groups
26364 && GET_CODE (PATTERN (insn)) == SET
26365 && GET_CODE (PATTERN (dep_insn)) == SET
26366 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
26367 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
26368 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
26369 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
26370 return cost + 14;
26372 attr_type = get_attr_type (insn);
26374 switch (attr_type)
26376 case TYPE_JMPREG:
26377 /* Tell the first scheduling pass about the latency between
26378 a mtctr and bctr (and mtlr and br/blr). The first
26379 scheduling pass will not know about this latency since
26380 the mtctr instruction, which has the latency associated
26381 to it, will be generated by reload. */
26382 return 4;
26383 case TYPE_BRANCH:
26384 /* Leave some extra cycles between a compare and its
26385 dependent branch, to inhibit expensive mispredicts. */
26386 if ((rs6000_cpu_attr == CPU_PPC603
26387 || rs6000_cpu_attr == CPU_PPC604
26388 || rs6000_cpu_attr == CPU_PPC604E
26389 || rs6000_cpu_attr == CPU_PPC620
26390 || rs6000_cpu_attr == CPU_PPC630
26391 || rs6000_cpu_attr == CPU_PPC750
26392 || rs6000_cpu_attr == CPU_PPC7400
26393 || rs6000_cpu_attr == CPU_PPC7450
26394 || rs6000_cpu_attr == CPU_PPCE5500
26395 || rs6000_cpu_attr == CPU_PPCE6500
26396 || rs6000_cpu_attr == CPU_POWER4
26397 || rs6000_cpu_attr == CPU_POWER5
26398 || rs6000_cpu_attr == CPU_POWER7
26399 || rs6000_cpu_attr == CPU_POWER8
26400 || rs6000_cpu_attr == CPU_CELL)
26401 && recog_memoized (dep_insn)
26402 && (INSN_CODE (dep_insn) >= 0))
26404 switch (get_attr_type (dep_insn))
26406 case TYPE_CMP:
26407 case TYPE_COMPARE:
26408 case TYPE_FPCOMPARE:
26409 case TYPE_CR_LOGICAL:
26410 case TYPE_DELAYED_CR:
26411 return cost + 2;
26412 case TYPE_EXTS:
26413 case TYPE_MUL:
26414 if (get_attr_dot (dep_insn) == DOT_YES)
26415 return cost + 2;
26416 else
26417 break;
26418 case TYPE_SHIFT:
26419 if (get_attr_dot (dep_insn) == DOT_YES
26420 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
26421 return cost + 2;
26422 else
26423 break;
26424 default:
26425 break;
26427 break;
26429 case TYPE_STORE:
26430 case TYPE_FPSTORE:
26431 if ((rs6000_cpu == PROCESSOR_POWER6)
26432 && recog_memoized (dep_insn)
26433 && (INSN_CODE (dep_insn) >= 0))
26436 if (GET_CODE (PATTERN (insn)) != SET)
26437 /* If this happens, we have to extend this to schedule
26438 optimally. Return default for now. */
26439 return cost;
26441 /* Adjust the cost for the case where the value written
26442 by a fixed point operation is used as the address
26443 gen value on a store. */
26444 switch (get_attr_type (dep_insn))
26446 case TYPE_LOAD:
26447 case TYPE_CNTLZ:
26449 if (! store_data_bypass_p (dep_insn, insn))
26450 return get_attr_sign_extend (dep_insn)
26451 == SIGN_EXTEND_YES ? 6 : 4;
26452 break;
26454 case TYPE_SHIFT:
26456 if (! store_data_bypass_p (dep_insn, insn))
26457 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26458 6 : 3;
26459 break;
26461 case TYPE_INTEGER:
26462 case TYPE_ADD:
26463 case TYPE_LOGICAL:
26464 case TYPE_COMPARE:
26465 case TYPE_EXTS:
26466 case TYPE_INSERT:
26468 if (! store_data_bypass_p (dep_insn, insn))
26469 return 3;
26470 break;
26472 case TYPE_STORE:
26473 case TYPE_FPLOAD:
26474 case TYPE_FPSTORE:
26476 if (get_attr_update (dep_insn) == UPDATE_YES
26477 && ! store_data_bypass_p (dep_insn, insn))
26478 return 3;
26479 break;
26481 case TYPE_MUL:
26483 if (! store_data_bypass_p (dep_insn, insn))
26484 return 17;
26485 break;
26487 case TYPE_DIV:
26489 if (! store_data_bypass_p (dep_insn, insn))
26490 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26491 break;
26493 default:
26494 break;
26497 break;
26499 case TYPE_LOAD:
26500 if ((rs6000_cpu == PROCESSOR_POWER6)
26501 && recog_memoized (dep_insn)
26502 && (INSN_CODE (dep_insn) >= 0))
26505 /* Adjust the cost for the case where the value written
26506 by a fixed point instruction is used within the address
26507 gen portion of a subsequent load(u)(x). */
26508 switch (get_attr_type (dep_insn))
26510 case TYPE_LOAD:
26511 case TYPE_CNTLZ:
26513 if (set_to_load_agen (dep_insn, insn))
26514 return get_attr_sign_extend (dep_insn)
26515 == SIGN_EXTEND_YES ? 6 : 4;
26516 break;
26518 case TYPE_SHIFT:
26520 if (set_to_load_agen (dep_insn, insn))
26521 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26522 6 : 3;
26523 break;
26525 case TYPE_INTEGER:
26526 case TYPE_ADD:
26527 case TYPE_LOGICAL:
26528 case TYPE_COMPARE:
26529 case TYPE_EXTS:
26530 case TYPE_INSERT:
26532 if (set_to_load_agen (dep_insn, insn))
26533 return 3;
26534 break;
26536 case TYPE_STORE:
26537 case TYPE_FPLOAD:
26538 case TYPE_FPSTORE:
26540 if (get_attr_update (dep_insn) == UPDATE_YES
26541 && set_to_load_agen (dep_insn, insn))
26542 return 3;
26543 break;
26545 case TYPE_MUL:
26547 if (set_to_load_agen (dep_insn, insn))
26548 return 17;
26549 break;
26551 case TYPE_DIV:
26553 if (set_to_load_agen (dep_insn, insn))
26554 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26555 break;
26557 default:
26558 break;
26561 break;
26563 case TYPE_FPLOAD:
26564 if ((rs6000_cpu == PROCESSOR_POWER6)
26565 && get_attr_update (insn) == UPDATE_NO
26566 && recog_memoized (dep_insn)
26567 && (INSN_CODE (dep_insn) >= 0)
26568 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
26569 return 2;
26571 default:
26572 break;
26575 /* Fall out to return default cost. */
26577 break;
26579 case REG_DEP_OUTPUT:
26580 /* Output dependency; DEP_INSN writes a register that INSN writes some
26581 cycles later. */
26582 if ((rs6000_cpu == PROCESSOR_POWER6)
26583 && recog_memoized (dep_insn)
26584 && (INSN_CODE (dep_insn) >= 0))
26586 attr_type = get_attr_type (insn);
26588 switch (attr_type)
26590 case TYPE_FP:
26591 if (get_attr_type (dep_insn) == TYPE_FP)
26592 return 1;
26593 break;
26594 case TYPE_FPLOAD:
26595 if (get_attr_update (insn) == UPDATE_NO
26596 && get_attr_type (dep_insn) == TYPE_MFFGPR)
26597 return 2;
26598 break;
26599 default:
26600 break;
26603 case REG_DEP_ANTI:
26604 /* Anti dependency; DEP_INSN reads a register that INSN writes some
26605 cycles later. */
26606 return 0;
26608 default:
26609 gcc_unreachable ();
26612 return cost;
26615 /* Debug version of rs6000_adjust_cost. */
26617 static int
26618 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
26619 int cost)
26621 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
26623 if (ret != cost)
26625 const char *dep;
26627 switch (REG_NOTE_KIND (link))
26629 default: dep = "unknown dependency"; break;
26630 case REG_DEP_TRUE: dep = "data dependency"; break;
26631 case REG_DEP_OUTPUT: dep = "output dependency"; break;
26632 case REG_DEP_ANTI: dep = "anti dependency"; break;
26635 fprintf (stderr,
26636 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
26637 "%s, insn:\n", ret, cost, dep);
26639 debug_rtx (insn);
26642 return ret;
26645 /* Return true if INSN is microcoded.
26646 Return false otherwise. */
26648 static bool
26649 is_microcoded_insn (rtx_insn *insn)
26651 if (!insn || !NONDEBUG_INSN_P (insn)
26652 || GET_CODE (PATTERN (insn)) == USE
26653 || GET_CODE (PATTERN (insn)) == CLOBBER)
26654 return false;
26656 if (rs6000_cpu_attr == CPU_CELL)
26657 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
26659 if (rs6000_sched_groups
26660 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
26662 enum attr_type type = get_attr_type (insn);
26663 if ((type == TYPE_LOAD
26664 && get_attr_update (insn) == UPDATE_YES
26665 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26666 || ((type == TYPE_LOAD || type == TYPE_STORE)
26667 && get_attr_update (insn) == UPDATE_YES
26668 && get_attr_indexed (insn) == INDEXED_YES)
26669 || type == TYPE_MFCR)
26670 return true;
26673 return false;
26676 /* The function returns true if INSN is cracked into 2 instructions
26677 by the processor (and therefore occupies 2 issue slots). */
26679 static bool
26680 is_cracked_insn (rtx_insn *insn)
26682 if (!insn || !NONDEBUG_INSN_P (insn)
26683 || GET_CODE (PATTERN (insn)) == USE
26684 || GET_CODE (PATTERN (insn)) == CLOBBER)
26685 return false;
26687 if (rs6000_sched_groups
26688 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
26690 enum attr_type type = get_attr_type (insn);
26691 if ((type == TYPE_LOAD
26692 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26693 && get_attr_update (insn) == UPDATE_NO)
26694 || (type == TYPE_LOAD
26695 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
26696 && get_attr_update (insn) == UPDATE_YES
26697 && get_attr_indexed (insn) == INDEXED_NO)
26698 || (type == TYPE_STORE
26699 && get_attr_update (insn) == UPDATE_YES
26700 && get_attr_indexed (insn) == INDEXED_NO)
26701 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
26702 && get_attr_update (insn) == UPDATE_YES)
26703 || type == TYPE_DELAYED_CR
26704 || type == TYPE_COMPARE
26705 || (type == TYPE_EXTS
26706 && get_attr_dot (insn) == DOT_YES)
26707 || (type == TYPE_SHIFT
26708 && get_attr_dot (insn) == DOT_YES
26709 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
26710 || (type == TYPE_MUL
26711 && get_attr_dot (insn) == DOT_YES)
26712 || type == TYPE_DIV
26713 || (type == TYPE_INSERT
26714 && get_attr_size (insn) == SIZE_32))
26715 return true;
26718 return false;
26721 /* The function returns true if INSN can be issued only from
26722 the branch slot. */
26724 static bool
26725 is_branch_slot_insn (rtx_insn *insn)
26727 if (!insn || !NONDEBUG_INSN_P (insn)
26728 || GET_CODE (PATTERN (insn)) == USE
26729 || GET_CODE (PATTERN (insn)) == CLOBBER)
26730 return false;
26732 if (rs6000_sched_groups)
26734 enum attr_type type = get_attr_type (insn);
26735 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
26736 return true;
26737 return false;
26740 return false;
26743 /* Return true if OUT_INSN sets a value that is
26744 used in the address generation computation of IN_INSN. */
26745 static bool
26746 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
26748 rtx out_set, in_set;
26750 /* For performance reasons, only handle the simple case where
26751 both loads are a single_set. */
26752 out_set = single_set (out_insn);
26753 if (out_set)
26755 in_set = single_set (in_insn);
26756 if (in_set)
26757 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
26760 return false;
26763 /* Try to determine base/offset/size parts of the given MEM.
26764 Return true if successful, false if the values couldn't
26765 all be determined.
26767 This function only looks for REG or REG+CONST address forms.
26768 REG+REG address form will return false. */
26770 static bool
26771 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
26772 HOST_WIDE_INT *size)
26774 rtx addr_rtx;
26775 if (MEM_SIZE_KNOWN_P (mem))
26776 *size = MEM_SIZE (mem);
26777 else
26778 return false;
26780 if (GET_CODE (XEXP (mem, 0)) == PRE_MODIFY)
26781 addr_rtx = XEXP (XEXP (mem, 0), 1);
26782 else
26783 addr_rtx = (XEXP (mem, 0));
26785 if (GET_CODE (addr_rtx) == REG)
26787 *base = addr_rtx;
26788 *offset = 0;
26790 else if (GET_CODE (addr_rtx) == PLUS
26791 && CONST_INT_P (XEXP (addr_rtx, 1)))
26793 *base = XEXP (addr_rtx, 0);
26794 *offset = INTVAL (XEXP (addr_rtx, 1));
26796 else
26797 return false;
26799 return true;
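/* Example, for illustration only: for (mem:DI (plus (reg 9)
   (const_int 16))) with a known MEM_SIZE of 8, this returns
   base = (reg 9), offset = 16, size = 8; an indexed REG+REG
   address makes it return false. */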
26802 /* Return true if the target storage location of MEM1 is
26803 adjacent to the target storage location of MEM2. */
26806 static bool
26807 adjacent_mem_locations (rtx mem1, rtx mem2)
26809 rtx reg1, reg2;
26810 HOST_WIDE_INT off1, size1, off2, size2;
26812 if (get_memref_parts (mem1, &reg1, &off1, &size1)
26813 && get_memref_parts (mem2, &reg2, &off2, &size2))
26814 return ((REGNO (reg1) == REGNO (reg2))
26815 && ((off1 + size1 == off2)
26816 || (off2 + size2 == off1)));
26818 return false;
26821 /* This function returns true if it can be determined that the two MEM
26822 locations overlap by at least 1 byte based on base reg/offset/size. */
26824 static bool
26825 mem_locations_overlap (rtx mem1, rtx mem2)
26827 rtx reg1, reg2;
26828 HOST_WIDE_INT off1, size1, off2, size2;
26830 if (get_memref_parts (mem1, &reg1, &off1, &size1)
26831 && get_memref_parts (mem2, &reg2, &off2, &size2))
26832 return ((REGNO (reg1) == REGNO (reg2))
26833 && (((off1 <= off2) && (off1 + size1 > off2))
26834 || ((off2 <= off1) && (off2 + size2 > off1))));
26836 return false;
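/* Example, for illustration only: two 8-byte accesses from the same
   base register at offsets 0 and 8 are adjacent (0 + 8 == 8) but do
   not overlap, while offsets 0 and 4 overlap (0 <= 4 && 0 + 8 > 4)
   and are not adjacent. */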
26839 /* A C statement (sans semicolon) to update the integer scheduling
26840 priority INSN_PRIORITY (INSN). Increase the priority to execute the
26841 INSN earlier, reduce the priority to execute INSN later. Do not
26842 define this macro if you do not need to adjust the scheduling
26843 priorities of insns. */
26845 static int
26846 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
26848 rtx load_mem, str_mem;
26849 /* On machines (like the 750) which have asymmetric integer units,
26850 where one integer unit can do multiply and divides and the other
26851 can't, reduce the priority of multiply/divide so it is scheduled
26852 before other integer operations. */
26854 #if 0
26855 if (! INSN_P (insn))
26856 return priority;
26858 if (GET_CODE (PATTERN (insn)) == USE)
26859 return priority;
26861 switch (rs6000_cpu_attr) {
26862 case CPU_PPC750:
26863 switch (get_attr_type (insn))
26865 default:
26866 break;
26868 case TYPE_MUL:
26869 case TYPE_DIV:
26870 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
26871 priority, priority);
26872 if (priority >= 0 && priority < 0x01000000)
26873 priority >>= 3;
26874 break;
26877 #endif
26879 if (insn_must_be_first_in_group (insn)
26880 && reload_completed
26881 && current_sched_info->sched_max_insns_priority
26882 && rs6000_sched_restricted_insns_priority)
26885 /* Prioritize insns that can be dispatched only in the first
26886 dispatch slot. */
26887 if (rs6000_sched_restricted_insns_priority == 1)
26888 /* Attach highest priority to insn. This means that in
26889 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
26890 precede 'priority' (critical path) considerations. */
26891 return current_sched_info->sched_max_insns_priority;
26892 else if (rs6000_sched_restricted_insns_priority == 2)
26893 /* Increase priority of insn by a minimal amount. This means that in
26894 haifa-sched.c:ready_sort(), only 'priority' (critical path)
26895 considerations precede dispatch-slot restriction considerations. */
26896 return (priority + 1);
26899 if (rs6000_cpu == PROCESSOR_POWER6
26900 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
26901 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
26902 /* Attach highest priority to insn if the scheduler has just issued two
26903 stores and this instruction is a load, or two loads and this instruction
26904 is a store. Power6 wants loads and stores scheduled alternately
26905 when possible. */
26906 return current_sched_info->sched_max_insns_priority;
26908 return priority;
26911 /* Return true if the instruction is nonpipelined on the Cell. */
26912 static bool
26913 is_nonpipeline_insn (rtx_insn *insn)
26915 enum attr_type type;
26916 if (!insn || !NONDEBUG_INSN_P (insn)
26917 || GET_CODE (PATTERN (insn)) == USE
26918 || GET_CODE (PATTERN (insn)) == CLOBBER)
26919 return false;
26921 type = get_attr_type (insn);
26922 if (type == TYPE_MUL
26923 || type == TYPE_DIV
26924 || type == TYPE_SDIV
26925 || type == TYPE_DDIV
26926 || type == TYPE_SSQRT
26927 || type == TYPE_DSQRT
26928 || type == TYPE_MFCR
26929 || type == TYPE_MFCRF
26930 || type == TYPE_MFJMPR)
26932 return true;
26934 return false;
26938 /* Return how many instructions the machine can issue per cycle. */
26940 static int
26941 rs6000_issue_rate (void)
26943 /* Unless scheduling for register pressure, use issue rate of 1 for
26944 first scheduling pass to decrease degradation. */
26945 if (!reload_completed && !flag_sched_pressure)
26946 return 1;
26948 switch (rs6000_cpu_attr) {
26949 case CPU_RS64A:
26950 case CPU_PPC601: /* ? */
26951 case CPU_PPC7450:
26952 return 3;
26953 case CPU_PPC440:
26954 case CPU_PPC603:
26955 case CPU_PPC750:
26956 case CPU_PPC7400:
26957 case CPU_PPC8540:
26958 case CPU_PPC8548:
26959 case CPU_CELL:
26960 case CPU_PPCE300C2:
26961 case CPU_PPCE300C3:
26962 case CPU_PPCE500MC:
26963 case CPU_PPCE500MC64:
26964 case CPU_PPCE5500:
26965 case CPU_PPCE6500:
26966 case CPU_TITAN:
26967 return 2;
26968 case CPU_PPC476:
26969 case CPU_PPC604:
26970 case CPU_PPC604E:
26971 case CPU_PPC620:
26972 case CPU_PPC630:
26973 return 4;
26974 case CPU_POWER4:
26975 case CPU_POWER5:
26976 case CPU_POWER6:
26977 case CPU_POWER7:
26978 return 5;
26979 case CPU_POWER8:
26980 return 7;
26981 default:
26982 return 1;
26986 /* Return how many instructions to look ahead for better insn
26987 scheduling. */
26989 static int
26990 rs6000_use_sched_lookahead (void)
26992 switch (rs6000_cpu_attr)
26994 case CPU_PPC8540:
26995 case CPU_PPC8548:
26996 return 4;
26998 case CPU_CELL:
26999 return (reload_completed ? 8 : 0);
27001 default:
27002 return 0;
27006 /* We are choosing insn from the ready queue. Return zero if INSN can be
27007 chosen. */
27008 static int
27009 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
27011 if (ready_index == 0)
27012 return 0;
27014 if (rs6000_cpu_attr != CPU_CELL)
27015 return 0;
27017 gcc_assert (insn != NULL_RTX && INSN_P (insn));
27019 if (!reload_completed
27020 || is_nonpipeline_insn (insn)
27021 || is_microcoded_insn (insn))
27022 return 1;
27024 return 0;
27027 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
27028 and return true. */
27030 static bool
27031 find_mem_ref (rtx pat, rtx *mem_ref)
27033 const char * fmt;
27034 int i, j;
27036 /* stack_tie does not produce any real memory traffic. */
27037 if (tie_operand (pat, VOIDmode))
27038 return false;
27040 if (GET_CODE (pat) == MEM)
27042 *mem_ref = pat;
27043 return true;
27046 /* Recursively process the pattern. */
27047 fmt = GET_RTX_FORMAT (GET_CODE (pat));
27049 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
27051 if (fmt[i] == 'e')
27053 if (find_mem_ref (XEXP (pat, i), mem_ref))
27054 return true;
27056 else if (fmt[i] == 'E')
27057 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
27059 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
27060 return true;
27064 return false;
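/* Example, for illustration only: applied to the pattern
   (set (reg 3) (plus:SI (mem:SI (reg 4)) (reg 5))), the recursion
   above finds (mem:SI (reg 4)), stores it in *MEM_REF, and returns
   true. */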
27067 /* Determine if PAT is a PATTERN of a load insn. */
27069 static bool
27070 is_load_insn1 (rtx pat, rtx *load_mem)
27072 if (!pat || pat == NULL_RTX)
27073 return false;
27075 if (GET_CODE (pat) == SET)
27076 return find_mem_ref (SET_SRC (pat), load_mem);
27078 if (GET_CODE (pat) == PARALLEL)
27080 int i;
27082 for (i = 0; i < XVECLEN (pat, 0); i++)
27083 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
27084 return true;
27087 return false;
27090 /* Determine if INSN loads from memory. */
27092 static bool
27093 is_load_insn (rtx insn, rtx *load_mem)
27095 if (!insn || !INSN_P (insn))
27096 return false;
27098 if (CALL_P (insn))
27099 return false;
27101 return is_load_insn1 (PATTERN (insn), load_mem);
27104 /* Determine if PAT is a PATTERN of a store insn. */
27106 static bool
27107 is_store_insn1 (rtx pat, rtx *str_mem)
27109 if (!pat || pat == NULL_RTX)
27110 return false;
27112 if (GET_CODE (pat) == SET)
27113 return find_mem_ref (SET_DEST (pat), str_mem);
27115 if (GET_CODE (pat) == PARALLEL)
27117 int i;
27119 for (i = 0; i < XVECLEN (pat, 0); i++)
27120 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
27121 return true;
27124 return false;
27127 /* Determine if INSN stores to memory. */
27129 static bool
27130 is_store_insn (rtx insn, rtx *str_mem)
27132 if (!insn || !INSN_P (insn))
27133 return false;
27135 return is_store_insn1 (PATTERN (insn), str_mem);
27138 /* Returns whether the dependence between INSN and NEXT is considered
27139 costly by the given target. */
27141 static bool
27142 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
27144 rtx insn;
27145 rtx next;
27146 rtx load_mem, str_mem;
27148 /* If the flag is not enabled - no dependence is considered costly;
27149 allow all dependent insns in the same group.
27150 This is the most aggressive option. */
27151 if (rs6000_sched_costly_dep == no_dep_costly)
27152 return false;
27154 /* If the flag is set to 1 - a dependence is always considered costly;
27155 do not allow dependent instructions in the same group.
27156 This is the most conservative option. */
27157 if (rs6000_sched_costly_dep == all_deps_costly)
27158 return true;
27160 insn = DEP_PRO (dep);
27161 next = DEP_CON (dep);
27163 if (rs6000_sched_costly_dep == store_to_load_dep_costly
27164 && is_load_insn (next, &load_mem)
27165 && is_store_insn (insn, &str_mem))
27166 /* Prevent load after store in the same group. */
27167 return true;
27169 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
27170 && is_load_insn (next, &load_mem)
27171 && is_store_insn (insn, &str_mem)
27172 && DEP_TYPE (dep) == REG_DEP_TRUE
27173 && mem_locations_overlap(str_mem, load_mem))
27174 /* Prevent load after store in the same group if it is a true
27175 dependence. */
27176 return true;
27178 /* The flag is set to X; dependences with latency >= X are considered costly,
27179 and will not be scheduled in the same group. */
27180 if (rs6000_sched_costly_dep <= max_dep_latency
27181 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
27182 return true;
27184 return false;
27187 /* Return the next insn after INSN that is found before TAIL is reached,
27188 skipping any "non-active" insns - insns that will not actually occupy
27189 an issue slot. Return NULL_RTX if such an insn is not found. */
27191 static rtx_insn *
27192 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
27194 if (insn == NULL_RTX || insn == tail)
27195 return NULL;
27197 while (1)
27199 insn = NEXT_INSN (insn);
27200 if (insn == NULL_RTX || insn == tail)
27201 return NULL;
27203 if (CALL_P (insn)
27204 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
27205 || (NONJUMP_INSN_P (insn)
27206 && GET_CODE (PATTERN (insn)) != USE
27207 && GET_CODE (PATTERN (insn)) != CLOBBER
27208 && INSN_CODE (insn) != CODE_FOR_stack_tie))
27209 break;
27211 return insn;
27214 /* We are about to begin issuing insns for this clock cycle. */
27216 static int
27217 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
27218 rtx_insn **ready ATTRIBUTE_UNUSED,
27219 int *pn_ready ATTRIBUTE_UNUSED,
27220 int clock_var ATTRIBUTE_UNUSED)
27222 int n_ready = *pn_ready;
27224 if (sched_verbose)
27225 fprintf (dump, "// rs6000_sched_reorder :\n");
27227 /* Reorder the ready list if the second to last ready insn
27228 is a non-pipelined insn. */
27229 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
27231 if (is_nonpipeline_insn (ready[n_ready - 1])
27232 && (recog_memoized (ready[n_ready - 2]) > 0))
27233 /* Simply swap first two insns. */
27235 rtx_insn *tmp = ready[n_ready - 1];
27236 ready[n_ready - 1] = ready[n_ready - 2];
27237 ready[n_ready - 2] = tmp;
27241 if (rs6000_cpu == PROCESSOR_POWER6)
27242 load_store_pendulum = 0;
27244 return rs6000_issue_rate ();
27247 /* Like rs6000_sched_reorder, but called after issuing each insn. */
27249 static int
27250 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
27251 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
27253 if (sched_verbose)
27254 fprintf (dump, "// rs6000_sched_reorder2 :\n");
27256 /* For Power6, we need to handle some special cases to try to keep the
27257 store queue from overflowing and triggering expensive flushes.
27259 This code monitors how load and store instructions are being issued
27260 and skews the ready list one way or the other to increase the likelihood
27261 that a desired instruction is issued at the proper time.
27263 A couple of things are done. First, we maintain a "load_store_pendulum"
27264 to track the current state of load/store issue.
27266 - If the pendulum is at zero, then no loads or stores have been
27267 issued in the current cycle so we do nothing.
27269 - If the pendulum is 1, then a single load has been issued in this
27270 cycle and we attempt to locate another load in the ready list to
27271 issue with it.
27273 - If the pendulum is -2, then two stores have already been
27274 issued in this cycle, so we increase the priority of the first load
27275 in the ready list to increase its likelihood of being chosen first
27276 in the next cycle.
27278 - If the pendulum is -1, then a single store has been issued in this
27279 cycle and we attempt to locate another store in the ready list to
27280 issue with it, preferring a store to an adjacent memory location to
27281 facilitate store pairing in the store queue.
27283 - If the pendulum is 2, then two loads have already been
27284 issued in this cycle, so we increase the priority of the first store
27285 in the ready list to increase its likelihood of being chosen first
27286 in the next cycle.
27288 - If the pendulum is < -2 or > 2, then do nothing.
27290 Note: This code covers the most common scenarios. There exist
27291 non-load/store instructions which make use of the LSU and which
27292 would need to be accounted for to strictly model the behavior
27293 of the machine. Those instructions are currently unaccounted
27294 for to help minimize the compile-time overhead of this code.
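/* Editor's illustrative trace (not in the original source), assuming
   a POWER6 ready list with both loads and stores available:
     new cycle            pendulum reset to 0
     store issued         pendulum = -1  (look for a second, ideally
                                          adjacent, store to pair)
     store issued         pendulum = -2  (bump the first ready load's
                                          priority for the next cycle)
     new cycle            pendulum reset to 0
     load issued          pendulum = +1  (look for a second load)  */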
27296 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
27298 int pos;
27299 int i;
27300 rtx_insn *tmp;
27301 rtx load_mem, str_mem;
27303 if (is_store_insn (last_scheduled_insn, &str_mem))
27304 /* Issuing a store, swing the load_store_pendulum to the left */
27305 load_store_pendulum--;
27306 else if (is_load_insn (last_scheduled_insn, &load_mem))
27307 /* Issuing a load, swing the load_store_pendulum to the right */
27308 load_store_pendulum++;
27309 else
27310 return cached_can_issue_more;
27312 /* If the pendulum is balanced, or there is only one instruction on
27313 the ready list, then all is well, so return. */
27314 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
27315 return cached_can_issue_more;
27317 if (load_store_pendulum == 1)
27319 /* A load has been issued in this cycle. Scan the ready list
27320 for another load to issue with it */
27321 pos = *pn_ready-1;
27323 while (pos >= 0)
27325 if (is_load_insn (ready[pos], &load_mem))
27327 /* Found a load. Move it to the head of the ready list,
27328 and adjust its priority so that it is more likely to
27329 stay there */
27330 tmp = ready[pos];
27331 for (i = pos; i < *pn_ready - 1; i++)
27332 ready[i] = ready[i + 1];
27333 ready[*pn_ready-1] = tmp;
27335 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27336 INSN_PRIORITY (tmp)++;
27337 break;
27339 pos--;
27342 else if (load_store_pendulum == -2)
27344 /* Two stores have been issued in this cycle. Increase the
27345 priority of the first load in the ready list to favor it for
27346 issuing in the next cycle. */
27347 pos = *pn_ready-1;
27349 while (pos >= 0)
27351 if (is_load_insn (ready[pos], &load_mem)
27352 && !sel_sched_p ()
27353 && INSN_PRIORITY_KNOWN (ready[pos]))
27355 INSN_PRIORITY (ready[pos])++;
27357 /* Adjust the pendulum to account for the fact that a load
27358 was found and increased in priority. This is to prevent
27359 increasing the priority of multiple loads */
27360 load_store_pendulum--;
27362 break;
27364 pos--;
27367 else if (load_store_pendulum == -1)
27369 /* A store has been issued in this cycle. Scan the ready list for
27370 another store to issue with it, preferring a store to an adjacent
27371 memory location */
27372 int first_store_pos = -1;
27374 pos = *pn_ready-1;
27376 while (pos >= 0)
27378 if (is_store_insn (ready[pos], &str_mem))
27380 rtx str_mem2;
27381 /* Maintain the index of the first store found on the
27382 list */
27383 if (first_store_pos == -1)
27384 first_store_pos = pos;
27386 if (is_store_insn (last_scheduled_insn, &str_mem2)
27387 && adjacent_mem_locations (str_mem, str_mem2))
27389 /* Found an adjacent store. Move it to the head of the
27390 ready list, and adjust its priority so that it is
27391 more likely to stay there */
27392 tmp = ready[pos];
27393 for (i = pos; i < *pn_ready - 1; i++)
27394 ready[i] = ready[i + 1];
27395 ready[*pn_ready-1] = tmp;
27397 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27398 INSN_PRIORITY (tmp)++;
27400 first_store_pos = -1;
27402 break;
27405 pos--;
27408 if (first_store_pos >= 0)
27410 /* An adjacent store wasn't found, but a non-adjacent store was,
27411 so move the non-adjacent store to the front of the ready
27412 list, and adjust its priority so that it is more likely to
27413 stay there. */
27414 tmp = ready[first_store_pos];
27415 for (i = first_store_pos; i < *pn_ready - 1; i++)
27416 ready[i] = ready[i + 1];
27417 ready[*pn_ready-1] = tmp;
27418 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27419 INSN_PRIORITY (tmp)++;
27422 else if (load_store_pendulum == 2)
27424 /* Two loads have been issued in this cycle. Increase the priority
27425 of the first store in the ready list to favor it for issuing in
27426 the next cycle. */
27427 pos = *pn_ready-1;
27429 while (pos >= 0)
27431 if (is_store_insn (ready[pos], &str_mem)
27432 && !sel_sched_p ()
27433 && INSN_PRIORITY_KNOWN (ready[pos]))
27435 INSN_PRIORITY (ready[pos])++;
27437 /* Adjust the pendulum to account for the fact that a store
27438 was found and increased in priority. This is to prevent
27439 increasing the priority of multiple stores */
27440 load_store_pendulum++;
27442 break;
27444 pos--;
27449 return cached_can_issue_more;
27452 /* Return whether the presence of INSN causes a dispatch group termination
27453 of group WHICH_GROUP.
27455 If WHICH_GROUP == current_group, this function will return true if INSN
27456 causes the termination of the current group (i.e., the dispatch group to
27457 which INSN belongs). This means that INSN will be the last insn in the
27458 group it belongs to.
27460 If WHICH_GROUP == previous_group, this function will return true if INSN
27461 causes the termination of the previous group (i.e., the dispatch group that
27462 precedes the group to which INSN belongs). This means that INSN will be
27463 the first insn in the group it belongs to. */
27465 static bool
27466 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
27468 bool first, last;
27470 if (! insn)
27471 return false;
27473 first = insn_must_be_first_in_group (insn);
27474 last = insn_must_be_last_in_group (insn);
27476 if (first && last)
27477 return true;
27479 if (which_group == current_group)
27480 return last;
27481 else if (which_group == previous_group)
27482 return first;
27484 return false;
27488 static bool
27489 insn_must_be_first_in_group (rtx_insn *insn)
27491 enum attr_type type;
27493 if (!insn
27494 || NOTE_P (insn)
27495 || DEBUG_INSN_P (insn)
27496 || GET_CODE (PATTERN (insn)) == USE
27497 || GET_CODE (PATTERN (insn)) == CLOBBER)
27498 return false;
27500 switch (rs6000_cpu)
27502 case PROCESSOR_POWER5:
27503 if (is_cracked_insn (insn))
27504 return true; /* FALLTHRU */
27505 case PROCESSOR_POWER4:
27506 if (is_microcoded_insn (insn))
27507 return true;
27509 if (!rs6000_sched_groups)
27510 return false;
27512 type = get_attr_type (insn);
27514 switch (type)
27516 case TYPE_MFCR:
27517 case TYPE_MFCRF:
27518 case TYPE_MTCR:
27519 case TYPE_DELAYED_CR:
27520 case TYPE_CR_LOGICAL:
27521 case TYPE_MTJMPR:
27522 case TYPE_MFJMPR:
27523 case TYPE_DIV:
27524 case TYPE_LOAD_L:
27525 case TYPE_STORE_C:
27526 case TYPE_ISYNC:
27527 case TYPE_SYNC:
27528 return true;
27529 default:
27530 break;
27532 break;
27533 case PROCESSOR_POWER6:
27534 type = get_attr_type (insn);
27536 switch (type)
27538 case TYPE_EXTS:
27539 case TYPE_CNTLZ:
27540 case TYPE_TRAP:
27541 case TYPE_MUL:
27542 case TYPE_INSERT:
27543 case TYPE_FPCOMPARE:
27544 case TYPE_MFCR:
27545 case TYPE_MTCR:
27546 case TYPE_MFJMPR:
27547 case TYPE_MTJMPR:
27548 case TYPE_ISYNC:
27549 case TYPE_SYNC:
27550 case TYPE_LOAD_L:
27551 case TYPE_STORE_C:
27552 return true;
27553 case TYPE_SHIFT:
27554 if (get_attr_dot (insn) == DOT_NO
27555 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
27556 return true;
27557 else
27558 break;
27559 case TYPE_DIV:
27560 if (get_attr_size (insn) == SIZE_32)
27561 return true;
27562 else
27563 break;
27564 case TYPE_LOAD:
27565 case TYPE_STORE:
27566 case TYPE_FPLOAD:
27567 case TYPE_FPSTORE:
27568 if (get_attr_update (insn) == UPDATE_YES)
27569 return true;
27570 else
27571 break;
27572 default:
27573 break;
27575 break;
27576 case PROCESSOR_POWER7:
27577 type = get_attr_type (insn);
27579 switch (type)
27581 case TYPE_CR_LOGICAL:
27582 case TYPE_MFCR:
27583 case TYPE_MFCRF:
27584 case TYPE_MTCR:
27585 case TYPE_DIV:
27586 case TYPE_COMPARE:
27587 case TYPE_ISYNC:
27588 case TYPE_LOAD_L:
27589 case TYPE_STORE_C:
27590 case TYPE_MFJMPR:
27591 case TYPE_MTJMPR:
27592 return true;
27593 case TYPE_MUL:
27594 case TYPE_SHIFT:
27595 case TYPE_EXTS:
27596 if (get_attr_dot (insn) == DOT_YES)
27597 return true;
27598 else
27599 break;
27600 case TYPE_LOAD:
27601 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27602 || get_attr_update (insn) == UPDATE_YES)
27603 return true;
27604 else
27605 break;
27606 case TYPE_STORE:
27607 case TYPE_FPLOAD:
27608 case TYPE_FPSTORE:
27609 if (get_attr_update (insn) == UPDATE_YES)
27610 return true;
27611 else
27612 break;
27613 default:
27614 break;
27616 break;
27617 case PROCESSOR_POWER8:
27618 type = get_attr_type (insn);
27620 switch (type)
27622 case TYPE_CR_LOGICAL:
27623 case TYPE_DELAYED_CR:
27624 case TYPE_MFCR:
27625 case TYPE_MFCRF:
27626 case TYPE_MTCR:
27627 case TYPE_COMPARE:
27628 case TYPE_SYNC:
27629 case TYPE_ISYNC:
27630 case TYPE_LOAD_L:
27631 case TYPE_STORE_C:
27632 case TYPE_VECSTORE:
27633 case TYPE_MFJMPR:
27634 case TYPE_MTJMPR:
27635 return true;
27636 case TYPE_SHIFT:
27637 case TYPE_EXTS:
27638 case TYPE_MUL:
27639 if (get_attr_dot (insn) == DOT_YES)
27640 return true;
27641 else
27642 break;
27643 case TYPE_LOAD:
27644 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27645 || get_attr_update (insn) == UPDATE_YES)
27646 return true;
27647 else
27648 break;
27649 case TYPE_STORE:
27650 if (get_attr_update (insn) == UPDATE_YES
27651 && get_attr_indexed (insn) == INDEXED_YES)
27652 return true;
27653 else
27654 break;
27655 default:
27656 break;
27658 break;
27659 default:
27660 break;
27663 return false;
27666 static bool
27667 insn_must_be_last_in_group (rtx_insn *insn)
27669 enum attr_type type;
27671 if (!insn
27672 || NOTE_P (insn)
27673 || DEBUG_INSN_P (insn)
27674 || GET_CODE (PATTERN (insn)) == USE
27675 || GET_CODE (PATTERN (insn)) == CLOBBER)
27676 return false;
27678 switch (rs6000_cpu) {
27679 case PROCESSOR_POWER4:
27680 case PROCESSOR_POWER5:
27681 if (is_microcoded_insn (insn))
27682 return true;
27684 if (is_branch_slot_insn (insn))
27685 return true;
27687 break;
27688 case PROCESSOR_POWER6:
27689 type = get_attr_type (insn);
27691 switch (type)
27693 case TYPE_EXTS:
27694 case TYPE_CNTLZ:
27695 case TYPE_TRAP:
27696 case TYPE_MUL:
27697 case TYPE_FPCOMPARE:
27698 case TYPE_MFCR:
27699 case TYPE_MTCR:
27700 case TYPE_MFJMPR:
27701 case TYPE_MTJMPR:
27702 case TYPE_ISYNC:
27703 case TYPE_SYNC:
27704 case TYPE_LOAD_L:
27705 case TYPE_STORE_C:
27706 return true;
27707 case TYPE_SHIFT:
27708 if (get_attr_dot (insn) == DOT_NO
27709 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
27710 return true;
27711 else
27712 break;
27713 case TYPE_DIV:
27714 if (get_attr_size (insn) == SIZE_32)
27715 return true;
27716 else
27717 break;
27718 default:
27719 break;
27721 break;
27722 case PROCESSOR_POWER7:
27723 type = get_attr_type (insn);
27725 switch (type)
27727 case TYPE_ISYNC:
27728 case TYPE_SYNC:
27729 case TYPE_LOAD_L:
27730 case TYPE_STORE_C:
27731 return true;
27732 case TYPE_LOAD:
27733 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27734 && get_attr_update (insn) == UPDATE_YES)
27735 return true;
27736 else
27737 break;
27738 case TYPE_STORE:
27739 if (get_attr_update (insn) == UPDATE_YES
27740 && get_attr_indexed (insn) == INDEXED_YES)
27741 return true;
27742 else
27743 break;
27744 default:
27745 break;
27747 break;
27748 case PROCESSOR_POWER8:
27749 type = get_attr_type (insn);
27751 switch (type)
27753 case TYPE_MFCR:
27754 case TYPE_MTCR:
27755 case TYPE_ISYNC:
27756 case TYPE_SYNC:
27757 case TYPE_LOAD_L:
27758 case TYPE_STORE_C:
27759 return true;
27760 case TYPE_LOAD:
27761 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27762 && get_attr_update (insn) == UPDATE_YES)
27763 return true;
27764 else
27765 break;
27766 case TYPE_STORE:
27767 if (get_attr_update (insn) == UPDATE_YES
27768 && get_attr_indexed (insn) == INDEXED_YES)
27769 return true;
27770 else
27771 break;
27772 default:
27773 break;
27775 break;
27776 default:
27777 break;
27780 return false;
27783 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
27784 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
27786 static bool
27787 is_costly_group (rtx *group_insns, rtx next_insn)
27789 int i;
27790 int issue_rate = rs6000_issue_rate ();
27792 for (i = 0; i < issue_rate; i++)
27794 sd_iterator_def sd_it;
27795 dep_t dep;
27796 rtx insn = group_insns[i];
27798 if (!insn)
27799 continue;
27801 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
27803 rtx next = DEP_CON (dep);
27805 if (next == next_insn
27806 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
27807 return true;
27811 return false;
27814 /* Utility of the function redefine_groups.
27815 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
27816 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
27817 to keep it "far" (in a separate group) from GROUP_INSNS, following
27818 one of the following schemes, depending on the value of the flag
27819 -minsert-sched-nops=X:
27820 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
27821 in order to force NEXT_INSN into a separate group.
27822 (2) X < sched_finish_regroup_exact: insert exactly X nops.
27823 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
27824 insertion (has a group just ended, how many vacant issue slots remain in the
27825 last group, and how many dispatch groups were encountered so far). */
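/* Editor's illustrative sketch (not in the original source), assuming
   an issue rate of 4, two slots already filled, and a CPU without the
   special group-ending nop: scheme (1) emits one nop when NEXT_INSN
   is not a branch (the branch slot need not be plugged), but two nops
   when NEXT_INSN is a branch, so that the last nop opens a new group
   which the branch is forced into.  */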
27827 static int
27828 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
27829 rtx_insn *next_insn, bool *group_end, int can_issue_more,
27830 int *group_count)
27832 rtx nop;
27833 bool force;
27834 int issue_rate = rs6000_issue_rate ();
27835 bool end = *group_end;
27836 int i;
27838 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
27839 return can_issue_more;
27841 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
27842 return can_issue_more;
27844 force = is_costly_group (group_insns, next_insn);
27845 if (!force)
27846 return can_issue_more;
27848 if (sched_verbose > 6)
27849 fprintf (dump,"force: group count = %d, can_issue_more = %d\n",
27850 *group_count ,can_issue_more);
27852 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
27854 if (*group_end)
27855 can_issue_more = 0;
27857 /* Since only a branch can be issued in the last issue_slot, it is
27858 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
27859 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
27860 in this case the last nop will start a new group and the branch
27861 will be forced to the new group. */
27862 if (can_issue_more && !is_branch_slot_insn (next_insn))
27863 can_issue_more--;
27865 /* Do we have a special group ending nop? */
27866 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
27867 || rs6000_cpu_attr == CPU_POWER8)
27869 nop = gen_group_ending_nop ();
27870 emit_insn_before (nop, next_insn);
27871 can_issue_more = 0;
27873 else
27874 while (can_issue_more > 0)
27876 nop = gen_nop ();
27877 emit_insn_before (nop, next_insn);
27878 can_issue_more--;
27881 *group_end = true;
27882 return 0;
27885 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
27887 int n_nops = rs6000_sched_insert_nops;
27889 /* Nops can't be issued from the branch slot, so the effective
27890 issue_rate for nops is 'issue_rate - 1'. */
27891 if (can_issue_more == 0)
27892 can_issue_more = issue_rate;
27893 can_issue_more--;
27894 if (can_issue_more == 0)
27896 can_issue_more = issue_rate - 1;
27897 (*group_count)++;
27898 end = true;
27899 for (i = 0; i < issue_rate; i++)
27901 group_insns[i] = 0;
27905 while (n_nops > 0)
27907 nop = gen_nop ();
27908 emit_insn_before (nop, next_insn);
27909 if (can_issue_more == issue_rate - 1) /* new group begins */
27910 end = false;
27911 can_issue_more--;
27912 if (can_issue_more == 0)
27914 can_issue_more = issue_rate - 1;
27915 (*group_count)++;
27916 end = true;
27917 for (i = 0; i < issue_rate; i++)
27919 group_insns[i] = 0;
27922 n_nops--;
27925 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
27926 can_issue_more++;
27928 /* Is next_insn going to start a new group? */
27929 *group_end
27930 = (end
27931 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
27932 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
27933 || (can_issue_more < issue_rate &&
27934 insn_terminates_group_p (next_insn, previous_group)));
27935 if (*group_end && end)
27936 (*group_count)--;
27938 if (sched_verbose > 6)
27939 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
27940 *group_count, can_issue_more);
27941 return can_issue_more;
27944 return can_issue_more;
27947 /* This function tries to synchronize the dispatch groups that the compiler "sees"
27948 with the dispatch groups that the processor dispatcher is expected to
27949 form in practice. It tries to achieve this synchronization by forcing the
27950 estimated processor grouping on the compiler (as opposed to the function
27951 'pad_groups', which tries to force the scheduler's grouping on the processor).
27953 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
27954 examines the (estimated) dispatch groups that will be formed by the processor
27955 dispatcher. It marks these group boundaries to reflect the estimated
27956 processor grouping, overriding the grouping that the scheduler had marked.
27957 Depending on the value of the flag '-minsert-sched-nops' this function can
27958 force certain insns into separate groups or force a certain distance between
27959 them by inserting nops, for example, if there exists a "costly dependence"
27960 between the insns.
27962 The function estimates the group boundaries that the processor will form as
27963 follows: It keeps track of how many vacant issue slots are available after
27964 each insn. A subsequent insn will start a new group if one of the following
27965 4 cases applies:
27966 - no more vacant issue slots remain in the current dispatch group.
27967 - only the last issue slot, which is the branch slot, is vacant, but the next
27968 insn is not a branch.
27969 - only the last 2 or fewer issue slots, including the branch slot, are vacant,
27970 which means that a cracked insn (which occupies two issue slots) can't be
27971 issued in this group.
27972 - fewer than 'issue_rate' slots are vacant, and the next insn always needs to
27973 start a new group. */
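/* Editor's illustrative example (not in the original source): with an
   issue rate of 4, once three insns have been placed only the branch
   slot is vacant; a following add starts a new group (second case
   above), while a following branch may still join the current group.  */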
27975 static int
27976 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
27977 rtx_insn *tail)
27979 rtx_insn *insn, *next_insn;
27980 int issue_rate;
27981 int can_issue_more;
27982 int slot, i;
27983 bool group_end;
27984 int group_count = 0;
27985 rtx *group_insns;
27987 /* Initialize. */
27988 issue_rate = rs6000_issue_rate ();
27989 group_insns = XALLOCAVEC (rtx, issue_rate);
27990 for (i = 0; i < issue_rate; i++)
27992 group_insns[i] = 0;
27994 can_issue_more = issue_rate;
27995 slot = 0;
27996 insn = get_next_active_insn (prev_head_insn, tail);
27997 group_end = false;
27999 while (insn != NULL_RTX)
28001 slot = (issue_rate - can_issue_more);
28002 group_insns[slot] = insn;
28003 can_issue_more =
28004 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28005 if (insn_terminates_group_p (insn, current_group))
28006 can_issue_more = 0;
28008 next_insn = get_next_active_insn (insn, tail);
28009 if (next_insn == NULL_RTX)
28010 return group_count + 1;
28012 /* Is next_insn going to start a new group? */
28013 group_end
28014 = (can_issue_more == 0
28015 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28016 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28017 || (can_issue_more < issue_rate &&
28018 insn_terminates_group_p (next_insn, previous_group)));
28020 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
28021 next_insn, &group_end, can_issue_more,
28022 &group_count);
28024 if (group_end)
28026 group_count++;
28027 can_issue_more = 0;
28028 for (i = 0; i < issue_rate; i++)
28030 group_insns[i] = 0;
28034 if (GET_MODE (next_insn) == TImode && can_issue_more)
28035 PUT_MODE (next_insn, VOIDmode);
28036 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
28037 PUT_MODE (next_insn, TImode);
28039 insn = next_insn;
28040 if (can_issue_more == 0)
28041 can_issue_more = issue_rate;
28042 } /* while */
28044 return group_count;
28047 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
28048 dispatch group boundaries that the scheduler had marked. Pad with nops
28049 any dispatch groups which have vacant issue slots, in order to force the
28050 scheduler's grouping on the processor dispatcher. The function
28051 returns the number of dispatch groups found. */
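/* Editor's illustrative sketch (not in the original source): with an
   issue rate of 4 and a scheduler-marked boundary after two insns,
   two slots are vacant; one nop suffices when the next insn is not a
   branch (the empty branch slot already ends the group), but two nops
   are needed when it is a branch that could otherwise slip into the
   branch slot.  */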
28053 static int
28054 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28055 rtx_insn *tail)
28057 rtx_insn *insn, *next_insn;
28058 rtx nop;
28059 int issue_rate;
28060 int can_issue_more;
28061 int group_end;
28062 int group_count = 0;
28064 /* Initialize issue_rate. */
28065 issue_rate = rs6000_issue_rate ();
28066 can_issue_more = issue_rate;
28068 insn = get_next_active_insn (prev_head_insn, tail);
28069 next_insn = get_next_active_insn (insn, tail);
28071 while (insn != NULL_RTX)
28073 can_issue_more =
28074 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28076 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
28078 if (next_insn == NULL_RTX)
28079 break;
28081 if (group_end)
28083 /* If the scheduler had marked group termination at this location
28084 (between insn and next_insn), and neither insn nor next_insn will
28085 force group termination, pad the group with nops to force group
28086 termination. */
28087 if (can_issue_more
28088 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
28089 && !insn_terminates_group_p (insn, current_group)
28090 && !insn_terminates_group_p (next_insn, previous_group))
28092 if (!is_branch_slot_insn (next_insn))
28093 can_issue_more--;
28095 while (can_issue_more)
28097 nop = gen_nop ();
28098 emit_insn_before (nop, next_insn);
28099 can_issue_more--;
28103 can_issue_more = issue_rate;
28104 group_count++;
28107 insn = next_insn;
28108 next_insn = get_next_active_insn (insn, tail);
28111 return group_count;
28114 /* We're beginning a new block. Initialize data structures as necessary. */
28116 static void
28117 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
28118 int sched_verbose ATTRIBUTE_UNUSED,
28119 int max_ready ATTRIBUTE_UNUSED)
28121 last_scheduled_insn = NULL_RTX;
28122 load_store_pendulum = 0;
28125 /* The following function is called at the end of scheduling BB.
28126 After reload, it inserts nops to enforce the intended insn group boundaries. */
28128 static void
28129 rs6000_sched_finish (FILE *dump, int sched_verbose)
28131 int n_groups;
28133 if (sched_verbose)
28134 fprintf (dump, "=== Finishing schedule.\n");
28136 if (reload_completed && rs6000_sched_groups)
28138 /* Do not run sched_finish hook when selective scheduling is enabled. */
28139 if (sel_sched_p ())
28140 return;
28142 if (rs6000_sched_insert_nops == sched_finish_none)
28143 return;
28145 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
28146 n_groups = pad_groups (dump, sched_verbose,
28147 current_sched_info->prev_head,
28148 current_sched_info->next_tail);
28149 else
28150 n_groups = redefine_groups (dump, sched_verbose,
28151 current_sched_info->prev_head,
28152 current_sched_info->next_tail);
28154 if (sched_verbose >= 6)
28156 fprintf (dump, "ngroups = %d\n", n_groups);
28157 print_rtl (dump, current_sched_info->prev_head);
28158 fprintf (dump, "Done finish_sched\n");
28163 struct _rs6000_sched_context
28165 short cached_can_issue_more;
28166 rtx last_scheduled_insn;
28167 int load_store_pendulum;
28170 typedef struct _rs6000_sched_context rs6000_sched_context_def;
28171 typedef rs6000_sched_context_def *rs6000_sched_context_t;
28173 /* Allocate storage for a new scheduling context. */
28174 static void *
28175 rs6000_alloc_sched_context (void)
28177 return xmalloc (sizeof (rs6000_sched_context_def));
28180 /* If CLEAN_P is true, initialize _SC with clean data;
28181 otherwise initialize it from the global context. */
28182 static void
28183 rs6000_init_sched_context (void *_sc, bool clean_p)
28185 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28187 if (clean_p)
28189 sc->cached_can_issue_more = 0;
28190 sc->last_scheduled_insn = NULL_RTX;
28191 sc->load_store_pendulum = 0;
28193 else
28195 sc->cached_can_issue_more = cached_can_issue_more;
28196 sc->last_scheduled_insn = last_scheduled_insn;
28197 sc->load_store_pendulum = load_store_pendulum;
28201 /* Sets the global scheduling context to the one pointed to by _SC. */
28202 static void
28203 rs6000_set_sched_context (void *_sc)
28205 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28207 gcc_assert (sc != NULL);
28209 cached_can_issue_more = sc->cached_can_issue_more;
28210 last_scheduled_insn = sc->last_scheduled_insn;
28211 load_store_pendulum = sc->load_store_pendulum;
28214 /* Free _SC. */
28215 static void
28216 rs6000_free_sched_context (void *_sc)
28218 gcc_assert (_sc != NULL);
28220 free (_sc);
28224 /* Length in units of the trampoline for entering a nested function. */
28227 rs6000_trampoline_size (void)
28229 int ret = 0;
28231 switch (DEFAULT_ABI)
28233 default:
28234 gcc_unreachable ();
28236 case ABI_AIX:
28237 ret = (TARGET_32BIT) ? 12 : 24;
28238 break;
28240 case ABI_ELFv2:
28241 gcc_assert (!TARGET_32BIT);
28242 ret = 32;
28243 break;
28245 case ABI_DARWIN:
28246 case ABI_V4:
28247 ret = (TARGET_32BIT) ? 40 : 48;
28248 break;
28251 return ret;
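/* Editor's note (not in the original source): for example, 64-bit AIX
   uses the 24-byte case above because the trampoline is a three-word
   function descriptor (entry address, TOC pointer, static chain) of
   8 bytes each, as laid out by rs6000_trampoline_init below.  */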
28254 /* Emit RTL insns to initialize the variable parts of a trampoline.
28255 FNADDR is an RTX for the address of the function's pure code.
28256 CXT is an RTX for the static chain value for the function. */
28258 static void
28259 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
28261 int regsize = (TARGET_32BIT) ? 4 : 8;
28262 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
28263 rtx ctx_reg = force_reg (Pmode, cxt);
28264 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
28266 switch (DEFAULT_ABI)
28268 default:
28269 gcc_unreachable ();
28271 /* Under AIX, just build the 3-word function descriptor. */
28272 case ABI_AIX:
28274 rtx fnmem, fn_reg, toc_reg;
28276 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
28277 error ("You cannot take the address of a nested function if you use "
28278 "the -mno-pointers-to-nested-functions option.");
28280 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
28281 fn_reg = gen_reg_rtx (Pmode);
28282 toc_reg = gen_reg_rtx (Pmode);
28284 /* Macro to shorten the code expansions below. */
28285 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
28287 m_tramp = replace_equiv_address (m_tramp, addr);
28289 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
28290 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
28291 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
28292 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
28293 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
28295 # undef MEM_PLUS
28297 break;
28299 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
28300 case ABI_ELFv2:
28301 case ABI_DARWIN:
28302 case ABI_V4:
28303 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
28304 LCT_NORMAL, VOIDmode, 4,
28305 addr, Pmode,
28306 GEN_INT (rs6000_trampoline_size ()), SImode,
28307 fnaddr, Pmode,
28308 ctx_reg, Pmode);
28309 break;
28314 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
28315 identifier as an argument, so the front end shouldn't look it up. */
28317 static bool
28318 rs6000_attribute_takes_identifier_p (const_tree attr_id)
28320 return is_attribute_p ("altivec", attr_id);
28323 /* Handle the "altivec" attribute. The attribute may have
28324 arguments as follows:
28326 __attribute__((altivec(vector__)))
28327 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
28328 __attribute__((altivec(bool__))) (always followed by 'unsigned')
28330 and may appear more than once (e.g., 'vector bool char') in a
28331 given declaration. */
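/* Editor's illustrative example (not in the original source): the
   AltiVec 'vector' keyword is lowered to this attribute form, so a
   declaration such as
       vector unsigned int v;
   arrives here roughly as
       __attribute__((altivec(vector__))) unsigned int v;
   and is rewritten to unsigned_V4SI_type_node by the 'v'/SImode case
   below.  */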
28333 static tree
28334 rs6000_handle_altivec_attribute (tree *node,
28335 tree name ATTRIBUTE_UNUSED,
28336 tree args,
28337 int flags ATTRIBUTE_UNUSED,
28338 bool *no_add_attrs)
28340 tree type = *node, result = NULL_TREE;
28341 enum machine_mode mode;
28342 int unsigned_p;
28343 char altivec_type
28344 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
28345 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
28346 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
28347 : '?');
28349 while (POINTER_TYPE_P (type)
28350 || TREE_CODE (type) == FUNCTION_TYPE
28351 || TREE_CODE (type) == METHOD_TYPE
28352 || TREE_CODE (type) == ARRAY_TYPE)
28353 type = TREE_TYPE (type);
28355 mode = TYPE_MODE (type);
28357 /* Check for invalid AltiVec type qualifiers. */
28358 if (type == long_double_type_node)
28359 error ("use of %<long double%> in AltiVec types is invalid");
28360 else if (type == boolean_type_node)
28361 error ("use of boolean types in AltiVec types is invalid");
28362 else if (TREE_CODE (type) == COMPLEX_TYPE)
28363 error ("use of %<complex%> in AltiVec types is invalid");
28364 else if (DECIMAL_FLOAT_MODE_P (mode))
28365 error ("use of decimal floating point types in AltiVec types is invalid");
28366 else if (!TARGET_VSX)
28368 if (type == long_unsigned_type_node || type == long_integer_type_node)
28370 if (TARGET_64BIT)
28371 error ("use of %<long%> in AltiVec types is invalid for "
28372 "64-bit code without -mvsx");
28373 else if (rs6000_warn_altivec_long)
28374 warning (0, "use of %<long%> in AltiVec types is deprecated; "
28375 "use %<int%>");
28377 else if (type == long_long_unsigned_type_node
28378 || type == long_long_integer_type_node)
28379 error ("use of %<long long%> in AltiVec types is invalid without "
28380 "-mvsx");
28381 else if (type == double_type_node)
28382 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
28385 switch (altivec_type)
28387 case 'v':
28388 unsigned_p = TYPE_UNSIGNED (type);
28389 switch (mode)
28391 case TImode:
28392 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
28393 break;
28394 case DImode:
28395 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
28396 break;
28397 case SImode:
28398 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
28399 break;
28400 case HImode:
28401 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
28402 break;
28403 case QImode:
28404 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
28405 break;
28406 case SFmode: result = V4SF_type_node; break;
28407 case DFmode: result = V2DF_type_node; break;
28408 /* If the user says 'vector int bool', we may be handed the 'bool'
28409 attribute _before_ the 'vector' attribute, and so select the
28410 proper type in the 'b' case below. */
28411 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
28412 case V2DImode: case V2DFmode:
28413 result = type;
28414 default: break;
28416 break;
28417 case 'b':
28418 switch (mode)
28420 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
28421 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
28422 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
28423 case QImode: case V16QImode: result = bool_V16QI_type_node;
28424 default: break;
28426 break;
28427 case 'p':
28428 switch (mode)
28430 case V8HImode: result = pixel_V8HI_type_node;
28431 default: break;
28433 default: break;
28436 /* Propagate qualifiers attached to the element type
28437 onto the vector type. */
28438 if (result && result != type && TYPE_QUALS (type))
28439 result = build_qualified_type (result, TYPE_QUALS (type));
28441 *no_add_attrs = true; /* No need to hang on to the attribute. */
28443 if (result)
28444 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
28446 return NULL_TREE;
28449 /* AltiVec defines five built-in scalar types that serve as vector
28450 elements; we must teach the compiler how to mangle them. */
28452 static const char *
28453 rs6000_mangle_type (const_tree type)
28455 type = TYPE_MAIN_VARIANT (type);
28457 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28458 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28459 return NULL;
28461 if (type == bool_char_type_node) return "U6__boolc";
28462 if (type == bool_short_type_node) return "U6__bools";
28463 if (type == pixel_type_node) return "u7__pixel";
28464 if (type == bool_int_type_node) return "U6__booli";
28465 if (type == bool_long_type_node) return "U6__booll";
28467 /* Mangle IBM extended float long double as `g' (__float128) on
28468 powerpc*-linux where long-double-64 previously was the default. */
28469 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
28470 && TARGET_ELF
28471 && TARGET_LONG_DOUBLE_128
28472 && !TARGET_IEEEQUAD)
28473 return "g";
28475 /* For all other types, use normal C++ mangling. */
28476 return NULL;
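/* Editor's illustrative example (not in the original source): per the
   table above, the element type of 'vector bool int' mangles as
   "U6__booli" and that of 'vector pixel' as "u7__pixel".  */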
28479 /* Handle a "longcall" or "shortcall" attribute; arguments as in
28480 struct attribute_spec.handler. */
28482 static tree
28483 rs6000_handle_longcall_attribute (tree *node, tree name,
28484 tree args ATTRIBUTE_UNUSED,
28485 int flags ATTRIBUTE_UNUSED,
28486 bool *no_add_attrs)
28488 if (TREE_CODE (*node) != FUNCTION_TYPE
28489 && TREE_CODE (*node) != FIELD_DECL
28490 && TREE_CODE (*node) != TYPE_DECL)
28492 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28493 name);
28494 *no_add_attrs = true;
28497 return NULL_TREE;
28500 /* Set longcall attributes on all functions declared when
28501 rs6000_default_long_calls is true. */
28502 static void
28503 rs6000_set_default_type_attributes (tree type)
28505 if (rs6000_default_long_calls
28506 && (TREE_CODE (type) == FUNCTION_TYPE
28507 || TREE_CODE (type) == METHOD_TYPE))
28508 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
28509 NULL_TREE,
28510 TYPE_ATTRIBUTES (type));
28512 #if TARGET_MACHO
28513 darwin_set_default_type_attributes (type);
28514 #endif
28517 /* Return a reference suitable for calling a function with the
28518 longcall attribute. */
28521 rs6000_longcall_ref (rtx call_ref)
28523 const char *call_name;
28524 tree node;
28526 if (GET_CODE (call_ref) != SYMBOL_REF)
28527 return call_ref;
28529 /* System V adds '.' to the internal name, so skip any leading dots. */
28530 call_name = XSTR (call_ref, 0);
28531 if (*call_name == '.')
28533 while (*call_name == '.')
28534 call_name++;
28536 node = get_identifier (call_name);
28537 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
28540 return force_reg (Pmode, call_ref);
28543 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
28544 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
28545 #endif
28547 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
28548 struct attribute_spec.handler. */
28549 static tree
28550 rs6000_handle_struct_attribute (tree *node, tree name,
28551 tree args ATTRIBUTE_UNUSED,
28552 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28554 tree *type = NULL;
28555 if (DECL_P (*node))
28557 if (TREE_CODE (*node) == TYPE_DECL)
28558 type = &TREE_TYPE (*node);
28560 else
28561 type = node;
28563 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
28564 || TREE_CODE (*type) == UNION_TYPE)))
28566 warning (OPT_Wattributes, "%qE attribute ignored", name);
28567 *no_add_attrs = true;
28570 else if ((is_attribute_p ("ms_struct", name)
28571 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
28572 || ((is_attribute_p ("gcc_struct", name)
28573 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
28575 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
28576 name);
28577 *no_add_attrs = true;
28580 return NULL_TREE;
28583 static bool
28584 rs6000_ms_bitfield_layout_p (const_tree record_type)
28586 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
28587 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
28588 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
28591 #ifdef USING_ELFOS_H
28593 /* A get_unnamed_section callback, used for switching to toc_section. */
28595 static void
28596 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
28598 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28599 && TARGET_MINIMAL_TOC
28600 && !TARGET_RELOCATABLE)
28602 if (!toc_initialized)
28604 toc_initialized = 1;
28605 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28606 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
28607 fprintf (asm_out_file, "\t.tc ");
28608 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
28609 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28610 fprintf (asm_out_file, "\n");
28612 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28613 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28614 fprintf (asm_out_file, " = .+32768\n");
28616 else
28617 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28619 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28620 && !TARGET_RELOCATABLE)
28621 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28622 else
28624 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28625 if (!toc_initialized)
28627 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28628 fprintf (asm_out_file, " = .+32768\n");
28629 toc_initialized = 1;
28634 /* Implement TARGET_ASM_INIT_SECTIONS. */
28636 static void
28637 rs6000_elf_asm_init_sections (void)
28639 toc_section
28640 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
28642 sdata2_section
28643 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
28644 SDATA2_SECTION_ASM_OP);
28647 /* Implement TARGET_SELECT_RTX_SECTION. */
28649 static section *
28650 rs6000_elf_select_rtx_section (enum machine_mode mode, rtx x,
28651 unsigned HOST_WIDE_INT align)
28653 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
28654 return toc_section;
28655 else
28656 return default_elf_select_rtx_section (mode, x, align);
28659 /* For a SYMBOL_REF, set generic flags and then perform some
28660 target-specific processing.
28662 When the AIX ABI is requested on a non-AIX system, replace the
28663 function name with the real name (with a leading .) rather than the
28664 function descriptor name. This saves a lot of overriding code to
28665 read the prefixes. */
28667 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
28668 static void
28669 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
28671 default_encode_section_info (decl, rtl, first);
28673 if (first
28674 && TREE_CODE (decl) == FUNCTION_DECL
28675 && !TARGET_AIX
28676 && DEFAULT_ABI == ABI_AIX)
28678 rtx sym_ref = XEXP (rtl, 0);
28679 size_t len = strlen (XSTR (sym_ref, 0));
28680 char *str = XALLOCAVEC (char, len + 2);
28681 str[0] = '.';
28682 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
28683 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
28687 static inline bool
28688 compare_section_name (const char *section, const char *templ)
28690 int len;
28692 len = strlen (templ);
28693 return (strncmp (section, templ, len) == 0
28694 && (section[len] == 0 || section[len] == '.'));
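/* Editor's illustrative example (not in the original source):
   compare_section_name (".sdata.foo", ".sdata") and
   compare_section_name (".sdata", ".sdata") return true, but
   compare_section_name (".sdata2", ".sdata") returns false because
   the character after the match is '2' rather than '.' or NUL.  */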
28697 bool
28698 rs6000_elf_in_small_data_p (const_tree decl)
28700 if (rs6000_sdata == SDATA_NONE)
28701 return false;
28703 /* We want to merge strings, so we never consider them small data. */
28704 if (TREE_CODE (decl) == STRING_CST)
28705 return false;
28707 /* Functions are never in the small data area. */
28708 if (TREE_CODE (decl) == FUNCTION_DECL)
28709 return false;
28711 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
28713 const char *section = DECL_SECTION_NAME (decl);
28714 if (compare_section_name (section, ".sdata")
28715 || compare_section_name (section, ".sdata2")
28716 || compare_section_name (section, ".gnu.linkonce.s")
28717 || compare_section_name (section, ".sbss")
28718 || compare_section_name (section, ".sbss2")
28719 || compare_section_name (section, ".gnu.linkonce.sb")
28720 || strcmp (section, ".PPC.EMB.sdata0") == 0
28721 || strcmp (section, ".PPC.EMB.sbss0") == 0)
28722 return true;
28724 else
28726 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
28728 if (size > 0
28729 && size <= g_switch_value
28730 /* If it's not public, and we're not going to reference it there,
28731 there's no need to put it in the small data section. */
28732 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
28733 return true;
28736 return false;
28739 #endif /* USING_ELFOS_H */
28741 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
28743 static bool
28744 rs6000_use_blocks_for_constant_p (enum machine_mode mode, const_rtx x)
28746 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
28751 /* Do not place thread-local symbol refs in the object blocks. */
28751 static bool
28752 rs6000_use_blocks_for_decl_p (const_tree decl)
28754 return !DECL_THREAD_LOCAL_P (decl);
28757 /* Return a REG that occurs in ADDR with coefficient 1.
28758 ADDR can be effectively incremented by incrementing REG.
28760 r0 is special and we must not select it as an address
28761 register by this routine since our caller will try to
28762 increment the returned register via an "la" instruction. */
28765 find_addr_reg (rtx addr)
28767 while (GET_CODE (addr) == PLUS)
28769 if (GET_CODE (XEXP (addr, 0)) == REG
28770 && REGNO (XEXP (addr, 0)) != 0)
28771 addr = XEXP (addr, 0);
28772 else if (GET_CODE (XEXP (addr, 1)) == REG
28773 && REGNO (XEXP (addr, 1)) != 0)
28774 addr = XEXP (addr, 1);
28775 else if (CONSTANT_P (XEXP (addr, 0)))
28776 addr = XEXP (addr, 1);
28777 else if (CONSTANT_P (XEXP (addr, 1)))
28778 addr = XEXP (addr, 0);
28779 else
28780 gcc_unreachable ();
28782 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
28783 return addr;
28786 void
28787 rs6000_fatal_bad_address (rtx op)
28789 fatal_insn ("bad address", op);
28792 #if TARGET_MACHO
28794 typedef struct branch_island_d {
28795 tree function_name;
28796 tree label_name;
28797 int line_number;
28798 } branch_island;
28801 static vec<branch_island, va_gc> *branch_islands;
28803 /* Remember to generate a branch island for far calls to the given
28804 function. */
28806 static void
28807 add_compiler_branch_island (tree label_name, tree function_name,
28808 int line_number)
28810 branch_island bi = {function_name, label_name, line_number};
28811 vec_safe_push (branch_islands, bi);
28814 /* Generate far-jump branch islands for everything recorded in
28815 branch_islands. Invoked immediately after the last instruction of
28816 the epilogue has been emitted; the branch islands must be appended
28817 to, and contiguous with, the function body. Mach-O stubs are
28818 generated in machopic_output_stub(). */
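/* Editor's illustrative sketch (not in the original source) of the
   non-PIC island text emitted below for a far call to _foo through
   island L42:
       L42: lis r12,hi16(_foo)
            ori r12,r12,lo16(_foo)
            mtctr r12
            bctr
   The PIC variant instead computes the target PC-relatively with an
   mflr/bcl/addis/addi sequence.  */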
28820 static void
28821 macho_branch_islands (void)
28823 char tmp_buf[512];
28825 while (!vec_safe_is_empty (branch_islands))
28827 branch_island *bi = &branch_islands->last ();
28828 const char *label = IDENTIFIER_POINTER (bi->label_name);
28829 const char *name = IDENTIFIER_POINTER (bi->function_name);
28830 char name_buf[512];
28831 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
28832 if (name[0] == '*' || name[0] == '&')
28833 strcpy (name_buf, name+1);
28834 else
28836 name_buf[0] = '_';
28837 strcpy (name_buf+1, name);
28839 strcpy (tmp_buf, "\n");
28840 strcat (tmp_buf, label);
28841 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
28842 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
28843 dbxout_stabd (N_SLINE, bi->line_number);
28844 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
28845 if (flag_pic)
28847 if (TARGET_LINK_STACK)
28849 char name[32];
28850 get_ppc476_thunk_name (name);
28851 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
28852 strcat (tmp_buf, name);
28853 strcat (tmp_buf, "\n");
28854 strcat (tmp_buf, label);
28855 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
28857 else
28859 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
28860 strcat (tmp_buf, label);
28861 strcat (tmp_buf, "_pic\n");
28862 strcat (tmp_buf, label);
28863 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
28866 strcat (tmp_buf, "\taddis r11,r11,ha16(");
28867 strcat (tmp_buf, name_buf);
28868 strcat (tmp_buf, " - ");
28869 strcat (tmp_buf, label);
28870 strcat (tmp_buf, "_pic)\n");
28872 strcat (tmp_buf, "\tmtlr r0\n");
28874 strcat (tmp_buf, "\taddi r12,r11,lo16(");
28875 strcat (tmp_buf, name_buf);
28876 strcat (tmp_buf, " - ");
28877 strcat (tmp_buf, label);
28878 strcat (tmp_buf, "_pic)\n");
28880 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
28882 else
28884 strcat (tmp_buf, ":\nlis r12,hi16(");
28885 strcat (tmp_buf, name_buf);
28886 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
28887 strcat (tmp_buf, name_buf);
28888 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
28890 output_asm_insn (tmp_buf, 0);
28891 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
28892 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
28893 dbxout_stabd (N_SLINE, bi->line_number);
28894 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
28895 branch_islands->pop ();
28899 /* NO_PREVIOUS_DEF checks whether the function name is already
28900 in the branch island list. */
28902 static int
28903 no_previous_def (tree function_name)
28905 branch_island *bi;
28906 unsigned ix;
28908 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
28909 if (function_name == bi->function_name)
28910 return 0;
28911 return 1;
28914 /* GET_PREV_LABEL gets the label name from the previous definition of
28915 the function. */
28917 static tree
28918 get_prev_label (tree function_name)
28920 branch_island *bi;
28921 unsigned ix;
28923 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
28924 if (function_name == bi->function_name)
28925 return bi->label_name;
28926 return NULL_TREE;
28929 /* INSN is either a function call or a millicode call. It may have an
28930 unconditional jump in its delay slot.
28932 CALL_DEST is the routine we are calling. */
28934 char *
28935 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
28936 int cookie_operand_number)
28938 static char buf[256];
28939 if (darwin_emit_branch_islands
28940 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
28941 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
28943 tree labelname;
28944 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
28946 if (no_previous_def (funname))
28948 rtx label_rtx = gen_label_rtx ();
28949 char *label_buf, temp_buf[256];
28950 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
28951 CODE_LABEL_NUMBER (label_rtx));
28952 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
28953 labelname = get_identifier (label_buf);
28954 add_compiler_branch_island (labelname, funname, insn_line (insn));
28956 else
28957 labelname = get_prev_label (funname);
28959 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
28960 instruction will reach 'foo', otherwise link as 'bl L42'".
28961 "L42" should be a 'branch island', that will do a far jump to
28962 'foo'. Branch islands are generated in
28963 macho_branch_islands(). */
28964 sprintf (buf, "jbsr %%z%d,%.246s",
28965 dest_operand_number, IDENTIFIER_POINTER (labelname));
28967 else
28968 sprintf (buf, "bl %%z%d", dest_operand_number);
28969 return buf;
28972 /* Generate PIC and indirect symbol stubs. */
28974 void
28975 machopic_output_stub (FILE *file, const char *symb, const char *stub)
28977 unsigned int length;
28978 char *symbol_name, *lazy_ptr_name;
28979 char *local_label_0;
28980 static int label = 0;
28982 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
28983 symb = (*targetm.strip_name_encoding) (symb);
28986 length = strlen (symb);
28987 symbol_name = XALLOCAVEC (char, length + 32);
28988 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
28990 lazy_ptr_name = XALLOCAVEC (char, length + 32);
28991 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
28993 if (flag_pic == 2)
28994 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
28995 else
28996 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
28998 if (flag_pic == 2)
29000 fprintf (file, "\t.align 5\n");
29002 fprintf (file, "%s:\n", stub);
29003 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29005 label++;
29006 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
29007 sprintf (local_label_0, "\"L%011d$spb\"", label);
29009 fprintf (file, "\tmflr r0\n");
29010 if (TARGET_LINK_STACK)
29012 char name[32];
29013 get_ppc476_thunk_name (name);
29014 fprintf (file, "\tbl %s\n", name);
29015 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29017 else
29019 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
29020 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29022 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
29023 lazy_ptr_name, local_label_0);
29024 fprintf (file, "\tmtlr r0\n");
29025 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
29026 (TARGET_64BIT ? "ldu" : "lwzu"),
29027 lazy_ptr_name, local_label_0);
29028 fprintf (file, "\tmtctr r12\n");
29029 fprintf (file, "\tbctr\n");
29031 else
29033 fprintf (file, "\t.align 4\n");
29035 fprintf (file, "%s:\n", stub);
29036 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29038 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
29039 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
29040 (TARGET_64BIT ? "ldu" : "lwzu"),
29041 lazy_ptr_name);
29042 fprintf (file, "\tmtctr r12\n");
29043 fprintf (file, "\tbctr\n");
29046 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29047 fprintf (file, "%s:\n", lazy_ptr_name);
29048 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29049 fprintf (file, "%sdyld_stub_binding_helper\n",
29050 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
29053 /* Legitimize PIC addresses. If the address is already
29054 position-independent, we return ORIG. Newly generated
29055 position-independent addresses go into a reg. This is REG if
29056 nonzero; otherwise we allocate register(s) as necessary. */
29058 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
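/* Editor's worked example (not in the original source): SMALL_INT
   tests for a signed 16-bit constant via unsigned wraparound.
   X = -0x8000 gives (0xffff...8000 + 0x8000) = 0 < 0x10000 (true);
   X = 0x7fff gives 0xffff < 0x10000 (true);
   X = 0x8000 gives 0x10000 < 0x10000 (false).  */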
29061 rs6000_machopic_legitimize_pic_address (rtx orig, enum machine_mode mode,
29062 rtx reg)
29064 rtx base, offset;
29066 if (reg == NULL && ! reload_in_progress && ! reload_completed)
29067 reg = gen_reg_rtx (Pmode);
29069 if (GET_CODE (orig) == CONST)
29071 rtx reg_temp;
29073 if (GET_CODE (XEXP (orig, 0)) == PLUS
29074 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
29075 return orig;
29077 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
29079 /* Use a different reg for the intermediate value, as
29080 it will be marked UNCHANGING. */
29081 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
29082 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
29083 Pmode, reg_temp);
29084 offset =
29085 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
29086 Pmode, reg);
29088 if (GET_CODE (offset) == CONST_INT)
29090 if (SMALL_INT (offset))
29091 return plus_constant (Pmode, base, INTVAL (offset));
29092 else if (! reload_in_progress && ! reload_completed)
29093 offset = force_reg (Pmode, offset);
29094 else
29096 rtx mem = force_const_mem (Pmode, orig);
29097 return machopic_legitimize_pic_address (mem, Pmode, reg);
29100 return gen_rtx_PLUS (Pmode, base, offset);
29103 /* Fall back on generic machopic code. */
29104 return machopic_legitimize_pic_address (orig, mode, reg);
29107 /* Output a .machine directive for the Darwin assembler, and call
29108 the generic start_file routine. */
29110 static void
29111 rs6000_darwin_file_start (void)
29113 static const struct
29115 const char *arg;
29116 const char *name;
29117 HOST_WIDE_INT if_set;
29118 } mapping[] = {
29119 { "ppc64", "ppc64", MASK_64BIT },
29120 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
29121 { "power4", "ppc970", 0 },
29122 { "G5", "ppc970", 0 },
29123 { "7450", "ppc7450", 0 },
29124 { "7400", "ppc7400", MASK_ALTIVEC },
29125 { "G4", "ppc7400", 0 },
29126 { "750", "ppc750", 0 },
29127 { "740", "ppc750", 0 },
29128 { "G3", "ppc750", 0 },
29129 { "604e", "ppc604e", 0 },
29130 { "604", "ppc604", 0 },
29131 { "603e", "ppc603", 0 },
29132 { "603", "ppc603", 0 },
29133 { "601", "ppc601", 0 },
29134 { NULL, "ppc", 0 } };
29135 const char *cpu_id = "";
29136 size_t i;
29138 rs6000_file_start ();
29139 darwin_file_start ();
29141 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
29143 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
29144 cpu_id = rs6000_default_cpu;
29146 if (global_options_set.x_rs6000_cpu_index)
29147 cpu_id = processor_target_table[rs6000_cpu_index].name;
29149 /* Look through the mapping array. Pick the first name that either
29150 matches the argument, has a bit set in IF_SET that is also set
29151 in the target flags, or has a NULL name. */
29153 i = 0;
29154 while (mapping[i].arg != NULL
29155 && strcmp (mapping[i].arg, cpu_id) != 0
29156 && (mapping[i].if_set & rs6000_isa_flags) == 0)
29157 i++;
29159 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
29162 #endif /* TARGET_MACHO */
29164 #if TARGET_ELF
29165 static int
29166 rs6000_elf_reloc_rw_mask (void)
29168 if (flag_pic)
29169 return 3;
29170 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29171 return 2;
29172 else
29173 return 0;
29176 /* Record an element in the table of global constructors. SYMBOL is
29177 a SYMBOL_REF of the function to be called; PRIORITY is a number
29178 between 0 and MAX_INIT_PRIORITY.
29180 This differs from default_named_section_asm_out_constructor in
29181 that we have special handling for -mrelocatable. */
29183 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
29184 static void
29185 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
29187 const char *section = ".ctors";
29188 char buf[16];
29190 if (priority != DEFAULT_INIT_PRIORITY)
29192 sprintf (buf, ".ctors.%.5u",
29193 /* Invert the numbering so the linker puts us in the proper
29194 order; constructors are run from right to left, and the
29195 linker sorts in increasing order. */
29196 MAX_INIT_PRIORITY - priority);
29197 section = buf;
29200 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29201 assemble_align (POINTER_SIZE);
29203 if (TARGET_RELOCATABLE)
29205 fputs ("\t.long (", asm_out_file);
29206 output_addr_const (asm_out_file, symbol);
29207 fputs (")@fixup\n", asm_out_file);
29209 else
29210 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29213 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
29214 static void
29215 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
29217 const char *section = ".dtors";
29218 char buf[16];
29220 if (priority != DEFAULT_INIT_PRIORITY)
29222 sprintf (buf, ".dtors.%.5u",
29223 /* Invert the numbering so the linker puts us in the proper
29224 order; constructors are run from right to left, and the
29225 linker sorts in increasing order. */
29226 MAX_INIT_PRIORITY - priority);
29227 section = buf;
29230 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29231 assemble_align (POINTER_SIZE);
29233 if (TARGET_RELOCATABLE)
29235 fputs ("\t.long (", asm_out_file);
29236 output_addr_const (asm_out_file, symbol);
29237 fputs (")@fixup\n", asm_out_file);
29239 else
29240 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29243 void
29244 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
29246 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
29248 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
29249 ASM_OUTPUT_LABEL (file, name);
29250 fputs (DOUBLE_INT_ASM_OP, file);
29251 rs6000_output_function_entry (file, name);
29252 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
29253 if (DOT_SYMBOLS)
29255 fputs ("\t.size\t", file);
29256 assemble_name (file, name);
29257 fputs (",24\n\t.type\t.", file);
29258 assemble_name (file, name);
29259 fputs (",@function\n", file);
29260 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
29262 fputs ("\t.globl\t.", file);
29263 assemble_name (file, name);
29264 putc ('\n', file);
29267 else
29268 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29269 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29270 rs6000_output_function_entry (file, name);
29271 fputs (":\n", file);
29272 return;
29275 if (TARGET_RELOCATABLE
29276 && !TARGET_SECURE_PLT
29277 && (get_pool_size () != 0 || crtl->profile)
29278 && uses_TOC ())
29280 char buf[256];
29282 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
29284 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
29285 fprintf (file, "\t.long ");
29286 assemble_name (file, buf);
29287 putc ('-', file);
29288 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29289 assemble_name (file, buf);
29290 putc ('\n', file);
29293 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29294 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29296 if (DEFAULT_ABI == ABI_AIX)
29298 const char *desc_name, *orig_name;
29300 orig_name = (*targetm.strip_name_encoding) (name);
29301 desc_name = orig_name;
29302 while (*desc_name == '.')
29303 desc_name++;
29305 if (TREE_PUBLIC (decl))
29306 fprintf (file, "\t.globl %s\n", desc_name);
29308 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29309 fprintf (file, "%s:\n", desc_name);
29310 fprintf (file, "\t.long %s\n", orig_name);
29311 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
29312 fputs ("\t.long 0\n", file);
29313 fprintf (file, "\t.previous\n");
29315 ASM_OUTPUT_LABEL (file, name);
29318 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
29319 static void
29320 rs6000_elf_file_end (void)
29322 #ifdef HAVE_AS_GNU_ATTRIBUTE
29323 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
29325 if (rs6000_passes_float)
29326 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
29327 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
29328 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
29329 : 2));
29330 if (rs6000_passes_vector)
29331 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
29332 (TARGET_ALTIVEC_ABI ? 2
29333 : TARGET_SPE_ABI ? 3
29334 : 1));
29335 if (rs6000_returns_struct)
29336 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
29337 aix_struct_return ? 2 : 1);
29339 #endif
29340 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29341 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
29342 file_end_indicate_exec_stack ();
29343 #endif
29345 #endif
29347 #if TARGET_XCOFF
29348 static void
29349 rs6000_xcoff_asm_output_anchor (rtx symbol)
29351 char buffer[100];
29353 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
29354 SYMBOL_REF_BLOCK_OFFSET (symbol));
29355 fprintf (asm_out_file, "%s", SET_ASM_OP);
29356 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
29357 fprintf (asm_out_file, ",");
29358 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
29359 fprintf (asm_out_file, "\n");
29362 static void
29363 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
29365 fputs (GLOBAL_ASM_OP, stream);
29366 RS6000_OUTPUT_BASENAME (stream, name);
29367 putc ('\n', stream);
29370 /* A get_unnamed_section callback, used for read-only sections. DIRECTIVE
29371 points to the section string variable. */
29373 static void
29374 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
29376 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
29377 *(const char *const *) directive,
29378 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29381 /* Likewise for read-write sections. */
29383 static void
29384 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
29386 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
29387 *(const char *const *) directive,
29388 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29391 static void
29392 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
29394 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
29395 *(const char *const *) directive,
29396 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29399 /* A get_unnamed_section callback, used for switching to toc_section. */
29401 static void
29402 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
29404 if (TARGET_MINIMAL_TOC)
29406 /* toc_section is always selected at least once from
29407 rs6000_xcoff_file_start, so this is guaranteed to
29408 always be defined once and only once in each file. */
29409 if (!toc_initialized)
29411 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
29412 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
29413 toc_initialized = 1;
29415 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
29416 (TARGET_32BIT ? "" : ",3"));
29418 else
29419 fputs ("\t.toc\n", asm_out_file);
29422 /* Implement TARGET_ASM_INIT_SECTIONS. */
29424 static void
29425 rs6000_xcoff_asm_init_sections (void)
29427 read_only_data_section
29428 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29429 &xcoff_read_only_section_name);
29431 private_data_section
29432 = get_unnamed_section (SECTION_WRITE,
29433 rs6000_xcoff_output_readwrite_section_asm_op,
29434 &xcoff_private_data_section_name);
29436 tls_data_section
29437 = get_unnamed_section (SECTION_TLS,
29438 rs6000_xcoff_output_tls_section_asm_op,
29439 &xcoff_tls_data_section_name);
29441 tls_private_data_section
29442 = get_unnamed_section (SECTION_TLS,
29443 rs6000_xcoff_output_tls_section_asm_op,
29444 &xcoff_private_data_section_name);
29446 read_only_private_data_section
29447 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29448 &xcoff_private_data_section_name);
29450 toc_section
29451 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
29453 readonly_data_section = read_only_data_section;
29454 exception_section = data_section;
29457 static int
29458 rs6000_xcoff_reloc_rw_mask (void)
29460 return 3;
29463 static void
29464 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
29465 tree decl ATTRIBUTE_UNUSED)
29467 int smclass;
29468 static const char * const suffix[4] = { "PR", "RO", "RW", "TL" };
29470 if (flags & SECTION_CODE)
29471 smclass = 0;
29472 else if (flags & SECTION_TLS)
29473 smclass = 3;
29474 else if (flags & SECTION_WRITE)
29475 smclass = 2;
29476 else
29477 smclass = 1;
29479 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
29480 (flags & SECTION_CODE) ? "." : "",
29481 name, suffix[smclass], flags & SECTION_ENTSIZE);
29484 #define IN_NAMED_SECTION(DECL) \
29485 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
29486 && DECL_SECTION_NAME (DECL) != NULL)
29488 static section *
29489 rs6000_xcoff_select_section (tree decl, int reloc,
29490 unsigned HOST_WIDE_INT align)
29492 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
29493 named section. */
29494 if (align > BIGGEST_ALIGNMENT)
29496 resolve_unique_section (decl, reloc, true);
29497 if (IN_NAMED_SECTION (decl))
29498 return get_named_section (decl, NULL, reloc);
29501 if (decl_readonly_section (decl, reloc))
29503 if (TREE_PUBLIC (decl))
29504 return read_only_data_section;
29505 else
29506 return read_only_private_data_section;
29508 else
29510 #if HAVE_AS_TLS
29511 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
29513 if (TREE_PUBLIC (decl))
29514 return tls_data_section;
29515 else if (bss_initializer_p (decl))
29517 /* Convert to COMMON to emit in BSS. */
29518 DECL_COMMON (decl) = 1;
29519 return tls_comm_section;
29521 else
29522 return tls_private_data_section;
29524 else
29525 #endif
29526 if (TREE_PUBLIC (decl))
29527 return data_section;
29528 else
29529 return private_data_section;
29533 static void
29534 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
29536 const char *name;
29538 /* Use select_section for private data and uninitialized data with
29539 alignment <= BIGGEST_ALIGNMENT. */
29540 if (!TREE_PUBLIC (decl)
29541 || DECL_COMMON (decl)
29542 || (DECL_INITIAL (decl) == NULL_TREE
29543 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
29544 || DECL_INITIAL (decl) == error_mark_node
29545 || (flag_zero_initialized_in_bss
29546 && initializer_zerop (DECL_INITIAL (decl))))
29547 return;
29549 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
29550 name = (*targetm.strip_name_encoding) (name);
29551 set_decl_section_name (decl, name);
29554 /* Select section for constant in constant pool.
29556 On RS/6000, all constants are in the private read-only data area.
29557 However, if this is being placed in the TOC it must be output as a
29558 toc entry. */
29560 static section *
29561 rs6000_xcoff_select_rtx_section (enum machine_mode mode, rtx x,
29562 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
29564 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29565 return toc_section;
29566 else
29567 return read_only_private_data_section;
29570 /* Remove any trailing [DS] or the like from the symbol name. */
29572 static const char *
29573 rs6000_xcoff_strip_name_encoding (const char *name)
29575 size_t len;
29576 if (*name == '*')
29577 name++;
29578 len = strlen (name);
29579 if (name[len - 1] == ']')
29580 return ggc_alloc_string (name, len - 4);
29581 else
29582 return name;
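/* Illustrative sketch (added for exposition, not part of the original
   file): the same strip on plain C strings.  The len - 4 arithmetic
   assumes the trailing mapping class is always a four-character
   bracketed suffix such as "[DS]" or "[RW]".  */
#include <stdio.h>
#include <string.h>

static void
strip_mapping_class (const char *name, char *out, size_t outlen)
{
  size_t len;
  if (*name == '*')
    name++;
  len = strlen (name);
  if (len >= 4 && name[len - 1] == ']')
    len -= 4;                   /* drop e.g. "[DS]" */
  if (len >= outlen)
    len = outlen - 1;
  memcpy (out, name, len);
  out[len] = '\0';
}

int
main (void)
{
  char buf[32];
  strip_mapping_class ("*foo[DS]", buf, sizeof buf);
  printf ("%s\n", buf);         /* foo */
  return 0;
}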
29585 /* Section attributes. AIX is always PIC. */
29587 static unsigned int
29588 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
29590 unsigned int align;
29591 unsigned int flags = default_section_type_flags (decl, name, reloc);
29593 /* Align to at least UNIT size. */
29594 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
29595 align = MIN_UNITS_PER_WORD;
29596 else
29597 /* Increase alignment of large objects if not already stricter. */
29598 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
29599 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
29600 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
29602 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
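/* Illustrative sketch (added for exposition, not part of the original
   file): how the alignment is carried in the flag word.  The 0xff mask
   is a stand-in for GCC's SECTION_ENTSIZE bits, which this target
   reuses to hold log2 of the alignment.  */
#include <stdio.h>

int
main (void)
{
  unsigned flags = 0x200;          /* hypothetical section flag bits */
  unsigned align = 16;             /* bytes */
  unsigned log2a = 0;
  while ((1u << log2a) < align)
    log2a++;                       /* exact_log2 for a power of two */
  printf ("0x%x\n", flags | (log2a & 0xff));  /* 0x204 */
  return 0;
}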
29605 /* Output at beginning of assembler file.
29607 Initialize the section names for the RS/6000 at this point.
29609 Specify filename, including full path, to assembler.
29611 We want to go into the TOC section so at least one .toc will be emitted.
29612 Also, in order to output proper .bs/.es pairs, we need at least one static
29613 [RW] section emitted.
29615 Finally, declare mcount when profiling to make the assembler happy. */
29617 static void
29618 rs6000_xcoff_file_start (void)
29620 rs6000_gen_section_name (&xcoff_bss_section_name,
29621 main_input_filename, ".bss_");
29622 rs6000_gen_section_name (&xcoff_private_data_section_name,
29623 main_input_filename, ".rw_");
29624 rs6000_gen_section_name (&xcoff_read_only_section_name,
29625 main_input_filename, ".ro_");
29626 rs6000_gen_section_name (&xcoff_tls_data_section_name,
29627 main_input_filename, ".tls_");
29628 rs6000_gen_section_name (&xcoff_tbss_section_name,
29629 main_input_filename, ".tbss_[UL]");
29631 fputs ("\t.file\t", asm_out_file);
29632 output_quoted_string (asm_out_file, main_input_filename);
29633 fputc ('\n', asm_out_file);
29634 if (write_symbols != NO_DEBUG)
29635 switch_to_section (private_data_section);
29636 switch_to_section (text_section);
29637 if (profile_flag)
29638 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
29639 rs6000_file_start ();
29642 /* Output at end of assembler file.
29643 On the RS/6000, referencing data should automatically pull in text. */
29645 static void
29646 rs6000_xcoff_file_end (void)
29648 switch_to_section (text_section);
29649 fputs ("_section_.text:\n", asm_out_file);
29650 switch_to_section (data_section);
29651 fputs (TARGET_32BIT
29652 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
29653 asm_out_file);
29656 struct declare_alias_data
29658 FILE *file;
29659 bool function_descriptor;
29662 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
29664 static bool
29665 rs6000_declare_alias (struct symtab_node *n, void *d)
29667 struct declare_alias_data *data = (struct declare_alias_data *)d;
29668 /* Main symbol is output specially, because varasm machinery does part of
29669 the job for us - we do not need to declare .globl/lglobs and such. */
29670 if (!n->alias || n->weakref)
29671 return false;
29673 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
29674 return false;
29676 /* Prevent assemble_alias from trying to use .set pseudo operation
29677 that does not behave as expected by the middle-end. */
29678 TREE_ASM_WRITTEN (n->decl) = true;
29680 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
29681 char *buffer = (char *) alloca (strlen (name) + 2);
29682 char *p;
29683 int dollar_inside = 0;
29685 strcpy (buffer, name);
29686 p = strchr (buffer, '$');
29687 while (p) {
29688 *p = '_';
29689 dollar_inside++;
29690 p = strchr (p + 1, '$');
29692 if (TREE_PUBLIC (n->decl))
29694 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
29696 if (dollar_inside) {
29697 if (data->function_descriptor)
29698 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
29699 else
29700 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
29702 if (data->function_descriptor)
29703 fputs ("\t.globl .", data->file);
29704 else
29705 fputs ("\t.globl ", data->file);
29706 RS6000_OUTPUT_BASENAME (data->file, buffer);
29707 putc ('\n', data->file);
29709 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
29710 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
29712 else
29714 if (dollar_inside)
29716 if (data->function_descriptor)
29717 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
29718 else
29719 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
29721 if (data->function_descriptor)
29722 fputs ("\t.lglobl .", data->file);
29723 else
29724 fputs ("\t.lglobl ", data->file);
29725 RS6000_OUTPUT_BASENAME (data->file, buffer);
29726 putc ('\n', data->file);
29728 if (data->function_descriptor)
29729 fputs (".", data->file);
29730 RS6000_OUTPUT_BASENAME (data->file, buffer);
29731 fputs (":\n", data->file);
29732 return false;
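/* Illustrative sketch (added for exposition, not part of the original
   file): the '$' to '_' rewrite that precedes the .rename output above,
   kept standalone with the alloca buffer replaced by a fixed one.  The
   symbol name is hypothetical.  */
#include <stdio.h>
#include <string.h>

int
main (void)
{
  const char *name = "objc$class$method";
  char buffer[64];
  int dollar_inside = 0;
  char *p;

  strcpy (buffer, name);
  for (p = strchr (buffer, '$'); p; p = strchr (p + 1, '$'))
    {
      *p = '_';
      dollar_inside++;
    }
  if (dollar_inside)
    /* The AIX assembler sees the mangled name; .rename maps it back.  */
    printf ("\t.rename %s,\"%s\"\n", buffer, name);
  return 0;
}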
29735 /* This macro produces the initial definition of a function name.
29736 On the RS/6000, we need to place an extra '.' in the function name and
29737 output the function descriptor.
29738 Dollar signs are converted to underscores.
29740 The csect for the function will have already been created when
29741 text_section was selected. We do have to go back to that csect, however.
29743 The third and fourth parameters to the .function pseudo-op (16 and 044)
29744 are placeholders which no longer have any use.
29746 Because AIX assembler's .set command has unexpected semantics, we output
29747 all aliases as alternative labels in front of the definition. */
29749 void
29750 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
29752 char *buffer = (char *) alloca (strlen (name) + 1);
29753 char *p;
29754 int dollar_inside = 0;
29755 struct declare_alias_data data = {file, false};
29757 strcpy (buffer, name);
29758 p = strchr (buffer, '$');
29759 while (p) {
29760 *p = '_';
29761 dollar_inside++;
29762 p = strchr (p + 1, '$');
29764 if (TREE_PUBLIC (decl))
29766 if (!RS6000_WEAK || !DECL_WEAK (decl))
29768 if (dollar_inside) {
29769 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
29770 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
29772 fputs ("\t.globl .", file);
29773 RS6000_OUTPUT_BASENAME (file, buffer);
29774 putc ('\n', file);
29777 else
29779 if (dollar_inside) {
29780 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
29781 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
29783 fputs ("\t.lglobl .", file);
29784 RS6000_OUTPUT_BASENAME (file, buffer);
29785 putc ('\n', file);
29787 fputs ("\t.csect ", file);
29788 RS6000_OUTPUT_BASENAME (file, buffer);
29789 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
29790 RS6000_OUTPUT_BASENAME (file, buffer);
29791 fputs (":\n", file);
29792 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
29793 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
29794 RS6000_OUTPUT_BASENAME (file, buffer);
29795 fputs (", TOC[tc0], 0\n", file);
29796 in_section = NULL;
29797 switch_to_section (function_section (decl));
29798 putc ('.', file);
29799 RS6000_OUTPUT_BASENAME (file, buffer);
29800 fputs (":\n", file);
29801 data.function_descriptor = true;
29802 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
29803 if (write_symbols != NO_DEBUG && !DECL_IGNORED_P (decl))
29804 xcoffout_declare_function (file, decl, buffer);
29805 return;
29808 /* This macro produces the initial definition of an object (variable) name.
29809 Because AIX assembler's .set command has unexpected semantics, we output
29810 all aliases as alternative labels in front of the definition. */
29812 void
29813 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
29815 struct declare_alias_data data = {file, false};
29816 RS6000_OUTPUT_BASENAME (file, name);
29817 fputs (":\n", file);
29818 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
29821 #ifdef HAVE_AS_TLS
29822 static void
29823 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
29825 rtx symbol;
29826 int flags;
29828 default_encode_section_info (decl, rtl, first);
29830 /* Careful not to prod global register variables. */
29831 if (!MEM_P (rtl))
29832 return;
29833 symbol = XEXP (rtl, 0);
29834 if (GET_CODE (symbol) != SYMBOL_REF)
29835 return;
29837 flags = SYMBOL_REF_FLAGS (symbol);
29839 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
29840 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
29842 SYMBOL_REF_FLAGS (symbol) = flags;
29844 #endif /* HAVE_AS_TLS */
29845 #endif /* TARGET_XCOFF */
29847 /* Compute a (partial) cost for rtx X. Return true if the complete
29848 cost has been computed, and false if subexpressions should be
29849 scanned. In either case, *TOTAL contains the cost result. */
29851 static bool
29852 rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
29853 int *total, bool speed)
29855 enum machine_mode mode = GET_MODE (x);
29857 switch (code)
29859 /* On the RS/6000, if it is valid in the insn, it is free. */
29860 case CONST_INT:
29861 if (((outer_code == SET
29862 || outer_code == PLUS
29863 || outer_code == MINUS)
29864 && (satisfies_constraint_I (x)
29865 || satisfies_constraint_L (x)))
29866 || (outer_code == AND
29867 && (satisfies_constraint_K (x)
29868 || (mode == SImode
29869 ? satisfies_constraint_L (x)
29870 : satisfies_constraint_J (x))
29871 || mask_operand (x, mode)
29872 || (mode == DImode
29873 && mask64_operand (x, DImode))))
29874 || ((outer_code == IOR || outer_code == XOR)
29875 && (satisfies_constraint_K (x)
29876 || (mode == SImode
29877 ? satisfies_constraint_L (x)
29878 : satisfies_constraint_J (x))))
29879 || outer_code == ASHIFT
29880 || outer_code == ASHIFTRT
29881 || outer_code == LSHIFTRT
29882 || outer_code == ROTATE
29883 || outer_code == ROTATERT
29884 || outer_code == ZERO_EXTRACT
29885 || (outer_code == MULT
29886 && satisfies_constraint_I (x))
29887 || ((outer_code == DIV || outer_code == UDIV
29888 || outer_code == MOD || outer_code == UMOD)
29889 && exact_log2 (INTVAL (x)) >= 0)
29890 || (outer_code == COMPARE
29891 && (satisfies_constraint_I (x)
29892 || satisfies_constraint_K (x)))
29893 || ((outer_code == EQ || outer_code == NE)
29894 && (satisfies_constraint_I (x)
29895 || satisfies_constraint_K (x)
29896 || (mode == SImode
29897 ? satisfies_constraint_L (x)
29898 : satisfies_constraint_J (x))))
29899 || (outer_code == GTU
29900 && satisfies_constraint_I (x))
29901 || (outer_code == LTU
29902 && satisfies_constraint_P (x)))
29904 *total = 0;
29905 return true;
29907 else if ((outer_code == PLUS
29908 && reg_or_add_cint_operand (x, VOIDmode))
29909 || (outer_code == MINUS
29910 && reg_or_sub_cint_operand (x, VOIDmode))
29911 || ((outer_code == SET
29912 || outer_code == IOR
29913 || outer_code == XOR)
29914 && (INTVAL (x)
29915 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
29917 *total = COSTS_N_INSNS (1);
29918 return true;
29920 /* FALLTHRU */
29922 case CONST_DOUBLE:
29923 case CONST_WIDE_INT:
29924 case CONST:
29925 case HIGH:
29926 case SYMBOL_REF:
29927 case MEM:
29928 /* When optimizing for size, a MEM should be slightly more expensive
29929 than generating the address, e.g., (plus (reg) (const)).
29930 L1 cache latency is about two instructions. */
29931 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
29932 return true;
29934 case LABEL_REF:
29935 *total = 0;
29936 return true;
29938 case PLUS:
29939 case MINUS:
29940 if (FLOAT_MODE_P (mode))
29941 *total = rs6000_cost->fp;
29942 else
29943 *total = COSTS_N_INSNS (1);
29944 return false;
29946 case MULT:
29947 if (GET_CODE (XEXP (x, 1)) == CONST_INT
29948 && satisfies_constraint_I (XEXP (x, 1)))
29950 if (INTVAL (XEXP (x, 1)) >= -256
29951 && INTVAL (XEXP (x, 1)) <= 255)
29952 *total = rs6000_cost->mulsi_const9;
29953 else
29954 *total = rs6000_cost->mulsi_const;
29956 else if (mode == SFmode)
29957 *total = rs6000_cost->fp;
29958 else if (FLOAT_MODE_P (mode))
29959 *total = rs6000_cost->dmul;
29960 else if (mode == DImode)
29961 *total = rs6000_cost->muldi;
29962 else
29963 *total = rs6000_cost->mulsi;
29964 return false;
29966 case FMA:
29967 if (mode == SFmode)
29968 *total = rs6000_cost->fp;
29969 else
29970 *total = rs6000_cost->dmul;
29971 break;
29973 case DIV:
29974 case MOD:
29975 if (FLOAT_MODE_P (mode))
29977 *total = mode == DFmode ? rs6000_cost->ddiv
29978 : rs6000_cost->sdiv;
29979 return false;
29981 /* FALLTHRU */
29983 case UDIV:
29984 case UMOD:
29985 if (GET_CODE (XEXP (x, 1)) == CONST_INT
29986 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
29988 if (code == DIV || code == MOD)
29989 /* Shift, addze */
29990 *total = COSTS_N_INSNS (2);
29991 else
29992 /* Shift */
29993 *total = COSTS_N_INSNS (1);
29995 else
29997 if (GET_MODE (XEXP (x, 1)) == DImode)
29998 *total = rs6000_cost->divdi;
29999 else
30000 *total = rs6000_cost->divsi;
30002 /* Add in shift and subtract for MOD. */
30003 if (code == MOD || code == UMOD)
30004 *total += COSTS_N_INSNS (2);
30005 return false;
30007 case CTZ:
30008 case FFS:
30009 *total = COSTS_N_INSNS (4);
30010 return false;
30012 case POPCOUNT:
30013 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
30014 return false;
30016 case PARITY:
30017 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
30018 return false;
30020 case NOT:
30021 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
30023 *total = 0;
30024 return false;
30026 /* FALLTHRU */
30028 case AND:
30029 case CLZ:
30030 case IOR:
30031 case XOR:
30032 case ZERO_EXTRACT:
30033 *total = COSTS_N_INSNS (1);
30034 return false;
30036 case ASHIFT:
30037 case ASHIFTRT:
30038 case LSHIFTRT:
30039 case ROTATE:
30040 case ROTATERT:
30041 /* Handle mul_highpart. */
30042 if (outer_code == TRUNCATE
30043 && GET_CODE (XEXP (x, 0)) == MULT)
30045 if (mode == DImode)
30046 *total = rs6000_cost->muldi;
30047 else
30048 *total = rs6000_cost->mulsi;
30049 return true;
30051 else if (outer_code == AND)
30052 *total = 0;
30053 else
30054 *total = COSTS_N_INSNS (1);
30055 return false;
30057 case SIGN_EXTEND:
30058 case ZERO_EXTEND:
30059 if (GET_CODE (XEXP (x, 0)) == MEM)
30060 *total = 0;
30061 else
30062 *total = COSTS_N_INSNS (1);
30063 return false;
30065 case COMPARE:
30066 case NEG:
30067 case ABS:
30068 if (!FLOAT_MODE_P (mode))
30070 *total = COSTS_N_INSNS (1);
30071 return false;
30073 /* FALLTHRU */
30075 case FLOAT:
30076 case UNSIGNED_FLOAT:
30077 case FIX:
30078 case UNSIGNED_FIX:
30079 case FLOAT_TRUNCATE:
30080 *total = rs6000_cost->fp;
30081 return false;
30083 case FLOAT_EXTEND:
30084 if (mode == DFmode)
30085 *total = 0;
30086 else
30087 *total = rs6000_cost->fp;
30088 return false;
30090 case UNSPEC:
30091 switch (XINT (x, 1))
30093 case UNSPEC_FRSP:
30094 *total = rs6000_cost->fp;
30095 return true;
30097 default:
30098 break;
30100 break;
30102 case CALL:
30103 case IF_THEN_ELSE:
30104 if (!speed)
30106 *total = COSTS_N_INSNS (1);
30107 return true;
30109 else if (FLOAT_MODE_P (mode)
30110 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
30112 *total = rs6000_cost->fp;
30113 return false;
30115 break;
30117 case NE:
30118 case EQ:
30119 case GTU:
30120 case LTU:
30121 /* Carry bit requires mode == Pmode.
30122 NEG or PLUS already counted so only add one. */
30123 if (mode == Pmode
30124 && (outer_code == NEG || outer_code == PLUS))
30126 *total = COSTS_N_INSNS (1);
30127 return true;
30129 if (outer_code == SET)
30131 if (XEXP (x, 1) == const0_rtx)
30133 if (TARGET_ISEL && !TARGET_MFCRF)
30134 *total = COSTS_N_INSNS (8);
30135 else
30136 *total = COSTS_N_INSNS (2);
30137 return true;
30139 else if (mode == Pmode)
30141 *total = COSTS_N_INSNS (3);
30142 return false;
30145 /* FALLTHRU */
30147 case GT:
30148 case LT:
30149 case UNORDERED:
30150 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
30152 if (TARGET_ISEL && !TARGET_MFCRF)
30153 *total = COSTS_N_INSNS (8);
30154 else
30155 *total = COSTS_N_INSNS (2);
30156 return true;
30158 /* CC COMPARE. */
30159 if (outer_code == COMPARE)
30161 *total = 0;
30162 return true;
30164 break;
30166 default:
30167 break;
30170 return false;
30173 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
30175 static bool
30176 rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
30177 bool speed)
30179 bool ret = rs6000_rtx_costs (x, code, outer_code, opno, total, speed);
30181 fprintf (stderr,
30182 "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
30183 "opno = %d, total = %d, speed = %s, x:\n",
30184 ret ? "complete" : "scan inner",
30185 GET_RTX_NAME (code),
30186 GET_RTX_NAME (outer_code),
30187 opno,
30188 *total,
30189 speed ? "true" : "false");
30191 debug_rtx (x);
30193 return ret;
30196 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
30198 static int
30199 rs6000_debug_address_cost (rtx x, enum machine_mode mode,
30200 addr_space_t as, bool speed)
30202 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
30204 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
30205 ret, speed ? "true" : "false");
30206 debug_rtx (x);
30208 return ret;
30212 /* A C expression returning the cost of moving data from a register of class
30213 CLASS1 to one of CLASS2. */
30215 static int
30216 rs6000_register_move_cost (enum machine_mode mode,
30217 reg_class_t from, reg_class_t to)
30219 int ret;
30221 if (TARGET_DEBUG_COST)
30222 dbg_cost_ctrl++;
30224 /* Moves from/to GENERAL_REGS. */
30225 if (reg_classes_intersect_p (to, GENERAL_REGS)
30226 || reg_classes_intersect_p (from, GENERAL_REGS))
30228 reg_class_t rclass = from;
30230 if (! reg_classes_intersect_p (to, GENERAL_REGS))
30231 rclass = to;
30233 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
30234 ret = (rs6000_memory_move_cost (mode, rclass, false)
30235 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
30237 /* It's more expensive to move CR_REGS than CR0_REGS because of the
30238 shift. */
30239 else if (rclass == CR_REGS)
30240 ret = 4;
30242 /* For those processors that have slow LR/CTR moves, make them more
30243 expensive than memory in order to bias spills to memory. */
30244 else if ((rs6000_cpu == PROCESSOR_POWER6
30245 || rs6000_cpu == PROCESSOR_POWER7
30246 || rs6000_cpu == PROCESSOR_POWER8)
30247 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
30248 ret = 6 * hard_regno_nregs[0][mode];
30250 else
30251 /* A move will cost one instruction per GPR moved. */
30252 ret = 2 * hard_regno_nregs[0][mode];
30255 /* If we have VSX, we can easily move between FPR or Altivec registers. */
30256 else if (VECTOR_MEM_VSX_P (mode)
30257 && reg_classes_intersect_p (to, VSX_REGS)
30258 && reg_classes_intersect_p (from, VSX_REGS))
30259 ret = 2 * hard_regno_nregs[32][mode];
30261 /* Moving between two similar registers is just one instruction. */
30262 else if (reg_classes_intersect_p (to, from))
30263 ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
30265 /* Everything else has to go through GENERAL_REGS. */
30266 else
30267 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
30268 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
30270 if (TARGET_DEBUG_COST)
30272 if (dbg_cost_ctrl == 1)
30273 fprintf (stderr,
30274 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
30275 ret, GET_MODE_NAME (mode), reg_class_names[from],
30276 reg_class_names[to]);
30277 dbg_cost_ctrl--;
30280 return ret;
30283 /* A C expression returning the cost of moving data of MODE from a register to
30284 or from memory. */
30286 static int
30287 rs6000_memory_move_cost (enum machine_mode mode, reg_class_t rclass,
30288 bool in ATTRIBUTE_UNUSED)
30290 int ret;
30292 if (TARGET_DEBUG_COST)
30293 dbg_cost_ctrl++;
30295 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
30296 ret = 4 * hard_regno_nregs[0][mode];
30297 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
30298 || reg_classes_intersect_p (rclass, VSX_REGS)))
30299 ret = 4 * hard_regno_nregs[32][mode];
30300 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
30301 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
30302 else
30303 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
30305 if (TARGET_DEBUG_COST)
30307 if (dbg_cost_ctrl == 1)
30308 fprintf (stderr,
30309 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
30310 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
30311 dbg_cost_ctrl--;
30314 return ret;
30317 /* Returns a code for a target-specific builtin that implements
30318 reciprocal of the function, or NULL_TREE if not available. */
30320 static tree
30321 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
30322 bool sqrt ATTRIBUTE_UNUSED)
30324 if (optimize_insn_for_size_p ())
30325 return NULL_TREE;
30327 if (md_fn)
30328 switch (fn)
30330 case VSX_BUILTIN_XVSQRTDP:
30331 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
30332 return NULL_TREE;
30334 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
30336 case VSX_BUILTIN_XVSQRTSP:
30337 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
30338 return NULL_TREE;
30340 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
30342 default:
30343 return NULL_TREE;
30346 else
30347 switch (fn)
30349 case BUILT_IN_SQRT:
30350 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
30351 return NULL_TREE;
30353 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
30355 case BUILT_IN_SQRTF:
30356 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
30357 return NULL_TREE;
30359 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
30361 default:
30362 return NULL_TREE;
30366 /* Load up a constant. If the mode is a vector mode, splat the value across
30367 all of the vector elements. */
30369 static rtx
30370 rs6000_load_constant_and_splat (enum machine_mode mode, REAL_VALUE_TYPE dconst)
30372 rtx reg;
30374 if (mode == SFmode || mode == DFmode)
30376 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
30377 reg = force_reg (mode, d);
30379 else if (mode == V4SFmode)
30381 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
30382 rtvec v = gen_rtvec (4, d, d, d, d);
30383 reg = gen_reg_rtx (mode);
30384 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30386 else if (mode == V2DFmode)
30388 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
30389 rtvec v = gen_rtvec (2, d, d);
30390 reg = gen_reg_rtx (mode);
30391 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30393 else
30394 gcc_unreachable ();
30396 return reg;
30399 /* Generate an FMA instruction. */
30401 static void
30402 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
30404 enum machine_mode mode = GET_MODE (target);
30405 rtx dst;
30407 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30408 gcc_assert (dst != NULL);
30410 if (dst != target)
30411 emit_move_insn (target, dst);
30414 /* Generate an FMSUB instruction: dst = fma(m1, m2, -a). */
30416 static void
30417 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
30419 enum machine_mode mode = GET_MODE (target);
30420 rtx dst;
30422 /* Altivec does not support fms directly;
30423 generate in terms of fma in that case. */
30424 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
30425 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
30426 else
30428 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
30429 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30431 gcc_assert (dst != NULL);
30433 if (dst != target)
30434 emit_move_insn (target, dst);
30437 /* Generate an FNMSUB instruction: dst = -fma(m1, m2, -a). */
30439 static void
30440 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
30442 enum machine_mode mode = GET_MODE (dst);
30443 rtx r;
30445 /* This is a tad more complicated, since the fnma_optab is for
30446 a different expression: fma(-m1, m2, a), which is the same
30447 thing except in the case of signed zeros.
30449 Fortunately we know that if FMA is supported that FNMSUB is
30450 also supported in the ISA. Just expand it directly. */
30452 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
30454 r = gen_rtx_NEG (mode, a);
30455 r = gen_rtx_FMA (mode, m1, m2, r);
30456 r = gen_rtx_NEG (mode, r);
30457 emit_insn (gen_rtx_SET (VOIDmode, dst, r));
30460 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
30461 add a reg_note saying that this was a division. Support both scalar and
30462 vector divide. Assumes no trapping math and finite arguments. */
30464 void
30465 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
30467 enum machine_mode mode = GET_MODE (dst);
30468 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
30469 int i;
30471 /* Low precision estimates guarantee 5 bits of accuracy. High
30472 precision estimates guarantee 14 bits of accuracy. SFmode
30473 requires 23 bits of accuracy. DFmode requires 52 bits of
30474 accuracy. Each pass at least doubles the accuracy, leading
30475 to the following. */
30476 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30477 if (mode == DFmode || mode == V2DFmode)
30478 passes++;
30480 enum insn_code code = optab_handler (smul_optab, mode);
30481 insn_gen_fn gen_mul = GEN_FCN (code);
30483 gcc_assert (code != CODE_FOR_nothing);
30485 one = rs6000_load_constant_and_splat (mode, dconst1);
30487 /* x0 = 1./d estimate */
30488 x0 = gen_reg_rtx (mode);
30489 emit_insn (gen_rtx_SET (VOIDmode, x0,
30490 gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
30491 UNSPEC_FRES)));
30493 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
30494 if (passes > 1) {
30496 /* e0 = 1. - d * x0 */
30497 e0 = gen_reg_rtx (mode);
30498 rs6000_emit_nmsub (e0, d, x0, one);
30500 /* x1 = x0 + e0 * x0 */
30501 x1 = gen_reg_rtx (mode);
30502 rs6000_emit_madd (x1, e0, x0, x0);
30504 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
30505 ++i, xprev = xnext, eprev = enext) {
30507 /* enext = eprev * eprev */
30508 enext = gen_reg_rtx (mode);
30509 emit_insn (gen_mul (enext, eprev, eprev));
30511 /* xnext = xprev + enext * xprev */
30512 xnext = gen_reg_rtx (mode);
30513 rs6000_emit_madd (xnext, enext, xprev, xprev);
30516 } else
30517 xprev = x0;
30519 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
30521 /* u = n * xprev */
30522 u = gen_reg_rtx (mode);
30523 emit_insn (gen_mul (u, n, xprev));
30525 /* v = n - (d * u) */
30526 v = gen_reg_rtx (mode);
30527 rs6000_emit_nmsub (v, d, u, n);
30529 /* dst = (v * xprev) + u */
30530 rs6000_emit_madd (dst, v, xprev, u);
30532 if (note_p)
30533 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
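/* Illustrative sketch (added for exposition, not part of the original
   file): the same refinement on scalar doubles.  The fres hardware
   estimate is mimicked by rounding an exact reciprocal to float, and
   the fused negative multiply-subtracts of the real expansion become
   plain arithmetic; the pass count matches the DFmode case above.  */
#include <stdio.h>

static double
sw_divide (double n, double d)
{
  double x = (double) (float) (1.0 / d);  /* stand-in for fres */
  double e = 1.0 - d * x;                 /* e0 = 1 - d*x0 */
  double u, v;
  int i;
  x = x + e * x;                          /* x1 = x0 + e0*x0 */
  for (i = 0; i < 2; i++)                 /* the passes - 2 inner steps */
    {
      e = e * e;                          /* enext = eprev*eprev */
      x = x + e * x;                      /* xnext = xprev + enext*xprev */
    }
  u = n * x;                              /* u = n * xprev */
  v = n - d * u;                          /* v = n - d*u */
  return v * x + u;                       /* dst = v*xprev + u */
}

int
main (void)
{
  printf ("%.17g\n", sw_divide (1.0, 3.0));  /* ~0.33333333333333331 */
  return 0;
}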
30536 /* Newton-Raphson approximation of single/double-precision floating point
30537 rsqrt. Assumes no trapping math and finite arguments. */
30539 void
30540 rs6000_emit_swrsqrt (rtx dst, rtx src)
30542 enum machine_mode mode = GET_MODE (src);
30543 rtx x0 = gen_reg_rtx (mode);
30544 rtx y = gen_reg_rtx (mode);
30546 /* Low precision estimates guarantee 5 bits of accuracy. High
30547 precision estimates guarantee 14 bits of accuracy. SFmode
30548 requires 23 bits of accuracy. DFmode requires 52 bits of
30549 accuracy. Each pass at least doubles the accuracy, leading
30550 to the following. */
30551 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30552 if (mode == DFmode || mode == V2DFmode)
30553 passes++;
30555 REAL_VALUE_TYPE dconst3_2;
30556 int i;
30557 rtx halfthree;
30558 enum insn_code code = optab_handler (smul_optab, mode);
30559 insn_gen_fn gen_mul = GEN_FCN (code);
30561 gcc_assert (code != CODE_FOR_nothing);
30563 /* Load up the constant 1.5 either as a scalar, or as a vector. */
30564 real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
30565 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
30567 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
30569 /* x0 = rsqrt estimate */
30570 emit_insn (gen_rtx_SET (VOIDmode, x0,
30571 gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
30572 UNSPEC_RSQRT)));
30574 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
30575 rs6000_emit_msub (y, src, halfthree, src);
30577 for (i = 0; i < passes; i++)
30579 rtx x1 = gen_reg_rtx (mode);
30580 rtx u = gen_reg_rtx (mode);
30581 rtx v = gen_reg_rtx (mode);
30583 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
30584 emit_insn (gen_mul (u, x0, x0));
30585 rs6000_emit_nmsub (v, y, u, halfthree);
30586 emit_insn (gen_mul (x1, x0, v));
30587 x0 = x1;
30590 emit_move_insn (dst, x0);
30591 return;
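/* Illustrative sketch (added for exposition, not part of the original
   file): the rsqrt refinement on scalar doubles.  The frsqrte estimate
   is mimicked by rounding an exact value to float, and the fused
   multiply-adds become plain arithmetic.  Note how y = 1.5*src - src
   supplies 0.5*src without loading a second constant.  Link with -lm.  */
#include <math.h>
#include <stdio.h>

static double
sw_rsqrt (double src)
{
  double x = (double) (float) (1.0 / sqrt (src)); /* frsqrte stand-in */
  double halfthree = 1.5;
  double y = src * halfthree - src;               /* y = 0.5 * src */
  int i;
  for (i = 0; i < 4; i++)                         /* passes for DFmode */
    {
      double u = x * x;
      double v = halfthree - y * u;               /* 1.5 - y*x0*x0 */
      x = x * v;                                  /* x1 = x0 * v */
    }
  return x;
}

int
main (void)
{
  printf ("%.17g\n", sw_rsqrt (2.0));  /* ~0.7071067811865475 */
  return 0;
}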
30594 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
30595 (Power7) targets. DST is the target, and SRC is the argument operand. */
30597 void
30598 rs6000_emit_popcount (rtx dst, rtx src)
30600 enum machine_mode mode = GET_MODE (dst);
30601 rtx tmp1, tmp2;
30603 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
30604 if (TARGET_POPCNTD)
30606 if (mode == SImode)
30607 emit_insn (gen_popcntdsi2 (dst, src));
30608 else
30609 emit_insn (gen_popcntddi2 (dst, src));
30610 return;
30613 tmp1 = gen_reg_rtx (mode);
30615 if (mode == SImode)
30617 emit_insn (gen_popcntbsi2 (tmp1, src));
30618 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
30619 NULL_RTX, 0);
30620 tmp2 = force_reg (SImode, tmp2);
30621 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
30623 else
30625 emit_insn (gen_popcntbdi2 (tmp1, src));
30626 tmp2 = expand_mult (DImode, tmp1,
30627 GEN_INT ((HOST_WIDE_INT)
30628 0x01010101 << 32 | 0x01010101),
30629 NULL_RTX, 0);
30630 tmp2 = force_reg (DImode, tmp2);
30631 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
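/* Illustrative sketch (added for exposition, not part of the original
   file): the multiply-and-shift reduction in the SImode branch above,
   with a SWAR sequence standing in for the popcntb per-byte counts.  */
#include <stdio.h>

static unsigned
popcount32 (unsigned x)
{
  /* Per-byte bit counts, a software stand-in for popcntb.  */
  unsigned t = x - ((x >> 1) & 0x55555555u);
  t = (t & 0x33333333u) + ((t >> 2) & 0x33333333u);
  t = (t + (t >> 4)) & 0x0f0f0f0fu;
  /* The expansion's trick: multiplying by 0x01010101 sums all four
     byte counts into the top byte, which the shift extracts.  */
  return (t * 0x01010101u) >> 24;
}

int
main (void)
{
  printf ("%u\n", popcount32 (0xdeadbeefu));  /* 24 */
  return 0;
}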
30636 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
30637 target, and SRC is the argument operand. */
30639 void
30640 rs6000_emit_parity (rtx dst, rtx src)
30642 enum machine_mode mode = GET_MODE (dst);
30643 rtx tmp;
30645 tmp = gen_reg_rtx (mode);
30647 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
30648 if (TARGET_CMPB)
30650 if (mode == SImode)
30652 emit_insn (gen_popcntbsi2 (tmp, src));
30653 emit_insn (gen_paritysi2_cmpb (dst, tmp));
30655 else
30657 emit_insn (gen_popcntbdi2 (tmp, src));
30658 emit_insn (gen_paritydi2_cmpb (dst, tmp));
30660 return;
30663 if (mode == SImode)
30665 /* Is mult+shift >= shift+xor+shift+xor? */
30666 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
30668 rtx tmp1, tmp2, tmp3, tmp4;
30670 tmp1 = gen_reg_rtx (SImode);
30671 emit_insn (gen_popcntbsi2 (tmp1, src));
30673 tmp2 = gen_reg_rtx (SImode);
30674 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
30675 tmp3 = gen_reg_rtx (SImode);
30676 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
30678 tmp4 = gen_reg_rtx (SImode);
30679 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
30680 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
30682 else
30683 rs6000_emit_popcount (tmp, src);
30684 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
30686 else
30688 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
30689 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
30691 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
30693 tmp1 = gen_reg_rtx (DImode);
30694 emit_insn (gen_popcntbdi2 (tmp1, src));
30696 tmp2 = gen_reg_rtx (DImode);
30697 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
30698 tmp3 = gen_reg_rtx (DImode);
30699 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
30701 tmp4 = gen_reg_rtx (DImode);
30702 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
30703 tmp5 = gen_reg_rtx (DImode);
30704 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
30706 tmp6 = gen_reg_rtx (DImode);
30707 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
30708 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
30710 else
30711 rs6000_emit_popcount (tmp, src);
30712 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
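/* Illustrative sketch (added for exposition, not part of the original
   file): the shift+xor reduction chosen above when the multiply is
   slow, again with a SWAR sequence standing in for popcntb.  */
#include <stdio.h>

static unsigned
parity32 (unsigned x)
{
  /* Per-byte bit counts, a software stand-in for popcntb.  */
  x = x - ((x >> 1) & 0x55555555u);
  x = (x & 0x33333333u) + ((x >> 2) & 0x33333333u);
  x = (x + (x >> 4)) & 0x0f0f0f0fu;
  /* The SImode sequence: xoring the byte counts preserves the low
     (parity) bit, so two fold steps leave the parity in bit 0.  */
  x ^= x >> 16;
  x ^= x >> 8;
  return x & 1;
}

int
main (void)
{
  printf ("%u\n", parity32 (0xdeadbeefu));  /* 24 bits set -> 0 */
  return 0;
}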
30716 /* Expand an Altivec constant permutation for little endian mode.
30717 There are two issues: First, the two input operands must be
30718 swapped so that together they form a double-wide array in LE
30719 order. Second, the vperm instruction has surprising behavior
30720 in LE mode: it interprets the elements of the source vectors
30721 in BE mode ("left to right") and interprets the elements of
30722 the destination vector in LE mode ("right to left"). To
30723 correct for this, we must subtract each element of the permute
30724 control vector from 31.
30726 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
30727 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
30728 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
30729 serve as the permute control vector. Then, in BE mode,
30731 vperm 9,10,11,12
30733 places the desired result in vr9. However, in LE mode the
30734 vector contents will be
30736 vr10 = 00000003 00000002 00000001 00000000
30737 vr11 = 00000007 00000006 00000005 00000004
30739 The result of the vperm using the same permute control vector is
30741 vr9 = 05000000 07000000 01000000 03000000
30743 That is, the leftmost 4 bytes of vr10 are interpreted as the
30744 source for the rightmost 4 bytes of vr9, and so on.
30746 If we change the permute control vector to
30748 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
30750 and issue
30752 vperm 9,11,10,12
30754 we get the desired
30756 vr9 = 00000006 00000004 00000002 00000000. */
30758 void
30759 altivec_expand_vec_perm_const_le (rtx operands[4])
30761 unsigned int i;
30762 rtx perm[16];
30763 rtx constv, unspec;
30764 rtx target = operands[0];
30765 rtx op0 = operands[1];
30766 rtx op1 = operands[2];
30767 rtx sel = operands[3];
30769 /* Unpack and adjust the constant selector. */
30770 for (i = 0; i < 16; ++i)
30772 rtx e = XVECEXP (sel, 0, i);
30773 unsigned int elt = 31 - (INTVAL (e) & 31);
30774 perm[i] = GEN_INT (elt);
30777 /* Expand to a permute, swapping the inputs and using the
30778 adjusted selector. */
30779 if (!REG_P (op0))
30780 op0 = force_reg (V16QImode, op0);
30781 if (!REG_P (op1))
30782 op1 = force_reg (V16QImode, op1);
30784 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
30785 constv = force_reg (V16QImode, constv);
30786 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
30787 UNSPEC_VPERM);
30788 if (!REG_P (target))
30790 rtx tmp = gen_reg_rtx (V16QImode);
30791 emit_move_insn (tmp, unspec);
30792 unspec = tmp;
30795 emit_move_insn (target, unspec);
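/* Illustrative sketch (added for exposition, not part of the original
   file): the selector adjustment in isolation.  Each element is
   reduced mod 32 and subtracted from 31; together with swapping the
   two inputs this gives the BE-numbered vperm the LE semantics.  */
#include <stdio.h>

int
main (void)
{
  /* BE control vector extracting the even words of a concatenation,
     as in the worked example above.  */
  unsigned char sel[16] = { 0, 1, 2, 3, 8, 9, 10, 11,
                            16, 17, 18, 19, 24, 25, 26, 27 };
  unsigned char le_sel[16];
  int i;
  for (i = 0; i < 16; i++)
    le_sel[i] = 31 - (sel[i] & 31);
  for (i = 0; i < 16; i++)
    printf ("%u%c", le_sel[i], i == 15 ? '\n' : ',');
  /* Prints 31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4, the vector
     quoted in the comment above; the expansion then issues vperm
     with op0 and op1 reversed.  */
  return 0;
}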
30798 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
30799 permute control vector. But here it's not a constant, so we must
30800 generate a vector NAND or NOR to do the adjustment. */
30802 void
30803 altivec_expand_vec_perm_le (rtx operands[4])
30805 rtx notx, iorx, unspec;
30806 rtx target = operands[0];
30807 rtx op0 = operands[1];
30808 rtx op1 = operands[2];
30809 rtx sel = operands[3];
30810 rtx tmp = target;
30811 rtx norreg = gen_reg_rtx (V16QImode);
30812 enum machine_mode mode = GET_MODE (target);
30814 /* Get everything in regs so the pattern matches. */
30815 if (!REG_P (op0))
30816 op0 = force_reg (mode, op0);
30817 if (!REG_P (op1))
30818 op1 = force_reg (mode, op1);
30819 if (!REG_P (sel))
30820 sel = force_reg (V16QImode, sel);
30821 if (!REG_P (target))
30822 tmp = gen_reg_rtx (mode);
30824 /* Invert the selector with a VNAND if available, else a VNOR.
30825 The VNAND is preferred for future fusion opportunities. */
30826 notx = gen_rtx_NOT (V16QImode, sel);
30827 iorx = (TARGET_P8_VECTOR
30828 ? gen_rtx_IOR (V16QImode, notx, notx)
30829 : gen_rtx_AND (V16QImode, notx, notx));
30830 emit_insn (gen_rtx_SET (VOIDmode, norreg, iorx));
30832 /* Permute with operands reversed and adjusted selector. */
30833 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
30834 UNSPEC_VPERM);
30836 /* Copy into target, possibly by way of a register. */
30837 if (!REG_P (target))
30839 emit_move_insn (tmp, unspec);
30840 unspec = tmp;
30843 emit_move_insn (target, unspec);
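/* Illustrative sketch (added for exposition, not part of the original
   file): why one vnand/vnor suffices.  vperm reads only the low five
   bits of each selector byte, and for e in [0,31], 31 - e equals
   (~e) & 31; ~e in turn is e NOR e (or e NAND e), so a single
   two-input logical instruction performs the whole adjustment.  */
#include <stdio.h>

int
main (void)
{
  unsigned e;
  for (e = 0; e < 32; e++)
    if ((~e & 31u) != 31 - e)
      return 1;                 /* never taken: the identity holds */
  printf ("31 - e == (e NOR e) & 31 for all e in [0,31]\n");
  return 0;
}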
30846 /* Expand an Altivec constant permutation. Return true if we match
30847 an efficient implementation; false to fall back to VPERM. */
30849 bool
30850 altivec_expand_vec_perm_const (rtx operands[4])
30852 struct altivec_perm_insn {
30853 HOST_WIDE_INT mask;
30854 enum insn_code impl;
30855 unsigned char perm[16];
30857 static const struct altivec_perm_insn patterns[] = {
30858 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
30859 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
30860 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
30861 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
30862 { OPTION_MASK_ALTIVEC,
30863 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
30864 : CODE_FOR_altivec_vmrglb_direct),
30865 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
30866 { OPTION_MASK_ALTIVEC,
30867 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
30868 : CODE_FOR_altivec_vmrglh_direct),
30869 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
30870 { OPTION_MASK_ALTIVEC,
30871 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
30872 : CODE_FOR_altivec_vmrglw_direct),
30873 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
30874 { OPTION_MASK_ALTIVEC,
30875 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
30876 : CODE_FOR_altivec_vmrghb_direct),
30877 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
30878 { OPTION_MASK_ALTIVEC,
30879 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
30880 : CODE_FOR_altivec_vmrghh_direct),
30881 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
30882 { OPTION_MASK_ALTIVEC,
30883 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
30884 : CODE_FOR_altivec_vmrghw_direct),
30885 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
30886 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
30887 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
30888 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
30889 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
30892 unsigned int i, j, elt, which;
30893 unsigned char perm[16];
30894 rtx target, op0, op1, sel, x;
30895 bool one_vec;
30897 target = operands[0];
30898 op0 = operands[1];
30899 op1 = operands[2];
30900 sel = operands[3];
30902 /* Unpack the constant selector. */
30903 for (i = which = 0; i < 16; ++i)
30905 rtx e = XVECEXP (sel, 0, i);
30906 elt = INTVAL (e) & 31;
30907 which |= (elt < 16 ? 1 : 2);
30908 perm[i] = elt;
30911 /* Simplify the constant selector based on operands. */
30912 switch (which)
30914 default:
30915 gcc_unreachable ();
30917 case 3:
30918 one_vec = false;
30919 if (!rtx_equal_p (op0, op1))
30920 break;
30921 /* FALLTHRU */
30923 case 2:
30924 for (i = 0; i < 16; ++i)
30925 perm[i] &= 15;
30926 op0 = op1;
30927 one_vec = true;
30928 break;
30930 case 1:
30931 op1 = op0;
30932 one_vec = true;
30933 break;
30936 /* Look for splat patterns. */
30937 if (one_vec)
30939 elt = perm[0];
30941 for (i = 0; i < 16; ++i)
30942 if (perm[i] != elt)
30943 break;
30944 if (i == 16)
30946 if (!BYTES_BIG_ENDIAN)
30947 elt = 15 - elt;
30948 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
30949 return true;
30952 if (elt % 2 == 0)
30954 for (i = 0; i < 16; i += 2)
30955 if (perm[i] != elt || perm[i + 1] != elt + 1)
30956 break;
30957 if (i == 16)
30959 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
30960 x = gen_reg_rtx (V8HImode);
30961 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
30962 GEN_INT (field)));
30963 emit_move_insn (target, gen_lowpart (V16QImode, x));
30964 return true;
30968 if (elt % 4 == 0)
30970 for (i = 0; i < 16; i += 4)
30971 if (perm[i] != elt
30972 || perm[i + 1] != elt + 1
30973 || perm[i + 2] != elt + 2
30974 || perm[i + 3] != elt + 3)
30975 break;
30976 if (i == 16)
30978 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
30979 x = gen_reg_rtx (V4SImode);
30980 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
30981 GEN_INT (field)));
30982 emit_move_insn (target, gen_lowpart (V16QImode, x));
30983 return true;
30988 /* Look for merge and pack patterns. */
30989 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
30991 bool swapped;
30993 if ((patterns[j].mask & rs6000_isa_flags) == 0)
30994 continue;
30996 elt = patterns[j].perm[0];
30997 if (perm[0] == elt)
30998 swapped = false;
30999 else if (perm[0] == elt + 16)
31000 swapped = true;
31001 else
31002 continue;
31003 for (i = 1; i < 16; ++i)
31005 elt = patterns[j].perm[i];
31006 if (swapped)
31007 elt = (elt >= 16 ? elt - 16 : elt + 16);
31008 else if (one_vec && elt >= 16)
31009 elt -= 16;
31010 if (perm[i] != elt)
31011 break;
31013 if (i == 16)
31015 enum insn_code icode = patterns[j].impl;
31016 enum machine_mode omode = insn_data[icode].operand[0].mode;
31017 enum machine_mode imode = insn_data[icode].operand[1].mode;
31019 /* For little-endian, don't use vpkuwum and vpkuhum if the
31020 underlying vector type is not V4SI and V8HI, respectively.
31021 For example, using vpkuwum with a V8HI picks up the even
31022 halfwords (BE numbering) when the even halfwords (LE
31023 numbering) are what we need. */
31024 if (!BYTES_BIG_ENDIAN
31025 && icode == CODE_FOR_altivec_vpkuwum_direct
31026 && ((GET_CODE (op0) == REG
31027 && GET_MODE (op0) != V4SImode)
31028 || (GET_CODE (op0) == SUBREG
31029 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
31030 continue;
31031 if (!BYTES_BIG_ENDIAN
31032 && icode == CODE_FOR_altivec_vpkuhum_direct
31033 && ((GET_CODE (op0) == REG
31034 && GET_MODE (op0) != V8HImode)
31035 || (GET_CODE (op0) == SUBREG
31036 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
31037 continue;
31039 /* For little-endian, the two input operands must be swapped
31040 (or swapped back) to ensure proper right-to-left numbering
31041 from 0 to 2N-1. */
31042 if (swapped ^ !BYTES_BIG_ENDIAN)
31043 x = op0, op0 = op1, op1 = x;
31044 if (imode != V16QImode)
31046 op0 = gen_lowpart (imode, op0);
31047 op1 = gen_lowpart (imode, op1);
31049 if (omode == V16QImode)
31050 x = target;
31051 else
31052 x = gen_reg_rtx (omode);
31053 emit_insn (GEN_FCN (icode) (x, op0, op1));
31054 if (omode != V16QImode)
31055 emit_move_insn (target, gen_lowpart (V16QImode, x));
31056 return true;
31060 if (!BYTES_BIG_ENDIAN)
31062 altivec_expand_vec_perm_const_le (operands);
31063 return true;
31066 return false;
31069 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
31070 Return true if we match an efficient implementation. */
31072 static bool
31073 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
31074 unsigned char perm0, unsigned char perm1)
31076 rtx x;
31078 /* If both selectors come from the same operand, fold to single op. */
31079 if ((perm0 & 2) == (perm1 & 2))
31081 if (perm0 & 2)
31082 op0 = op1;
31083 else
31084 op1 = op0;
31086 /* If both operands are equal, fold to simpler permutation. */
31087 if (rtx_equal_p (op0, op1))
31089 perm0 = perm0 & 1;
31090 perm1 = (perm1 & 1) + 2;
31092 /* If the first selector comes from the second operand, swap. */
31093 else if (perm0 & 2)
31095 if (perm1 & 2)
31096 return false;
31097 perm0 -= 2;
31098 perm1 += 2;
31099 x = op0, op0 = op1, op1 = x;
31101 /* If the second selector does not come from the second operand, fail. */
31102 else if ((perm1 & 2) == 0)
31103 return false;
31105 /* Success! */
31106 if (target != NULL)
31108 enum machine_mode vmode, dmode;
31109 rtvec v;
31111 vmode = GET_MODE (target);
31112 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
31113 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
31114 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
31115 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
31116 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
31117 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31119 return true;
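/* Illustrative sketch (added for exposition, not part of the original
   file): the canonicalization above traced on plain integers, with
   rtx operands replaced by ints and rtx_equal_p by ==.  Bit 1 of each
   selector picks the operand; the goal is perm0 selecting from op0 and
   perm1 from op1, folding or swapping operands as needed.  */
#include <stdbool.h>
#include <stdio.h>

static bool
canonicalize (unsigned *perm0, unsigned *perm1, int *op0, int *op1)
{
  /* If both selectors come from the same operand, fold to one op.  */
  if ((*perm0 & 2) == (*perm1 & 2))
    {
      if (*perm0 & 2) *op0 = *op1; else *op1 = *op0;
    }
  /* Equal operands: reduce to a permutation of a single vector.  */
  if (*op0 == *op1)
    {
      *perm0 &= 1;
      *perm1 = (*perm1 & 1) + 2;
    }
  /* First selector from the second operand: swap.  */
  else if (*perm0 & 2)
    {
      int t;
      if (*perm1 & 2)
        return false;
      *perm0 -= 2;
      *perm1 += 2;
      t = *op0; *op0 = *op1; *op1 = t;
    }
  /* The second selector must now come from the second operand.  */
  else if ((*perm1 & 2) == 0)
    return false;
  return true;
}

int
main (void)
{
  unsigned p0 = 3, p1 = 0;   /* element 1 of op1, element 0 of op0 */
  int a = 10, b = 20;
  if (canonicalize (&p0, &p1, &a, &b))
    /* perm0=1 perm1=2 op0=20 op1=10: operands swapped, bits fixed.  */
    printf ("perm0=%u perm1=%u op0=%d op1=%d\n", p0, p1, a, b);
  return 0;
}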
31122 bool
31123 rs6000_expand_vec_perm_const (rtx operands[4])
31125 rtx target, op0, op1, sel;
31126 unsigned char perm0, perm1;
31128 target = operands[0];
31129 op0 = operands[1];
31130 op1 = operands[2];
31131 sel = operands[3];
31133 /* Unpack the constant selector. */
31134 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
31135 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
31137 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
31140 /* Test whether a constant permutation is supported. */
31142 static bool
31143 rs6000_vectorize_vec_perm_const_ok (enum machine_mode vmode,
31144 const unsigned char *sel)
31146 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
31147 if (TARGET_ALTIVEC)
31148 return true;
31150 /* Check for ps_merge* or evmerge* insns. */
31151 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
31152 || (TARGET_SPE && vmode == V2SImode))
31154 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
31155 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
31156 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
31159 return false;
31162 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
31164 static void
31165 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
31166 enum machine_mode vmode, unsigned nelt, rtx perm[])
31168 enum machine_mode imode;
31169 rtx x;
31171 imode = vmode;
31172 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
31174 imode = GET_MODE_INNER (vmode);
31175 imode = mode_for_size (GET_MODE_BITSIZE (imode), MODE_INT, 0);
31176 imode = mode_for_vector (imode, nelt);
31179 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
31180 x = expand_vec_perm (vmode, op0, op1, x, target);
31181 if (x != target)
31182 emit_move_insn (target, x);
31185 /* Expand an extract even operation. */
31187 void
31188 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
31190 enum machine_mode vmode = GET_MODE (target);
31191 unsigned i, nelt = GET_MODE_NUNITS (vmode);
31192 rtx perm[16];
31194 for (i = 0; i < nelt; i++)
31195 perm[i] = GEN_INT (i * 2);
31197 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
31200 /* Expand a vector interleave operation. */
31202 void
31203 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
31205 enum machine_mode vmode = GET_MODE (target);
31206 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
31207 rtx perm[16];
31209 high = (highp ? 0 : nelt / 2);
31210 for (i = 0; i < nelt / 2; i++)
31212 perm[i * 2] = GEN_INT (i + high);
31213 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
31216 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
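/* Illustrative sketch (added for exposition, not part of the original
   file): the selector vectors the two expanders above build, printed
   for a hypothetical 4-element vector.  */
#include <stdio.h>

int
main (void)
{
  unsigned nelt = 4, perm[16], i;

  /* Extract even: elements 0,2,4,6 of the double-wide concatenation.  */
  for (i = 0; i < nelt; i++)
    perm[i] = i * 2;
  for (i = 0; i < nelt; i++)
    printf ("%u ", perm[i]);          /* 0 2 4 6 */
  printf ("\n");

  /* Interleave high halves (highp): pair element i of op0 with
     element i of op1; high is nelt/2 for the low halves.  */
  {
    unsigned high = 0;
    for (i = 0; i < nelt / 2; i++)
      {
        perm[i * 2] = i + high;
        perm[i * 2 + 1] = i + nelt + high;
      }
  }
  for (i = 0; i < nelt; i++)
    printf ("%u ", perm[i]);          /* 0 4 1 5 */
  printf ("\n");
  return 0;
}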
31219 /* Scale a V2DF vector SRC by two raised to the power SCALE, placing the result in TGT. */
31220 void
31221 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
31223 HOST_WIDE_INT hwi_scale (scale);
31224 REAL_VALUE_TYPE r_pow;
31225 rtvec v = rtvec_alloc (2);
31226 rtx elt;
31227 rtx scale_vec = gen_reg_rtx (V2DFmode);
31228 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
31229 elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
31230 RTVEC_ELT (v, 0) = elt;
31231 RTVEC_ELT (v, 1) = elt;
31232 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
31233 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
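/* Editorial sketch, not part of GCC: the element-wise effect of the
   sequence above.  Multiplying by 2**scale is exact in binary floating
   point (barring overflow or underflow), which is why a single multiply
   implements the scaling.  */
static void
model_scale_v2df (double tgt[2], const double src[2], int scale)
{
  double factor = 1.0;
  int i, n = scale < 0 ? -scale : scale;

  for (i = 0; i < n; i++)
    factor *= 2.0;      /* factor == 2**|scale| */
  if (scale < 0)
    factor = 1.0 / factor;

  tgt[0] = src[0] * factor;
  tgt[1] = src[1] * factor;
}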
31236 /* Return an RTX representing where to find the function value of a
31237 function returning MODE. */
31238 static rtx
31239 rs6000_complex_function_value (enum machine_mode mode)
31241 unsigned int regno;
31242 rtx r1, r2;
31243 enum machine_mode inner = GET_MODE_INNER (mode);
31244 unsigned int inner_bytes = GET_MODE_SIZE (inner);
31246 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31247 regno = FP_ARG_RETURN;
31248 else
31250 regno = GP_ARG_RETURN;
31252 /* 32-bit is OK since it'll go in r3/r4. */
31253 if (TARGET_32BIT && inner_bytes >= 4)
31254 return gen_rtx_REG (mode, regno);
31257 if (inner_bytes >= 8)
31258 return gen_rtx_REG (mode, regno);
31260 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
31261 const0_rtx);
31262 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
31263 GEN_INT (inner_bytes));
31264 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
31267 /* Target hook for TARGET_FUNCTION_VALUE.
31269 On the SPE, both floating-point values and vectors are returned in r3.
31271 On RS/6000 an integer value is in r3 and a floating-point value is in
31272 fp1, unless -msoft-float. */
31274 static rtx
31275 rs6000_function_value (const_tree valtype,
31276 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
31277 bool outgoing ATTRIBUTE_UNUSED)
31279 enum machine_mode mode;
31280 unsigned int regno;
31281 enum machine_mode elt_mode;
31282 int n_elts;
31284 /* Special handling for structs in darwin64. */
31285 if (TARGET_MACHO
31286 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
31288 CUMULATIVE_ARGS valcum;
31289 rtx valret;
31291 valcum.words = 0;
31292 valcum.fregno = FP_ARG_MIN_REG;
31293 valcum.vregno = ALTIVEC_ARG_MIN_REG;
31294 /* Do a trial code generation as if this were going to be passed as
31295 an argument; if any part goes in memory, we return NULL. */
31296 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
31297 if (valret)
31298 return valret;
31299 /* Otherwise fall through to standard ABI rules. */
31302 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
31303 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (valtype), valtype,
31304 &elt_mode, &n_elts))
31306 int first_reg, n_regs, i;
31307 rtx par;
31309 if (SCALAR_FLOAT_MODE_P (elt_mode))
31311 /* _Decimal128 must use even/odd register pairs. */
31312 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31313 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
31315 else
31317 first_reg = ALTIVEC_ARG_RETURN;
31318 n_regs = 1;
31321 par = gen_rtx_PARALLEL (TYPE_MODE (valtype), rtvec_alloc (n_elts));
31322 for (i = 0; i < n_elts; i++)
31324 rtx r = gen_rtx_REG (elt_mode, first_reg + i * n_regs);
31325 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
31326 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
31329 return par;
31332 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode)
31334 /* A long long return value needs to be split under -mpowerpc64 with the 32-bit ABI. */
31335 return gen_rtx_PARALLEL (DImode,
31336 gen_rtvec (2,
31337 gen_rtx_EXPR_LIST (VOIDmode,
31338 gen_rtx_REG (SImode, GP_ARG_RETURN),
31339 const0_rtx),
31340 gen_rtx_EXPR_LIST (VOIDmode,
31341 gen_rtx_REG (SImode,
31342 GP_ARG_RETURN + 1),
31343 GEN_INT (4))));
31345 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DCmode)
31347 return gen_rtx_PARALLEL (DCmode,
31348 gen_rtvec (4,
31349 gen_rtx_EXPR_LIST (VOIDmode,
31350 gen_rtx_REG (SImode, GP_ARG_RETURN),
31351 const0_rtx),
31352 gen_rtx_EXPR_LIST (VOIDmode,
31353 gen_rtx_REG (SImode,
31354 GP_ARG_RETURN + 1),
31355 GEN_INT (4)),
31356 gen_rtx_EXPR_LIST (VOIDmode,
31357 gen_rtx_REG (SImode,
31358 GP_ARG_RETURN + 2),
31359 GEN_INT (8)),
31360 gen_rtx_EXPR_LIST (VOIDmode,
31361 gen_rtx_REG (SImode,
31362 GP_ARG_RETURN + 3),
31363 GEN_INT (12))));
31366 mode = TYPE_MODE (valtype);
31367 if ((INTEGRAL_TYPE_P (valtype) && GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
31368 || POINTER_TYPE_P (valtype))
31369 mode = TARGET_32BIT ? SImode : DImode;
31371 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31372 /* _Decimal128 must use an even/odd register pair. */
31373 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31374 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
31375 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
31376 regno = FP_ARG_RETURN;
31377 else if (TREE_CODE (valtype) == COMPLEX_TYPE
31378 && targetm.calls.split_complex_arg)
31379 return rs6000_complex_function_value (mode);
31380 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31381 return register is used in both cases, and we won't see V2DImode/V2DFmode
31382 for pure altivec, combine the two cases. */
31383 else if (TREE_CODE (valtype) == VECTOR_TYPE
31384 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
31385 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
31386 regno = ALTIVEC_ARG_RETURN;
31387 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31388 && (mode == DFmode || mode == DCmode
31389 || mode == TFmode || mode == TCmode))
31390 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31391 else
31392 regno = GP_ARG_RETURN;
31394 return gen_rtx_REG (mode, regno);
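/* Editorial summary, not normative: with the usual definitions of the
   macros used above, scalar integers and pointers come back in r3,
   scalar floats in f1 (given hard FP), AltiVec/VSX vectors in v2, and
   the split cases (long long or DCmode under -m32 -mpowerpc64, and
   e500 doubles under TARGET_E500_DOUBLE) come back as PARALLELs over
   consecutive GPRs starting at r3.  */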
31397 /* Define how to find the value returned by a library function
31398 assuming the value has mode MODE. */
31400 rs6000_libcall_value (enum machine_mode mode)
31402 unsigned int regno;
31404 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
31406 /* A long long return value needs to be split under -mpowerpc64 with the 32-bit ABI. */
31407 return gen_rtx_PARALLEL (DImode,
31408 gen_rtvec (2,
31409 gen_rtx_EXPR_LIST (VOIDmode,
31410 gen_rtx_REG (SImode, GP_ARG_RETURN),
31411 const0_rtx),
31412 gen_rtx_EXPR_LIST (VOIDmode,
31413 gen_rtx_REG (SImode,
31414 GP_ARG_RETURN + 1),
31415 GEN_INT (4))));
31418 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31419 /* _Decimal128 must use an even/odd register pair. */
31420 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31421 else if (SCALAR_FLOAT_MODE_P (mode)
31422 && TARGET_HARD_FLOAT && TARGET_FPRS
31423 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
31424 regno = FP_ARG_RETURN;
31425 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31426 return register is used in both cases, and we won't see V2DImode/V2DFmode
31427 for pure altivec, combine the two cases. */
31428 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
31429 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
31430 regno = ALTIVEC_ARG_RETURN;
31431 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
31432 return rs6000_complex_function_value (mode);
31433 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31434 && (mode == DFmode || mode == DCmode
31435 || mode == TFmode || mode == TCmode))
31436 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31437 else
31438 regno = GP_ARG_RETURN;
31440 return gen_rtx_REG (mode, regno);
31444 /* Return true if we use LRA instead of the reload pass. */
31445 static bool
31446 rs6000_lra_p (void)
31448 return rs6000_lra_flag;
31451 /* Given FROM and TO register numbers, say whether this elimination is allowed.
31452 Frame pointer elimination is automatically handled.
31454 For the RS/6000, if frame pointer elimination is being done, we would like
31455 to convert ap into fp, not sp.
31457 We need r30 if -mminimal-toc was specified, and there are constant pool
31458 references. */
31460 static bool
31461 rs6000_can_eliminate (const int from, const int to)
31463 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
31464 ? ! frame_pointer_needed
31465 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
31466 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
31467 : true);
31470 /* Define the offset between two registers, FROM to be eliminated and its
31471 replacement TO, at the start of a routine. */
31472 HOST_WIDE_INT
31473 rs6000_initial_elimination_offset (int from, int to)
31475 rs6000_stack_t *info = rs6000_stack_info ();
31476 HOST_WIDE_INT offset;
31478 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31479 offset = info->push_p ? 0 : -info->total_size;
31480 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31482 offset = info->push_p ? 0 : -info->total_size;
31483 if (FRAME_GROWS_DOWNWARD)
31484 offset += info->fixed_size + info->vars_size + info->parm_size;
31486 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31487 offset = FRAME_GROWS_DOWNWARD
31488 ? info->fixed_size + info->vars_size + info->parm_size
31489 : 0;
31490 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31491 offset = info->total_size;
31492 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31493 offset = info->push_p ? info->total_size : 0;
31494 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
31495 offset = 0;
31496 else
31497 gcc_unreachable ();
31499 return offset;
31502 static rtx
31503 rs6000_dwarf_register_span (rtx reg)
31505 rtx parts[8];
31506 int i, words;
31507 unsigned regno = REGNO (reg);
31508 enum machine_mode mode = GET_MODE (reg);
31510 if (TARGET_SPE
31511 && regno < 32
31512 && (SPE_VECTOR_MODE (GET_MODE (reg))
31513 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
31514 && mode != SFmode && mode != SDmode && mode != SCmode)))
31516 else
31517 return NULL_RTX;
31519 regno = REGNO (reg);
31521 /* The duality of the SPE register size wreaks all kinds of havoc.
31522 This is a way of distinguishing r0 in 32-bits from r0 in
31523 64-bits. */
31524 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
31525 gcc_assert (words <= 4);
31526 for (i = 0; i < words; i++, regno++)
31528 if (BYTES_BIG_ENDIAN)
31530 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31531 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
31533 else
31535 parts[2 * i] = gen_rtx_REG (SImode, regno);
31536 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31540 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
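/* Editorial note, not normative: for a 64-bit value in r5 on a
   big-endian SPE target, the span built above is the two SImode pieces
   (upper half of r5, lower half of r5), so the unwinder can track the
   32-bit halves of the 64-bit SPE register separately.  */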
31543 /* Fill in sizes for SPE register high parts in the table used by the unwinder. */
31545 static void
31546 rs6000_init_dwarf_reg_sizes_extra (tree address)
31548 if (TARGET_SPE)
31550 int i;
31551 enum machine_mode mode = TYPE_MODE (char_type_node);
31552 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31553 rtx mem = gen_rtx_MEM (BLKmode, addr);
31554 rtx value = gen_int_mode (4, mode);
31556 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
31558 int column = DWARF_REG_TO_UNWIND_COLUMN
31559 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31560 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31562 emit_move_insn (adjust_address (mem, mode, offset), value);
31566 if (TARGET_MACHO && ! TARGET_ALTIVEC)
31568 int i;
31569 enum machine_mode mode = TYPE_MODE (char_type_node);
31570 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31571 rtx mem = gen_rtx_MEM (BLKmode, addr);
31572 rtx value = gen_int_mode (16, mode);
31574 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
31575 The unwinder still needs to know the size of Altivec registers. */
31577 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
31579 int column = DWARF_REG_TO_UNWIND_COLUMN
31580 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31581 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31583 emit_move_insn (adjust_address (mem, mode, offset), value);
31588 /* Map internal gcc register numbers to debug format register numbers.
31589 FORMAT specifies the type of debug register number to use:
31590 0 -- debug information, except for frame-related sections
31591 1 -- DWARF .debug_frame section
31592 2 -- DWARF .eh_frame section */
31594 unsigned int
31595 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
31597 /* We never use the GCC internal number for SPE high registers.
31598 Those are mapped to the 1200..1231 range for all debug formats. */
31599 if (SPE_HIGH_REGNO_P (regno))
31600 return regno - FIRST_SPE_HIGH_REGNO + 1200;
31602 /* Except for the above, we use the internal number for non-DWARF
31603 debug information, and also for .eh_frame. */
31604 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
31605 return regno;
31607 /* On some platforms, we use the standard DWARF register
31608 numbering for .debug_info and .debug_frame. */
31609 #ifdef RS6000_USE_DWARF_NUMBERING
31610 if (regno <= 63)
31611 return regno;
31612 if (regno == LR_REGNO)
31613 return 108;
31614 if (regno == CTR_REGNO)
31615 return 109;
31616 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
31617 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
31618 The actual code emitted saves the whole of CR, so we map CR2_REGNO
31619 to the DWARF reg for CR. */
31620 if (format == 1 && regno == CR2_REGNO)
31621 return 64;
31622 if (CR_REGNO_P (regno))
31623 return regno - CR0_REGNO + 86;
31624 if (regno == CA_REGNO)
31625 return 101; /* XER */
31626 if (ALTIVEC_REGNO_P (regno))
31627 return regno - FIRST_ALTIVEC_REGNO + 1124;
31628 if (regno == VRSAVE_REGNO)
31629 return 356;
31630 if (regno == VSCR_REGNO)
31631 return 67;
31632 if (regno == SPE_ACC_REGNO)
31633 return 99;
31634 if (regno == SPEFSCR_REGNO)
31635 return 612;
31636 #endif
31637 return regno;
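/* Editorial examples, not normative, assuming RS6000_USE_DWARF_NUMBERING:
   GPRs and FPRs keep their numbers (0..63), LR maps to 108, CTR to 109,
   CR2 in .debug_frame maps to 64 (the whole CR), and AltiVec vr0 maps
   to 1124.  */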
31640 /* Target hook for eh_return_filter_mode. */
31641 static enum machine_mode
31642 rs6000_eh_return_filter_mode (void)
31644 return TARGET_32BIT ? SImode : word_mode;
31647 /* Target hook for scalar_mode_supported_p. */
31648 static bool
31649 rs6000_scalar_mode_supported_p (enum machine_mode mode)
31651 if (DECIMAL_FLOAT_MODE_P (mode))
31652 return default_decimal_float_supported_p ();
31653 else
31654 return default_scalar_mode_supported_p (mode);
31657 /* Target hook for vector_mode_supported_p. */
31658 static bool
31659 rs6000_vector_mode_supported_p (enum machine_mode mode)
31662 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
31663 return true;
31665 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
31666 return true;
31668 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
31669 return true;
31671 else
31672 return false;
31675 /* Target hook for invalid_arg_for_unprototyped_fn. */
31676 static const char *
31677 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
31679 return (!rs6000_darwin64_abi
31680 && typelist == 0
31681 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
31682 && (funcdecl == NULL_TREE
31683 || (TREE_CODE (funcdecl) == FUNCTION_DECL
31684 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
31685 ? N_("AltiVec argument passed to unprototyped function")
31686 : NULL;
31689 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
31690 setup by using the hidden function __stack_chk_fail_local instead of
31691 calling __stack_chk_fail directly. Otherwise it is better to call
31692 __stack_chk_fail directly. */
31694 static tree ATTRIBUTE_UNUSED
31695 rs6000_stack_protect_fail (void)
31697 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
31698 ? default_hidden_stack_protect_fail ()
31699 : default_external_stack_protect_fail ();
31702 void
31703 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
31704 int num_operands ATTRIBUTE_UNUSED)
31706 if (rs6000_warn_cell_microcode)
31708 const char *temp;
31709 int insn_code_number = recog_memoized (insn);
31710 location_t location = INSN_LOCATION (insn);
31712 /* Punt on insns we cannot recognize. */
31713 if (insn_code_number < 0)
31714 return;
31716 temp = get_insn_template (insn_code_number, insn);
31718 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
31719 warning_at (location, OPT_mwarn_cell_microcode,
31720 "emitting microcode insn %s\t[%s] #%d",
31721 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
31722 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
31723 warning_at (location, OPT_mwarn_cell_microcode,
31724 "emitting conditional microcode insn %s\t[%s] #%d",
31725 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
31729 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31731 #if TARGET_ELF
31732 static unsigned HOST_WIDE_INT
31733 rs6000_asan_shadow_offset (void)
31735 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
31737 #endif
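/* Editorial sketch, not part of GCC: how AddressSanitizer uses the
   offset returned above.  Each shadow byte covers eight bytes of
   memory, and the shadow for an address lives at (addr >> 3) plus the
   target's shadow offset.  */
static unsigned long long
model_asan_shadow_addr (unsigned long long addr, int is_64bit)
{
  unsigned long long offset = 1ULL << (is_64bit ? 41 : 29);
  return (addr >> 3) + offset;
}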
31739 /* Mask options that we want to support inside of attribute((target)) and
31740 #pragma GCC target operations. Note, we do not include things like
31741 64/32-bit, endianness, hard/soft floating point, etc., that would have
31742 different calling sequences. */
31744 struct rs6000_opt_mask {
31745 const char *name; /* option name */
31746 HOST_WIDE_INT mask; /* mask to set */
31747 bool invert; /* invert sense of mask */
31748 bool valid_target; /* option is a target option */
31751 static struct rs6000_opt_mask const rs6000_opt_masks[] =
31753 { "altivec", OPTION_MASK_ALTIVEC, false, true },
31754 { "cmpb", OPTION_MASK_CMPB, false, true },
31755 { "crypto", OPTION_MASK_CRYPTO, false, true },
31756 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
31757 { "dlmzb", OPTION_MASK_DLMZB, false, true },
31758 { "fprnd", OPTION_MASK_FPRND, false, true },
31759 { "hard-dfp", OPTION_MASK_DFP, false, true },
31760 { "htm", OPTION_MASK_HTM, false, true },
31761 { "isel", OPTION_MASK_ISEL, false, true },
31762 { "mfcrf", OPTION_MASK_MFCRF, false, true },
31763 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
31764 { "mulhw", OPTION_MASK_MULHW, false, true },
31765 { "multiple", OPTION_MASK_MULTIPLE, false, true },
31766 { "popcntb", OPTION_MASK_POPCNTB, false, true },
31767 { "popcntd", OPTION_MASK_POPCNTD, false, true },
31768 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
31769 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
31770 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
31771 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
31772 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
31773 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
31774 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
31775 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
31776 { "string", OPTION_MASK_STRING, false, true },
31777 { "update", OPTION_MASK_NO_UPDATE, true , true },
31778 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
31779 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
31780 { "vsx", OPTION_MASK_VSX, false, true },
31781 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
31782 #ifdef OPTION_MASK_64BIT
31783 #if TARGET_AIX_OS
31784 { "aix64", OPTION_MASK_64BIT, false, false },
31785 { "aix32", OPTION_MASK_64BIT, true, false },
31786 #else
31787 { "64", OPTION_MASK_64BIT, false, false },
31788 { "32", OPTION_MASK_64BIT, true, false },
31789 #endif
31790 #endif
31791 #ifdef OPTION_MASK_EABI
31792 { "eabi", OPTION_MASK_EABI, false, false },
31793 #endif
31794 #ifdef OPTION_MASK_LITTLE_ENDIAN
31795 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
31796 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
31797 #endif
31798 #ifdef OPTION_MASK_RELOCATABLE
31799 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
31800 #endif
31801 #ifdef OPTION_MASK_STRICT_ALIGN
31802 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
31803 #endif
31804 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
31805 { "string", OPTION_MASK_STRING, false, false },
31808 /* Builtin mask mapping for printing the flags. */
31809 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
31811 { "altivec", RS6000_BTM_ALTIVEC, false, false },
31812 { "vsx", RS6000_BTM_VSX, false, false },
31813 { "spe", RS6000_BTM_SPE, false, false },
31814 { "paired", RS6000_BTM_PAIRED, false, false },
31815 { "fre", RS6000_BTM_FRE, false, false },
31816 { "fres", RS6000_BTM_FRES, false, false },
31817 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
31818 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
31819 { "popcntd", RS6000_BTM_POPCNTD, false, false },
31820 { "cell", RS6000_BTM_CELL, false, false },
31821 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
31822 { "crypto", RS6000_BTM_CRYPTO, false, false },
31823 { "htm", RS6000_BTM_HTM, false, false },
31824 { "hard-dfp", RS6000_BTM_DFP, false, false },
31825 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
31826 { "long-double-128", RS6000_BTM_LDBL128, false, false },
31829 /* Option variables that we want to support inside attribute((target)) and
31830 #pragma GCC target operations. */
31832 struct rs6000_opt_var {
31833 const char *name; /* option name */
31834 size_t global_offset; /* offset of the option in global_options. */
31835 size_t target_offset; /* offset of the option in target options. */
31838 static struct rs6000_opt_var const rs6000_opt_vars[] =
31840 { "friz",
31841 offsetof (struct gcc_options, x_TARGET_FRIZ),
31842 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
31843 { "avoid-indexed-addresses",
31844 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
31845 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
31846 { "paired",
31847 offsetof (struct gcc_options, x_rs6000_paired_float),
31848 offsetof (struct cl_target_option, x_rs6000_paired_float), },
31849 { "longcall",
31850 offsetof (struct gcc_options, x_rs6000_default_long_calls),
31851 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
31854 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
31855 parsing. Return true if there were no errors. */
31857 static bool
31858 rs6000_inner_target_options (tree args, bool attr_p)
31860 bool ret = true;
31862 if (args == NULL_TREE)
31865 else if (TREE_CODE (args) == STRING_CST)
31867 char *p = ASTRDUP (TREE_STRING_POINTER (args));
31868 char *q;
31870 while ((q = strtok (p, ",")) != NULL)
31872 bool error_p = false;
31873 bool not_valid_p = false;
31874 const char *cpu_opt = NULL;
31876 p = NULL;
31877 if (strncmp (q, "cpu=", 4) == 0)
31879 int cpu_index = rs6000_cpu_name_lookup (q+4);
31880 if (cpu_index >= 0)
31881 rs6000_cpu_index = cpu_index;
31882 else
31884 error_p = true;
31885 cpu_opt = q+4;
31888 else if (strncmp (q, "tune=", 5) == 0)
31890 int tune_index = rs6000_cpu_name_lookup (q+5);
31891 if (tune_index >= 0)
31892 rs6000_tune_index = tune_index;
31893 else
31895 error_p = true;
31896 cpu_opt = q+5;
31899 else
31901 size_t i;
31902 bool invert = false;
31903 char *r = q;
31905 error_p = true;
31906 if (strncmp (r, "no-", 3) == 0)
31908 invert = true;
31909 r += 3;
31912 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
31913 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
31915 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
31917 if (!rs6000_opt_masks[i].valid_target)
31918 not_valid_p = true;
31919 else
31921 error_p = false;
31922 rs6000_isa_flags_explicit |= mask;
31924 /* VSX needs altivec, so -mvsx automagically sets
31925 altivec. */
31926 if (mask == OPTION_MASK_VSX && !invert)
31927 mask |= OPTION_MASK_ALTIVEC;
31929 if (rs6000_opt_masks[i].invert)
31930 invert = !invert;
31932 if (invert)
31933 rs6000_isa_flags &= ~mask;
31934 else
31935 rs6000_isa_flags |= mask;
31937 break;
31940 if (error_p && !not_valid_p)
31942 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
31943 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
31945 size_t j = rs6000_opt_vars[i].global_offset;
31946 *((int *) ((char *)&global_options + j)) = !invert;
31947 error_p = false;
31948 break;
31953 if (error_p)
31955 const char *eprefix, *esuffix;
31957 ret = false;
31958 if (attr_p)
31960 eprefix = "__attribute__((__target__(";
31961 esuffix = ")))";
31963 else
31965 eprefix = "#pragma GCC target ";
31966 esuffix = "";
31969 if (cpu_opt)
31970 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
31971 q, esuffix);
31972 else if (not_valid_p)
31973 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
31974 else
31975 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
31980 else if (TREE_CODE (args) == TREE_LIST)
31984 tree value = TREE_VALUE (args);
31985 if (value)
31987 bool ret2 = rs6000_inner_target_options (value, attr_p);
31988 if (!ret2)
31989 ret = false;
31991 args = TREE_CHAIN (args);
31993 while (args != NULL_TREE);
31996 else
31997 gcc_unreachable ();
31999 return ret;
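/* Editorial usage example, not part of GCC: strings the parser above
   accepts, as they would appear in user source (kept unbuilt here).  */
#if 0
__attribute__((__target__("cpu=power7,no-vsx")))
extern int narrowed_fn (void);          /* cpu= plus an inverted mask */
#pragma GCC target ("altivec,avoid-indexed-addresses")
#endif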
32002 /* Print out the target options as a list for -mdebug=target. */
32004 static void
32005 rs6000_debug_target_options (tree args, const char *prefix)
32007 if (args == NULL_TREE)
32008 fprintf (stderr, "%s<NULL>", prefix);
32010 else if (TREE_CODE (args) == STRING_CST)
32012 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32013 char *q;
32015 while ((q = strtok (p, ",")) != NULL)
32017 p = NULL;
32018 fprintf (stderr, "%s\"%s\"", prefix, q);
32019 prefix = ", ";
32023 else if (TREE_CODE (args) == TREE_LIST)
32027 tree value = TREE_VALUE (args);
32028 if (value)
32030 rs6000_debug_target_options (value, prefix);
32031 prefix = ", ";
32033 args = TREE_CHAIN (args);
32035 while (args != NULL_TREE);
32038 else
32039 gcc_unreachable ();
32041 return;
32045 /* Hook to validate attribute((target("..."))). */
32047 static bool
32048 rs6000_valid_attribute_p (tree fndecl,
32049 tree ARG_UNUSED (name),
32050 tree args,
32051 int flags)
32053 struct cl_target_option cur_target;
32054 bool ret;
32055 tree old_optimize = build_optimization_node (&global_options);
32056 tree new_target, new_optimize;
32057 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32059 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32061 if (TARGET_DEBUG_TARGET)
32063 tree tname = DECL_NAME (fndecl);
32064 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
32065 if (tname)
32066 fprintf (stderr, "function: %.*s\n",
32067 (int) IDENTIFIER_LENGTH (tname),
32068 IDENTIFIER_POINTER (tname));
32069 else
32070 fprintf (stderr, "function: unknown\n");
32072 fprintf (stderr, "args:");
32073 rs6000_debug_target_options (args, " ");
32074 fprintf (stderr, "\n");
32076 if (flags)
32077 fprintf (stderr, "flags: 0x%x\n", flags);
32079 fprintf (stderr, "--------------------\n");
32082 old_optimize = build_optimization_node (&global_options);
32083 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32085 /* If the function changed the optimization levels as well as setting target
32086 options, start with the optimizations specified. */
32087 if (func_optimize && func_optimize != old_optimize)
32088 cl_optimization_restore (&global_options,
32089 TREE_OPTIMIZATION (func_optimize));
32091 /* The target attributes may also change some optimization flags, so update
32092 the optimization options if necessary. */
32093 cl_target_option_save (&cur_target, &global_options);
32094 rs6000_cpu_index = rs6000_tune_index = -1;
32095 ret = rs6000_inner_target_options (args, true);
32097 /* Set up any additional state. */
32098 if (ret)
32100 ret = rs6000_option_override_internal (false);
32101 new_target = build_target_option_node (&global_options);
32103 else
32104 new_target = NULL;
32106 new_optimize = build_optimization_node (&global_options);
32108 if (!new_target)
32109 ret = false;
32111 else if (fndecl)
32113 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
32115 if (old_optimize != new_optimize)
32116 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32119 cl_target_option_restore (&global_options, &cur_target);
32121 if (old_optimize != new_optimize)
32122 cl_optimization_restore (&global_options,
32123 TREE_OPTIMIZATION (old_optimize));
32125 return ret;
32129 /* Hook to validate the current #pragma GCC target and set the state, and
32130 update the macros based on what was changed. If ARGS is NULL, then
32131 POP_TARGET is used to reset the options. */
32133 bool
32134 rs6000_pragma_target_parse (tree args, tree pop_target)
32136 tree prev_tree = build_target_option_node (&global_options);
32137 tree cur_tree;
32138 struct cl_target_option *prev_opt, *cur_opt;
32139 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
32140 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
32142 if (TARGET_DEBUG_TARGET)
32144 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
32145 fprintf (stderr, "args:");
32146 rs6000_debug_target_options (args, " ");
32147 fprintf (stderr, "\n");
32149 if (pop_target)
32151 fprintf (stderr, "pop_target:\n");
32152 debug_tree (pop_target);
32154 else
32155 fprintf (stderr, "pop_target: <NULL>\n");
32157 fprintf (stderr, "--------------------\n");
32160 if (! args)
32162 cur_tree = ((pop_target)
32163 ? pop_target
32164 : target_option_default_node);
32165 cl_target_option_restore (&global_options,
32166 TREE_TARGET_OPTION (cur_tree));
32168 else
32170 rs6000_cpu_index = rs6000_tune_index = -1;
32171 if (!rs6000_inner_target_options (args, false)
32172 || !rs6000_option_override_internal (false)
32173 || (cur_tree = build_target_option_node (&global_options))
32174 == NULL_TREE)
32176 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
32177 fprintf (stderr, "invalid pragma\n");
32179 return false;
32183 target_option_current_node = cur_tree;
32185 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
32186 change the macros that are defined. */
32187 if (rs6000_target_modify_macros_ptr)
32189 prev_opt = TREE_TARGET_OPTION (prev_tree);
32190 prev_bumask = prev_opt->x_rs6000_builtin_mask;
32191 prev_flags = prev_opt->x_rs6000_isa_flags;
32193 cur_opt = TREE_TARGET_OPTION (cur_tree);
32194 cur_flags = cur_opt->x_rs6000_isa_flags;
32195 cur_bumask = cur_opt->x_rs6000_builtin_mask;
32197 diff_bumask = (prev_bumask ^ cur_bumask);
32198 diff_flags = (prev_flags ^ cur_flags);
32200 if ((diff_flags != 0) || (diff_bumask != 0))
32202 /* Delete old macros. */
32203 rs6000_target_modify_macros_ptr (false,
32204 prev_flags & diff_flags,
32205 prev_bumask & diff_bumask);
32207 /* Define new macros. */
32208 rs6000_target_modify_macros_ptr (true,
32209 cur_flags & diff_flags,
32210 cur_bumask & diff_bumask);
32214 return true;
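/* Editorial note, not normative: the XOR above computes which ISA and
   builtin bits changed state; ANDing that diff with the previous flags
   selects the macros to delete, and with the current flags the macros
   to define, so only the changed predefines are touched.  */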
32218 /* Remember the last target of rs6000_set_current_function. */
32219 static GTY(()) tree rs6000_previous_fndecl;
32221 /* Establish appropriate back-end context for processing the function
32222 FNDECL. The argument might be NULL to indicate processing at top
32223 level, outside of any function scope. */
32224 static void
32225 rs6000_set_current_function (tree fndecl)
32227 tree old_tree = (rs6000_previous_fndecl
32228 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
32229 : NULL_TREE);
32231 tree new_tree = (fndecl
32232 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
32233 : NULL_TREE);
32235 if (TARGET_DEBUG_TARGET)
32237 bool print_final = false;
32238 fprintf (stderr, "\n==================== rs6000_set_current_function");
32240 if (fndecl)
32241 fprintf (stderr, ", fndecl %s (%p)",
32242 (DECL_NAME (fndecl)
32243 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
32244 : "<unknown>"), (void *)fndecl);
32246 if (rs6000_previous_fndecl)
32247 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
32249 fprintf (stderr, "\n");
32250 if (new_tree)
32252 fprintf (stderr, "\nnew fndecl target specific options:\n");
32253 debug_tree (new_tree);
32254 print_final = true;
32257 if (old_tree)
32259 fprintf (stderr, "\nold fndecl target specific options:\n");
32260 debug_tree (old_tree);
32261 print_final = true;
32264 if (print_final)
32265 fprintf (stderr, "--------------------\n");
32268 /* Only change the context if the function changes. This hook is called
32269 several times in the course of compiling a function, and we don't want to
32270 slow things down too much or call target_reinit when it isn't safe. */
32271 if (fndecl && fndecl != rs6000_previous_fndecl)
32273 rs6000_previous_fndecl = fndecl;
32274 if (old_tree == new_tree)
32277 else if (new_tree)
32279 cl_target_option_restore (&global_options,
32280 TREE_TARGET_OPTION (new_tree));
32281 if (TREE_TARGET_GLOBALS (new_tree))
32282 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32283 else
32284 TREE_TARGET_GLOBALS (new_tree)
32285 = save_target_globals_default_opts ();
32288 else if (old_tree)
32290 new_tree = target_option_current_node;
32291 cl_target_option_restore (&global_options,
32292 TREE_TARGET_OPTION (new_tree));
32293 if (TREE_TARGET_GLOBALS (new_tree))
32294 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32295 else if (new_tree == target_option_default_node)
32296 restore_target_globals (&default_target_globals);
32297 else
32298 TREE_TARGET_GLOBALS (new_tree)
32299 = save_target_globals_default_opts ();
32305 /* Save the current options */
32307 static void
32308 rs6000_function_specific_save (struct cl_target_option *ptr,
32309 struct gcc_options *opts)
32311 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
32312 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
32315 /* Restore the current options */
32317 static void
32318 rs6000_function_specific_restore (struct gcc_options *opts,
32319 struct cl_target_option *ptr)
32322 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
32323 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
32324 (void) rs6000_option_override_internal (false);
32327 /* Print the current options */
32329 static void
32330 rs6000_function_specific_print (FILE *file, int indent,
32331 struct cl_target_option *ptr)
32333 rs6000_print_isa_options (file, indent, "Isa options set",
32334 ptr->x_rs6000_isa_flags);
32336 rs6000_print_isa_options (file, indent, "Isa options explicit",
32337 ptr->x_rs6000_isa_flags_explicit);
32340 /* Helper function to print the current isa or misc options on a line. */
32342 static void
32343 rs6000_print_options_internal (FILE *file,
32344 int indent,
32345 const char *string,
32346 HOST_WIDE_INT flags,
32347 const char *prefix,
32348 const struct rs6000_opt_mask *opts,
32349 size_t num_elements)
32351 size_t i;
32352 size_t start_column = 0;
32353 size_t cur_column;
32354 size_t max_column = 76;
32355 const char *comma = "";
32357 if (indent)
32358 start_column += fprintf (file, "%*s", indent, "");
32360 if (!flags)
32362 fprintf (file, DEBUG_FMT_S, string, "<none>");
32363 return;
32366 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
32368 /* Print the various mask options. */
32369 cur_column = start_column;
32370 for (i = 0; i < num_elements; i++)
32372 if ((flags & opts[i].mask) != 0)
32374 const char *no_str = opts[i].invert ? "no-" : "";
32375 size_t len = (strlen (comma)
32376 + strlen (prefix)
32377 + strlen (no_str)
32378 + strlen (opts[i].name));
32380 cur_column += len;
32381 if (cur_column > max_column)
32383 fprintf (file, ", \\\n%*s", (int)start_column, "");
32384 cur_column = start_column + len;
32385 comma = "";
32388 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
32389 opts[i].name);
32390 flags &= ~ opts[i].mask;
32391 comma = ", ";
32395 fputs ("\n", file);
32398 /* Helper function to print the current isa options on a line. */
32400 static void
32401 rs6000_print_isa_options (FILE *file, int indent, const char *string,
32402 HOST_WIDE_INT flags)
32404 rs6000_print_options_internal (file, indent, string, flags, "-m",
32405 &rs6000_opt_masks[0],
32406 ARRAY_SIZE (rs6000_opt_masks));
32409 static void
32410 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
32411 HOST_WIDE_INT flags)
32413 rs6000_print_options_internal (file, indent, string, flags, "",
32414 &rs6000_builtin_mask_names[0],
32415 ARRAY_SIZE (rs6000_builtin_mask_names));
32419 /* Hook to determine if one function can safely inline another. */
32421 static bool
32422 rs6000_can_inline_p (tree caller, tree callee)
32424 bool ret = false;
32425 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32426 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32428 /* If callee has no option attributes, then it is ok to inline. */
32429 if (!callee_tree)
32430 ret = true;
32432 /* If caller has no option attributes, but callee does then it is not ok to
32433 inline. */
32434 else if (!caller_tree)
32435 ret = false;
32437 else
32439 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32440 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32442 /* Callee's options should be a subset of the caller's, i.e. a vsx function
32443 can inline an altivec function but a non-vsx function can't inline a
32444 vsx function. */
32445 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32446 == callee_opts->x_rs6000_isa_flags)
32447 ret = true;
32450 if (TARGET_DEBUG_TARGET)
32451 fprintf (stderr, "rs6000_can_inline_p:, caller %s, callee %s, %s inline\n",
32452 (DECL_NAME (caller)
32453 ? IDENTIFIER_POINTER (DECL_NAME (caller))
32454 : "<unknown>"),
32455 (DECL_NAME (callee)
32456 ? IDENTIFIER_POINTER (DECL_NAME (callee))
32457 : "<unknown>"),
32458 (ret ? "can" : "cannot"));
32460 return ret;
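/* Editorial sketch, not part of GCC: the subset test applied above.  A
   call site may be inlined only if every ISA bit the callee requires
   is also enabled for the caller.  */
static int
model_isa_subset_p (unsigned long long caller_flags,
                    unsigned long long callee_flags)
{
  return (caller_flags & callee_flags) == callee_flags;
}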
32463 /* Allocate a stack temp and fixup the address so it meets the particular
32464 memory requirements (either offsettable or REG+REG addressing). */
32467 rs6000_allocate_stack_temp (enum machine_mode mode,
32468 bool offsettable_p,
32469 bool reg_reg_p)
32471 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32472 rtx addr = XEXP (stack, 0);
32473 int strict_p = (reload_in_progress || reload_completed);
32475 if (!legitimate_indirect_address_p (addr, strict_p))
32477 if (offsettable_p
32478 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32479 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32481 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32482 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32485 return stack;
32488 /* Given a memory reference, if it is not a reg or reg+reg addressing, convert
32489 to such a form to deal with memory reference instructions like STFIWX that
32490 only take reg+reg addressing. */
32493 rs6000_address_for_fpconvert (rtx x)
32495 int strict_p = (reload_in_progress || reload_completed);
32496 rtx addr;
32498 gcc_assert (MEM_P (x));
32499 addr = XEXP (x, 0);
32500 if (! legitimate_indirect_address_p (addr, strict_p)
32501 && ! legitimate_indexed_address_p (addr, strict_p))
32503 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
32505 rtx reg = XEXP (addr, 0);
32506 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
32507 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
32508 gcc_assert (REG_P (reg));
32509 emit_insn (gen_add3_insn (reg, reg, size_rtx));
32510 addr = reg;
32512 else if (GET_CODE (addr) == PRE_MODIFY)
32514 rtx reg = XEXP (addr, 0);
32515 rtx expr = XEXP (addr, 1);
32516 gcc_assert (REG_P (reg));
32517 gcc_assert (GET_CODE (expr) == PLUS);
32518 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
32519 addr = reg;
32522 x = replace_equiv_address (x, copy_addr_to_reg (addr));
32525 return x;
32528 /* Given a memory reference, if it is not in the form for altivec memory
32529 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
32530 convert to the altivec format. */
32533 rs6000_address_for_altivec (rtx x)
32535 gcc_assert (MEM_P (x));
32536 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
32538 rtx addr = XEXP (x, 0);
32539 int strict_p = (reload_in_progress || reload_completed);
32541 if (!legitimate_indexed_address_p (addr, strict_p)
32542 && !legitimate_indirect_address_p (addr, strict_p))
32543 addr = copy_to_mode_reg (Pmode, addr);
32545 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
32546 x = change_address (x, GET_MODE (x), addr);
32549 return x;
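/* Editorial sketch, not part of GCC: the AND with -16 above clears the
   low four address bits, mirroring the 16-byte alignment AltiVec
   loads and stores impose in hardware.  */
static unsigned long long
model_altivec_effective_addr (unsigned long long addr)
{
  return addr & ~15ULL;  /* same bits as addr & -16 */
}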
32552 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
32554 On the RS/6000, all integer constants are acceptable, most won't be valid
32555 for particular insns, though. Only easy FP constants are acceptable. */
32557 static bool
32558 rs6000_legitimate_constant_p (enum machine_mode mode, rtx x)
32560 if (TARGET_ELF && tls_referenced_p (x))
32561 return false;
32563 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
32564 || GET_MODE (x) == VOIDmode
32565 || (TARGET_POWERPC64 && mode == DImode)
32566 || easy_fp_constant (x, mode)
32567 || easy_vector_constant (x, mode));
32572 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
32574 void
32575 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32577 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
32578 rtx toc_load = NULL_RTX;
32579 rtx toc_restore = NULL_RTX;
32580 rtx func_addr;
32581 rtx abi_reg = NULL_RTX;
32582 rtx call[4];
32583 int n_call;
32584 rtx insn;
32586 /* Handle longcall attributes. */
32587 if (INTVAL (cookie) & CALL_LONG)
32588 func_desc = rs6000_longcall_ref (func_desc);
32590 /* Handle indirect calls. */
32591 if (GET_CODE (func_desc) != SYMBOL_REF
32592 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
32594 /* Save the TOC into its reserved slot before the call,
32595 and prepare to restore it after the call. */
32596 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32597 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
32598 rtx stack_toc_mem = gen_frame_mem (Pmode,
32599 gen_rtx_PLUS (Pmode, stack_ptr,
32600 stack_toc_offset));
32601 toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_mem);
32603 /* Can we optimize saving the TOC in the prologue or
32604 do we need to do it at every call? */
32605 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
32606 cfun->machine->save_toc_in_prologue = true;
32607 else
32609 MEM_VOLATILE_P (stack_toc_mem) = 1;
32610 emit_move_insn (stack_toc_mem, toc_reg);
32613 if (DEFAULT_ABI == ABI_ELFv2)
32615 /* A function pointer in the ELFv2 ABI is just a plain address, but
32616 the ABI requires it to be loaded into r12 before the call. */
32617 func_addr = gen_rtx_REG (Pmode, 12);
32618 emit_move_insn (func_addr, func_desc);
32619 abi_reg = func_addr;
32621 else
32623 /* A function pointer under AIX is a pointer to a data area whose
32624 first word contains the actual address of the function, whose
32625 second word contains a pointer to its TOC, and whose third word
32626 contains a value to place in the static chain register (r11).
32627 Note that if we load the static chain, our "trampoline" need
32628 not have any executable code. */
32630 /* Load up address of the actual function. */
32631 func_desc = force_reg (Pmode, func_desc);
32632 func_addr = gen_reg_rtx (Pmode);
32633 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
32635 /* Prepare to load the TOC of the called function. Note that the
32636 TOC load must happen immediately before the actual call so
32637 that unwinding the TOC registers works correctly. See the
32638 comment in frob_update_context. */
32639 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
32640 rtx func_toc_mem = gen_rtx_MEM (Pmode,
32641 gen_rtx_PLUS (Pmode, func_desc,
32642 func_toc_offset));
32643 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
32645 /* If we have a static chain, load it up. */
32646 if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32648 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
32649 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
32650 rtx func_sc_mem = gen_rtx_MEM (Pmode,
32651 gen_rtx_PLUS (Pmode, func_desc,
32652 func_sc_offset));
32653 emit_move_insn (sc_reg, func_sc_mem);
32654 abi_reg = sc_reg;
32658 else
32660 /* Direct calls use the TOC: for local calls, the callee will
32661 assume the TOC register is set; for non-local calls, the
32662 PLT stub needs the TOC register. */
32663 abi_reg = toc_reg;
32664 func_addr = func_desc;
32667 /* Create the call. */
32668 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
32669 if (value != NULL_RTX)
32670 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
32671 n_call = 1;
32673 if (toc_load)
32674 call[n_call++] = toc_load;
32675 if (toc_restore)
32676 call[n_call++] = toc_restore;
32678 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
32680 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
32681 insn = emit_call_insn (insn);
32683 /* Mention all registers defined by the ABI to hold information
32684 as uses in CALL_INSN_FUNCTION_USAGE. */
32685 if (abi_reg)
32686 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
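/* Editorial sketch, not part of GCC: the AIX/ELFv1 function descriptor
   the indirect-call path above reads, assuming 64-bit pointers.  The
   three words correspond to the loads of func_addr, the callee TOC,
   and the static chain.  */
struct model_aix_func_desc
{
  unsigned long long entry;         /* address of the actual code */
  unsigned long long toc;           /* callee's TOC pointer */
  unsigned long long static_chain;  /* placed in r11 when used */
};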
32689 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
32691 void
32692 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32694 rtx call[2];
32695 rtx insn;
32697 gcc_assert (INTVAL (cookie) == 0);
32699 /* Create the call. */
32700 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
32701 if (value != NULL_RTX)
32702 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
32704 call[1] = simple_return_rtx;
32706 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
32707 insn = emit_call_insn (insn);
32709 /* Note use of the TOC register. */
32710 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
32711 /* We need to also mark a use of the link register since the function we
32712 sibling-call to will use it to return to our caller. */
32713 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
32716 /* Return whether we need to always update the saved TOC pointer when we update
32717 the stack pointer. */
32719 static bool
32720 rs6000_save_toc_in_prologue_p (void)
32722 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
32725 #ifdef HAVE_GAS_HIDDEN
32726 # define USE_HIDDEN_LINKONCE 1
32727 #else
32728 # define USE_HIDDEN_LINKONCE 0
32729 #endif
32731 /* Fills in the label name that should be used for a 476 link stack thunk. */
32733 void
32734 get_ppc476_thunk_name (char name[32])
32736 gcc_assert (TARGET_LINK_STACK);
32738 if (USE_HIDDEN_LINKONCE)
32739 sprintf (name, "__ppc476.get_thunk");
32740 else
32741 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
32744 /* This function emits the simple thunk routine that is used to preserve
32745 the link stack on the 476 cpu. */
32747 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
32748 static void
32749 rs6000_code_end (void)
32751 char name[32];
32752 tree decl;
32754 if (!TARGET_LINK_STACK)
32755 return;
32757 get_ppc476_thunk_name (name);
32759 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
32760 build_function_type_list (void_type_node, NULL_TREE));
32761 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
32762 NULL_TREE, void_type_node);
32763 TREE_PUBLIC (decl) = 1;
32764 TREE_STATIC (decl) = 1;
32766 #if RS6000_WEAK
32767 if (USE_HIDDEN_LINKONCE)
32769 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
32770 targetm.asm_out.unique_section (decl, 0);
32771 switch_to_section (get_named_section (decl, NULL, 0));
32772 DECL_WEAK (decl) = 1;
32773 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
32774 targetm.asm_out.globalize_label (asm_out_file, name);
32775 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
32776 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
32778 else
32779 #endif
32781 switch_to_section (text_section);
32782 ASM_OUTPUT_LABEL (asm_out_file, name);
32785 DECL_INITIAL (decl) = make_node (BLOCK);
32786 current_function_decl = decl;
32787 init_function_start (decl);
32788 first_function_block_is_cold = false;
32789 /* Make sure unwind info is emitted for the thunk if needed. */
32790 final_start_function (emit_barrier (), asm_out_file, 1);
32792 fputs ("\tblr\n", asm_out_file);
32794 final_end_function ();
32795 init_insn_lengths ();
32796 free_after_compilation (cfun);
32797 set_cfun (NULL);
32798 current_function_decl = NULL;
32801 /* Add r30 to hard reg set if the prologue sets it up and it is not
32802 pic_offset_table_rtx. */
32804 static void
32805 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
32807 if (!TARGET_SINGLE_PIC_BASE
32808 && TARGET_TOC
32809 && TARGET_MINIMAL_TOC
32810 && get_pool_size () != 0)
32811 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
32815 /* Helper function for rs6000_split_logical to emit a logical instruction after
32816 splitting the operation into single GPR registers.
32818 DEST is the destination register.
32819 OP1 and OP2 are the input source registers.
32820 CODE is the base operation (AND, IOR, XOR, NOT).
32821 MODE is the machine mode.
32822 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32823 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32824 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
32826 static void
32827 rs6000_split_logical_inner (rtx dest,
32828 rtx op1,
32829 rtx op2,
32830 enum rtx_code code,
32831 enum machine_mode mode,
32832 bool complement_final_p,
32833 bool complement_op1_p,
32834 bool complement_op2_p)
32836 rtx bool_rtx;
32838 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
32839 if (op2 && GET_CODE (op2) == CONST_INT
32840 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
32841 && !complement_final_p && !complement_op1_p && !complement_op2_p)
32843 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
32844 HOST_WIDE_INT value = INTVAL (op2) & mask;
32846 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
32847 if (code == AND)
32849 if (value == 0)
32851 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
32852 return;
32855 else if (value == mask)
32857 if (!rtx_equal_p (dest, op1))
32858 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
32859 return;
32863 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
32864 into separate ORI/ORIS or XORI/XORIS instructions. */
32865 else if (code == IOR || code == XOR)
32867 if (value == 0)
32869 if (!rtx_equal_p (dest, op1))
32870 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
32871 return;
32876 if (code == AND && mode == SImode
32877 && !complement_final_p && !complement_op1_p && !complement_op2_p)
32879 emit_insn (gen_andsi3 (dest, op1, op2));
32880 return;
32883 if (complement_op1_p)
32884 op1 = gen_rtx_NOT (mode, op1);
32886 if (complement_op2_p)
32887 op2 = gen_rtx_NOT (mode, op2);
32889 bool_rtx = ((code == NOT)
32890 ? gen_rtx_NOT (mode, op1)
32891 : gen_rtx_fmt_ee (code, mode, op1, op2));
32893 if (complement_final_p)
32894 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
32896 emit_insn (gen_rtx_SET (VOIDmode, dest, bool_rtx));
32899 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
32900 operations are split immediately during RTL generation to allow for more
32901 optimizations of the AND/IOR/XOR.
32903 OPERANDS is an array containing the destination and two input operands.
32904 CODE is the base operation (AND, IOR, XOR, NOT).
32906 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
32907 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
32908 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT.
32912 static void
32913 rs6000_split_logical_di (rtx operands[3],
32914 enum rtx_code code,
32915 bool complement_final_p,
32916 bool complement_op1_p,
32917 bool complement_op2_p)
32919 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
32920 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
32921 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
32922 enum hi_lo { hi = 0, lo = 1 };
32923 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
32924 size_t i;
32926 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
32927 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
32928 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
32929 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
32931 if (code == NOT)
32932 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
32933 else
32935 if (GET_CODE (operands[2]) != CONST_INT)
32937 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
32938 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
32940 else
32942 HOST_WIDE_INT value = INTVAL (operands[2]);
32943 HOST_WIDE_INT value_hi_lo[2];
32945 gcc_assert (!complement_final_p);
32946 gcc_assert (!complement_op1_p);
32947 gcc_assert (!complement_op2_p);
32949 value_hi_lo[hi] = value >> 32;
32950 value_hi_lo[lo] = value & lower_32bits;
32952 for (i = 0; i < 2; i++)
32954 HOST_WIDE_INT sub_value = value_hi_lo[i];
32956 if (sub_value & sign_bit)
32957 sub_value |= upper_32bits;
32959 op2_hi_lo[i] = GEN_INT (sub_value);
32961 /* If this is an AND instruction, check to see if we need to load
32962 the value in a register. */
32963 if (code == AND && sub_value != -1 && sub_value != 0
32964 && !and_operand (op2_hi_lo[i], SImode))
32965 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
32970 for (i = 0; i < 2; i++)
32972 /* Split large IOR/XOR operations. */
32973 if ((code == IOR || code == XOR)
32974 && GET_CODE (op2_hi_lo[i]) == CONST_INT
32975 && !complement_final_p
32976 && !complement_op1_p
32977 && !complement_op2_p
32978 && !logical_const_operand (op2_hi_lo[i], SImode))
32980 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
32981 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
32982 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
32983 rtx tmp = gen_reg_rtx (SImode);
32985 /* Make sure the constant is sign extended. */
32986 if ((hi_16bits & sign_bit) != 0)
32987 hi_16bits |= upper_32bits;
32989 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
32990 code, SImode, false, false, false);
32992 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
32993 code, SImode, false, false, false);
32995 else
32996 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
32997 code, SImode, complement_final_p,
32998 complement_op1_p, complement_op2_p);
33001 return;
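/* Editorial sketch, not part of GCC: splitting a 64-bit constant into
   the two sign-extended 32-bit halves used above.  */
static void
model_split_di_const (long long value, long long *hi, long long *lo)
{
  *hi = value >> 32;                  /* arithmetic shift keeps the sign */
  *lo = (int) (value & 0xffffffffLL); /* sign-extend the low half */
}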
33004 /* Split the insns that make up boolean operations operating on multiple GPR
33005 registers. The boolean MD patterns ensure that the inputs either are
33006 exactly the same as the output registers, or there is no overlap.
33008 OPERANDS is an array containing the destination and two input operands.
33009 CODE is the base operation (AND, IOR, XOR, NOT).
33010 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33011 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33012 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33014 void
33015 rs6000_split_logical (rtx operands[3],
33016 enum rtx_code code,
33017 bool complement_final_p,
33018 bool complement_op1_p,
33019 bool complement_op2_p)
33021 enum machine_mode mode = GET_MODE (operands[0]);
33022 enum machine_mode sub_mode;
33023 rtx op0, op1, op2;
33024 int sub_size, regno0, regno1, nregs, i;
33026 /* If this is DImode, use the specialized version that can run before
33027 register allocation. */
33028 if (mode == DImode && !TARGET_POWERPC64)
33030 rs6000_split_logical_di (operands, code, complement_final_p,
33031 complement_op1_p, complement_op2_p);
33032 return;
33035 op0 = operands[0];
33036 op1 = operands[1];
33037 op2 = (code == NOT) ? NULL_RTX : operands[2];
33038 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
33039 sub_size = GET_MODE_SIZE (sub_mode);
33040 regno0 = REGNO (op0);
33041 regno1 = REGNO (op1);
33043 gcc_assert (reload_completed);
33044 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33045 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33047 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
33048 gcc_assert (nregs > 1);
33050 if (op2 && REG_P (op2))
33051 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
33053 for (i = 0; i < nregs; i++)
33055 int offset = i * sub_size;
33056 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
33057 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
33058 rtx sub_op2 = ((code == NOT)
33059 ? NULL_RTX
33060 : simplify_subreg (sub_mode, op2, mode, offset));
33062 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
33063 complement_final_p, complement_op1_p,
33064 complement_op2_p);
33067 return;
33071 /* Return true if the peephole2 can combine an addis instruction with a
33072 load whose offset completes the address, so that the pair can be
33073 fused together on a power8.
33075 bool
33076 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
33077 rtx addis_value, /* addis value. */
33078 rtx target, /* target register that is loaded. */
33079 rtx mem) /* bottom part of the memory addr. */
33081 rtx addr;
33082 rtx base_reg;
33084 /* Validate arguments. */
33085 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
33086 return false;
33088 if (!base_reg_operand (target, GET_MODE (target)))
33089 return false;
33091 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33092 return false;
33094 /* Allow sign/zero extension. */
33095 if (GET_CODE (mem) == ZERO_EXTEND
33096 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33097 mem = XEXP (mem, 0);
33099 if (!MEM_P (mem))
33100 return false;
33102 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33103 return false;
33105 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33106 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33107 return false;
33109 /* Validate that the register used to load the high value is either the
33110 register being loaded, or we can safely replace its use.
33112 This function is only called from the peephole2 pass and we assume that
33113 there are 2 instructions in the peephole (addis and load), so we want to
33114 check that the target register is not used in the memory address and that
33115 the register holding the addis result is dead after the peephole. */
33116 if (REGNO (addis_reg) != REGNO (target))
33118 if (reg_mentioned_p (target, mem))
33119 return false;
33121 if (!peep2_reg_dead_p (2, addis_reg))
33122 return false;
33124 /* If the target register being loaded is the stack pointer, we must
33125 avoid loading any other value into it, even temporarily. */
33126 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33127 return false;
33130 base_reg = XEXP (addr, 0);
33131 return REGNO (addis_reg) == REGNO (base_reg);
33134 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33135 sequence. We adjust the addis register to use the target register. If the
33136 load sign extends, we change the insn to do a zero-extending load followed
33137 by an explicit sign extension, since the fusion only covers zero-extending
33138 loads.
33140 The operands are:
33141 operands[0] register set with addis (to be replaced with target)
33142 operands[1] value set via addis
33143 operands[2] target register being loaded
33144 operands[3] D-form memory reference using operands[0]. */
33146 void
33147 expand_fusion_gpr_load (rtx *operands)
33149 rtx addis_value = operands[1];
33150 rtx target = operands[2];
33151 rtx orig_mem = operands[3];
33152 rtx new_addr, new_mem, orig_addr, offset;
33153 enum rtx_code plus_or_lo_sum;
33154 enum machine_mode target_mode = GET_MODE (target);
33155 enum machine_mode extend_mode = target_mode;
33156 enum machine_mode ptr_mode = Pmode;
33157 enum rtx_code extend = UNKNOWN;
33159 if (GET_CODE (orig_mem) == ZERO_EXTEND
33160 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
33162 extend = GET_CODE (orig_mem);
33163 orig_mem = XEXP (orig_mem, 0);
33164 target_mode = GET_MODE (orig_mem);
33167 gcc_assert (MEM_P (orig_mem));
33169 orig_addr = XEXP (orig_mem, 0);
33170 plus_or_lo_sum = GET_CODE (orig_addr);
33171 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
33173 offset = XEXP (orig_addr, 1);
33174 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
33175 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
33177 if (extend != UNKNOWN)
33178 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
33180 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
33181 UNSPEC_FUSION_GPR);
33182 emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
33184 if (extend == SIGN_EXTEND)
33186 int sub_off = ((BYTES_BIG_ENDIAN)
33187 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
33188 : 0);
33189 rtx sign_reg
33190 = simplify_subreg (target_mode, target, extend_mode, sub_off);
33192 emit_insn (gen_rtx_SET (VOIDmode, target,
33193 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
33196 return;
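/* As a sketch of the transformation (registers and constants are
   illustrative), the original two-insn peephole sequence

     (set (reg:DI 10) (plus:DI (reg:DI 2) (const_int 65536)))
     (set (reg:SI 9) (mem:SI (plus:DI (reg:DI 10) (const_int 8))))

   is rewritten by the routine above so that the load uses the addis
   value directly and wraps the memory reference in UNSPEC_FUSION_GPR:

     (set (reg:SI 9)
          (unspec:SI [(mem:SI (plus:DI (plus:DI (reg:DI 2)
                                                (const_int 65536))
                                       (const_int 8)))]
                     UNSPEC_FUSION_GPR))  */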
33199 /* Return a string to fuse an addis instruction with a gpr load into the
33200 same register that the addis instruction set. The address that is used
33201 is the logical address that was formed during peephole2:
33202 (lo_sum (high) (low-part))
33204 The code is complicated, so we call output_asm_insn directly, and just
33205 return "". */
33207 const char *
33208 emit_fusion_gpr_load (rtx target, rtx mem)
33210 rtx addis_value;
33211 rtx fuse_ops[10];
33212 rtx addr;
33213 rtx load_offset;
33214 const char *addis_str = NULL;
33215 const char *load_str = NULL;
33216 const char *mode_name = NULL;
33217 char insn_template[80];
33218 enum machine_mode mode;
33219 const char *comment_str = ASM_COMMENT_START;
33221 if (GET_CODE (mem) == ZERO_EXTEND)
33222 mem = XEXP (mem, 0);
33224 gcc_assert (REG_P (target) && MEM_P (mem));
33226 if (*comment_str == ' ')
33227 comment_str++;
33229 addr = XEXP (mem, 0);
33230 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33231 gcc_unreachable ();
33233 addis_value = XEXP (addr, 0);
33234 load_offset = XEXP (addr, 1);
33236 /* Select the load mnemonic for the mode being loaded. */
33237 mode = GET_MODE (mem);
33238 switch (mode)
33240 case QImode:
33241 mode_name = "char";
33242 load_str = "lbz";
33243 break;
33245 case HImode:
33246 mode_name = "short";
33247 load_str = "lhz";
33248 break;
33250 case SImode:
33251 mode_name = "int";
33252 load_str = "lwz";
33253 break;
33255 case DImode:
33256 gcc_assert (TARGET_POWERPC64);
33257 mode_name = "long";
33258 load_str = "ld";
33259 break;
33261 default:
33262 gcc_unreachable ();
33265 /* Emit the addis instruction. */
33266 fuse_ops[0] = target;
33267 if (satisfies_constraint_L (addis_value))
33269 fuse_ops[1] = addis_value;
33270 addis_str = "lis %0,%v1";
33273 else if (GET_CODE (addis_value) == PLUS)
33275 rtx op0 = XEXP (addis_value, 0);
33276 rtx op1 = XEXP (addis_value, 1);
33278 if (REG_P (op0) && CONST_INT_P (op1)
33279 && satisfies_constraint_L (op1))
33281 fuse_ops[1] = op0;
33282 fuse_ops[2] = op1;
33283 addis_str = "addis %0,%1,%v2";
33287 else if (GET_CODE (addis_value) == HIGH)
33289 rtx value = XEXP (addis_value, 0);
33290 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
33292 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
33293 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
33294 if (TARGET_ELF)
33295 addis_str = "addis %0,%2,%1@toc@ha";
33297 else if (TARGET_XCOFF)
33298 addis_str = "addis %0,%1@u(%2)";
33300 else
33301 gcc_unreachable ();
33304 else if (GET_CODE (value) == PLUS)
33306 rtx op0 = XEXP (value, 0);
33307 rtx op1 = XEXP (value, 1);
33309 if (GET_CODE (op0) == UNSPEC
33310 && XINT (op0, 1) == UNSPEC_TOCREL
33311 && CONST_INT_P (op1))
33313 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
33314 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
33315 fuse_ops[3] = op1;
33316 if (TARGET_ELF)
33317 addis_str = "addis %0,%2,%1+%3@toc@ha";
33319 else if (TARGET_XCOFF)
33320 addis_str = "addis %0,%1+%3@u(%2)";
33322 else
33323 gcc_unreachable ();
33327 else if (satisfies_constraint_L (value))
33329 fuse_ops[1] = value;
33330 addis_str = "lis %0,%v1";
33333 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
33335 fuse_ops[1] = value;
33336 addis_str = "lis %0,%1@ha";
33340 if (!addis_str)
33341 fatal_insn ("Could not generate addis value for fusion", addis_value);
33343 sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
33344 comment_str, mode_name);
33345 output_asm_insn (insn_template, fuse_ops);
33347 /* Emit the D-form load instruction. */
33348 if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
33350 sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
33351 fuse_ops[1] = load_offset;
33352 output_asm_insn (insn_template, fuse_ops);
33355 else if (GET_CODE (load_offset) == UNSPEC
33356 && XINT (load_offset, 1) == UNSPEC_TOCREL)
33358 if (TARGET_ELF)
33359 sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
33361 else if (TARGET_XCOFF)
33362 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33364 else
33365 gcc_unreachable ();
33367 fuse_ops[1] = XVECEXP (load_offset, 0, 0);
33368 output_asm_insn (insn_template, fuse_ops);
33371 else if (GET_CODE (load_offset) == PLUS
33372 && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
33373 && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
33374 && CONST_INT_P (XEXP (load_offset, 1)))
33376 rtx tocrel_unspec = XEXP (load_offset, 0);
33377 if (TARGET_ELF)
33378 sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
33380 else if (TARGET_XCOFF)
33381 sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
33383 else
33384 gcc_unreachable ();
33386 fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
33387 fuse_ops[2] = XEXP (load_offset, 1);
33388 output_asm_insn (insn_template, fuse_ops);
33391 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
33393 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33395 fuse_ops[1] = load_offset;
33396 output_asm_insn (insn_template, fuse_ops);
33399 else
33400 fatal_insn ("Unable to generate load offset for fusion", load_offset);
33402 return "";
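/* Continuing the sketch above, given the UNSPEC_FUSION_GPR form with
   addis value (plus (reg 2) (const_int 65536)) and offset 8, the
   routine would emit something like

     addis 9,2,1		# gpr load fusion, type int
     lwz 9,8(9)

   with both instructions targeting the same register, which is what
   allows the power8 front end to fuse them.  */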
33405 /* Analyze vector computations and remove unnecessary doubleword
33406 swaps (xxswapdi instructions). This pass is performed only
33407 for little-endian VSX code generation.
33409 For this specific case, loads and stores of 4x32 and 2x64 vectors
33410 are inefficient. These are implemented using the lxvd2x and
33411 stxvd2x instructions, which invert the order of doublewords in
33412 a vector register. Thus the code generation inserts an xxswapdi
33413 after each such load, and prior to each such store. (For spill
33414 code after register assignment, an additional xxswapdi is inserted
33415 following each store in order to return a hard register to its
33416 unpermuted value.)
33418 The extra xxswapdi instructions reduce performance. This can be
33419 particularly bad for vectorized code. The purpose of this pass
33420 is to reduce the number of xxswapdi instructions required for
33421 correctness.
33423 The primary insight is that much code that operates on vectors
33424 does not care about the relative order of elements in a register,
33425 so long as the correct memory order is preserved. If we have
33426 a computation where all input values are provided by lxvd2x/xxswapdi
33427 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
33428 and all intermediate computations are pure SIMD (independent of
33429 element order), then all the xxswapdi's associated with the loads
33430 and stores may be removed.
33432 This pass uses some of the infrastructure and logical ideas from
33433 the "web" pass in web.c. We create maximal webs of computations
33434 fitting the description above using union-find. Each such web is
33435 then optimized by removing its unnecessary xxswapdi instructions.
33437 The pass is placed prior to global optimization so that we can
33438 perform the optimization in the safest and simplest way possible;
33439 that is, by replacing each xxswapdi insn with a register copy insn.
33440 Subsequent forward propagation will remove copies where possible.
33442 There are some operations sensitive to element order for which we
33443 can still allow the operation, provided we modify those operations.
33444 These include CONST_VECTORs, for which we must swap the first and
33445 second halves of the constant vector; and SUBREGs, for which we
33446 must adjust the byte offset to account for the swapped doublewords.
33447 A remaining opportunity would be non-immediate-form splats, for
33448 which we should adjust the selected lane of the input. We should
33449 also make code generation adjustments for sum-across operations,
33450 since this is a common vectorizer reduction.
33452 Because we run prior to the first split, we can see loads and stores
33453 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
33454 vector loads and stores that have not yet been split into a permuting
33455 load/store and a swap. (One way this can happen is with a builtin
33456 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
33457 than deleting a swap, we convert the load/store into a permuting
33458 load/store (which effectively removes the swap). */
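/* To make the sequences concrete (a sketch; register numbers are
   illustrative), a little-endian V4SI load and store are initially
   generated as

     lxvd2x 0,0,9	# load; doublewords arrive reversed
     xxswapd 0,0	# xxswapdi: restore element order
     ...
     xxswapd 1,1	# xxswapdi: reverse element order again
     stxvd2x 1,0,10	# store

   and this pass attempts to prove that the two xxswapd instructions
   can be removed.  */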
33460 /* Notes on Permutes
33462 We do not currently handle computations that contain permutes. There
33463 is a general transformation that can be performed correctly, but it
33464 may introduce more expensive code than it replaces. To handle these
33465 would require a cost model to determine when to perform the optimization.
33466 This commentary records how this could be done if desired.
33468 The most general permute is something like this (example for V16QI):
33470 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
33471 (parallel [(const_int a0) (const_int a1)
33472 ...
33473 (const_int a14) (const_int a15)]))
33475 where a0,...,a15 are in [0,31] and select which elements from op1 and op2
33476 appear in the result.
33478 Regardless of mode, we can convert the PARALLEL to a mask of 16
33479 byte-element selectors. Let's call this M, with M[i] representing
33480 the ith byte-element selector value. Then if we swap doublewords
33481 throughout the computation, we can get correct behavior by replacing
33482 M with M' as follows:
33484 { M[i+8]+8 : i < 8, M[i+8] in [0,7] U [16,23]
33485 M'[i] = { M[i+8]-8 : i < 8, M[i+8] in [8,15] U [24,31]
33486 { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23]
33487 { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
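   As a concrete instance: for result byte 0 (i = 0), M'[0] is
   determined by M[8]. If the original mask has M[8] = 3 (byte 3 of
   op1), then once the doublewords of op1 are swapped that byte is
   found at position 11, so M'[0] = M[8] + 8 = 11, matching the first
   case above.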
33489 This seems promising at first, since we are just replacing one mask
33490 with another. But certain masks are preferable to others. If M
33491 is a mask that matches a vmrghh pattern, for example, M' certainly
33492 will not. Instead of a single vmrghh, we would generate a load of
33493 M' and a vperm. So we would need to know how many xxswapd's we can
33494 remove as a result of this transformation to determine if it's
33495 profitable; and preferably the logic would need to be aware of all
33496 the special preferable masks.
33498 Another form of permute is an UNSPEC_VPERM, in which the mask is
33499 already in a register. In some cases, this mask may be a constant
33500 that we can discover with ud-chains, in which case the above
33501 transformation is ok. However, the common usage here is for the
33502 mask to be produced by an UNSPEC_LVSL, in which case the mask
33503 cannot be known at compile time. In such a case we would have to
33504 generate several instructions to compute M' as above at run time,
33505 and a cost model is needed again. */
33507 /* This is based on the union-find logic in web.c. web_entry_base is
33508 defined in df.h. */
33509 class swap_web_entry : public web_entry_base
33511 public:
33512 /* Pointer to the insn. */
33513 rtx_insn *insn;
33514 /* Set if insn contains a mention of a vector register. All other
33515 fields are undefined if this field is unset. */
33516 unsigned int is_relevant : 1;
33517 /* Set if insn is a load. */
33518 unsigned int is_load : 1;
33519 /* Set if insn is a store. */
33520 unsigned int is_store : 1;
33521 /* Set if insn is a doubleword swap. This can either be a register swap
33522 or a permuting load or store (test is_load and is_store for this). */
33523 unsigned int is_swap : 1;
33524 /* Set if the insn has a live-in use of a parameter register. */
33525 unsigned int is_live_in : 1;
33526 /* Set if the insn has a live-out def of a return register. */
33527 unsigned int is_live_out : 1;
33528 /* Set if the insn contains a subreg reference of a vector register. */
33529 unsigned int contains_subreg : 1;
33530 /* Set if the insn contains a 128-bit integer operand. */
33531 unsigned int is_128_int : 1;
33532 /* Set if this is a call-insn. */
33533 unsigned int is_call : 1;
33534 /* Set if this insn does not perform a vector operation for which
33535 element order matters, or if we know how to fix it up if it does.
33536 Undefined if is_swap is set. */
33537 unsigned int is_swappable : 1;
33538 /* A nonzero value indicates what kind of special handling for this
33539 insn is required if doublewords are swapped. Undefined if
33540 is_swappable is not set. */
33541 unsigned int special_handling : 3;
33542 /* Set if the web represented by this entry cannot be optimized. */
33543 unsigned int web_not_optimizable : 1;
33544 /* Set if this insn should be deleted. */
33545 unsigned int will_delete : 1;
33548 enum special_handling_values {
33549 SH_NONE = 0,
33550 SH_CONST_VECTOR,
33551 SH_SUBREG,
33552 SH_NOSWAP_LD,
33553 SH_NOSWAP_ST,
33554 SH_EXTRACT,
33555 SH_SPLAT
33558 /* Union INSN with all insns containing definitions that reach USE.
33559 Detect whether USE is live-in to the current function. */
33560 static void
33561 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
33563 struct df_link *link = DF_REF_CHAIN (use);
33565 if (!link)
33566 insn_entry[INSN_UID (insn)].is_live_in = 1;
33568 while (link)
33570 if (DF_REF_IS_ARTIFICIAL (link->ref))
33571 insn_entry[INSN_UID (insn)].is_live_in = 1;
33573 if (DF_REF_INSN_INFO (link->ref))
33575 rtx def_insn = DF_REF_INSN (link->ref);
33576 (void)unionfind_union (insn_entry + INSN_UID (insn),
33577 insn_entry + INSN_UID (def_insn));
33580 link = link->next;
33584 /* Union INSN with all insns containing uses reached from DEF.
33585 Detect whether DEF is live-out from the current function. */
33586 static void
33587 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
33589 struct df_link *link = DF_REF_CHAIN (def);
33591 if (!link)
33592 insn_entry[INSN_UID (insn)].is_live_out = 1;
33594 while (link)
33596 /* This could be an eh use or some other artificial use;
33597 we treat these all the same (killing the optimization). */
33598 if (DF_REF_IS_ARTIFICIAL (link->ref))
33599 insn_entry[INSN_UID (insn)].is_live_out = 1;
33601 if (DF_REF_INSN_INFO (link->ref))
33603 rtx use_insn = DF_REF_INSN (link->ref);
33604 (void)unionfind_union (insn_entry + INSN_UID (insn),
33605 insn_entry + INSN_UID (use_insn));
33608 link = link->next;
33612 /* Return 1 iff INSN is a load insn, including permuting loads that
33613 represent an lxvd2x instruction; else return 0. */
33614 static unsigned int
33615 insn_is_load_p (rtx insn)
33617 rtx body = PATTERN (insn);
33619 if (GET_CODE (body) == SET)
33621 if (GET_CODE (SET_SRC (body)) == MEM)
33622 return 1;
33624 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
33625 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
33626 return 1;
33628 return 0;
33631 if (GET_CODE (body) != PARALLEL)
33632 return 0;
33634 rtx set = XVECEXP (body, 0, 0);
33636 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
33637 return 1;
33639 return 0;
33642 /* Return 1 iff INSN is a store insn, including permuting stores that
33643 represent an stxvd2x instruction; else return 0. */
33644 static unsigned int
33645 insn_is_store_p (rtx insn)
33647 rtx body = PATTERN (insn);
33648 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
33649 return 1;
33650 if (GET_CODE (body) != PARALLEL)
33651 return 0;
33652 rtx set = XVECEXP (body, 0, 0);
33653 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
33654 return 1;
33655 return 0;
33658 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
33659 a permuting load, or a permuting store. */
33660 static unsigned int
33661 insn_is_swap_p (rtx insn)
33663 rtx body = PATTERN (insn);
33664 if (GET_CODE (body) != SET)
33665 return 0;
33666 rtx rhs = SET_SRC (body);
33667 if (GET_CODE (rhs) != VEC_SELECT)
33668 return 0;
33669 rtx parallel = XEXP (rhs, 1);
33670 if (GET_CODE (parallel) != PARALLEL)
33671 return 0;
33672 unsigned int len = XVECLEN (parallel, 0);
33673 if (len != 2 && len != 4 && len != 8 && len != 16)
33674 return 0;
33675 for (unsigned int i = 0; i < len / 2; ++i)
33677 rtx op = XVECEXP (parallel, 0, i);
33678 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
33679 return 0;
33681 for (unsigned int i = len / 2; i < len; ++i)
33683 rtx op = XVECEXP (parallel, 0, i);
33684 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
33685 return 0;
33687 return 1;
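/* For example, for V4SI the doubleword swap recognized above has the
   shape (register numbers are illustrative)

     (set (reg:V4SI 32)
          (vec_select:V4SI (reg:V4SI 33)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   i.e., the selector lists the second half of the lanes followed by
   the first half.  */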
33690 /* Return 1 iff OP is an operand that will not be affected by having
33691 vector doublewords swapped in memory. */
33692 static unsigned int
33693 rtx_is_swappable_p (rtx op, unsigned int *special)
33695 enum rtx_code code = GET_CODE (op);
33696 int i, j;
33697 rtx parallel;
33699 switch (code)
33701 case LABEL_REF:
33702 case SYMBOL_REF:
33703 case CLOBBER:
33704 case REG:
33705 return 1;
33707 case VEC_CONCAT:
33708 case ASM_INPUT:
33709 case ASM_OPERANDS:
33710 return 0;
33712 case CONST_VECTOR:
33714 *special = SH_CONST_VECTOR;
33715 return 1;
33718 case VEC_DUPLICATE:
33719 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
33720 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
33721 it represents a vector splat for which we can do special
33722 handling. */
33723 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
33724 return 1;
33725 else if (GET_CODE (XEXP (op, 0)) == REG
33726 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
33727 /* This catches V2DF and V2DI splat, at a minimum. */
33728 return 1;
33729 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
33730 /* If the duplicated item is from a select, defer to the select
33731 processing to see if we can change the lane for the splat. */
33732 return rtx_is_swappable_p (XEXP (op, 0), special);
33733 else
33734 return 0;
33736 case VEC_SELECT:
33737 /* A vec_extract operation is ok if we change the lane. */
33738 if (GET_CODE (XEXP (op, 0)) == REG
33739 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
33740 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
33741 && XVECLEN (parallel, 0) == 1
33742 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
33744 *special = SH_EXTRACT;
33745 return 1;
33747 else
33748 return 0;
33750 case UNSPEC:
33752 /* Various operations are unsafe for this optimization, at least
33753 without significant additional work. Permutes are obviously
33754 problematic, as both the permute control vector and the ordering
33755 of the target values are invalidated by doubleword swapping.
33756 Vector pack and unpack modify the number of vector lanes.
33757 Merge-high/low will not operate correctly on swapped operands.
33758 Vector shifts across element boundaries are clearly uncool,
33759 as are vector select and concatenate operations. Vector
33760 sum-across instructions define one operand with a specific
33761 order-dependent element, so additional fixup code would be
33762 needed to make those work. Vector set and non-immediate-form
33763 vector splat are element-order sensitive. A few of these
33764 cases might be workable with special handling if required. */
33765 int val = XINT (op, 1);
33766 switch (val)
33768 default:
33769 break;
33770 case UNSPEC_VMRGH_DIRECT:
33771 case UNSPEC_VMRGL_DIRECT:
33772 case UNSPEC_VPACK_SIGN_SIGN_SAT:
33773 case UNSPEC_VPACK_SIGN_UNS_SAT:
33774 case UNSPEC_VPACK_UNS_UNS_MOD:
33775 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
33776 case UNSPEC_VPACK_UNS_UNS_SAT:
33777 case UNSPEC_VPERM:
33778 case UNSPEC_VPERM_UNS:
33779 case UNSPEC_VPERMHI:
33780 case UNSPEC_VPERMSI:
33781 case UNSPEC_VPKPX:
33782 case UNSPEC_VSLDOI:
33783 case UNSPEC_VSLO:
33784 case UNSPEC_VSRO:
33785 case UNSPEC_VSUM2SWS:
33786 case UNSPEC_VSUM4S:
33787 case UNSPEC_VSUM4UBS:
33788 case UNSPEC_VSUMSWS:
33789 case UNSPEC_VSUMSWS_DIRECT:
33790 case UNSPEC_VSX_CONCAT:
33791 case UNSPEC_VSX_SET:
33792 case UNSPEC_VSX_SLDWI:
33793 case UNSPEC_VUNPACK_HI_SIGN:
33794 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
33795 case UNSPEC_VUNPACK_LO_SIGN:
33796 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
33797 case UNSPEC_VUPKHPX:
33798 case UNSPEC_VUPKHS_V4SF:
33799 case UNSPEC_VUPKHU_V4SF:
33800 case UNSPEC_VUPKLPX:
33801 case UNSPEC_VUPKLS_V4SF:
33802 case UNSPEC_VUPKLU_V4SF:
33803 /* The following could be handled as an idiom with XXSPLTW.
33804 These place a scalar in BE element zero, but the XXSPLTW
33805 will currently expect it in BE element 2 in a swapped
33806 region. When one of these feeds an XXSPLTW with no other
33807 defs/uses either way, we can avoid the lane change for
33808 XXSPLTW and things will be correct. TBD. */
33809 case UNSPEC_VSX_CVDPSPN:
33810 case UNSPEC_VSX_CVSPDP:
33811 case UNSPEC_VSX_CVSPDPN:
33812 return 0;
33813 case UNSPEC_VSPLT_DIRECT:
33814 *special = SH_SPLAT;
33815 return 1;
33819 default:
33820 break;
33823 const char *fmt = GET_RTX_FORMAT (code);
33824 int ok = 1;
33826 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
33827 if (fmt[i] == 'e' || fmt[i] == 'u')
33829 unsigned int special_op = SH_NONE;
33830 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
33831 /* Ensure we never have two kinds of special handling
33832 for the same insn. */
33833 if (*special != SH_NONE && special_op != SH_NONE
33834 && *special != special_op)
33835 return 0;
33836 *special = special_op;
33838 else if (fmt[i] == 'E')
33839 for (j = 0; j < XVECLEN (op, i); ++j)
33841 unsigned int special_op = SH_NONE;
33842 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
33843 /* Ensure we never have two kinds of special handling
33844 for the same insn. */
33845 if (*special != SH_NONE && special_op != SH_NONE
33846 && *special != special_op)
33847 return 0;
33848 *special = special_op;
33851 return ok;
33854 /* Return 1 iff INSN is an insn that will not be affected by
33855 having vector doublewords swapped in memory (in which case
33856 *SPECIAL is unchanged), or that can be modified to be correct
33857 if vector doublewords are swapped in memory (in which case
33858 *SPECIAL is changed to a value indicating how). */
33859 static unsigned int
33860 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
33861 unsigned int *special)
33863 /* Calls are always bad. */
33864 if (GET_CODE (insn) == CALL_INSN)
33865 return 0;
33867 /* Loads and stores seen here are not permuting, but we can still
33868 fix them up by converting them to permuting ones. Exceptions:
33869 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
33870 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
33871 for the SET source. */
33872 rtx body = PATTERN (insn);
33873 int i = INSN_UID (insn);
33875 if (insn_entry[i].is_load)
33877 if (GET_CODE (body) == SET)
33879 *special = SH_NOSWAP_LD;
33880 return 1;
33882 else
33883 return 0;
33886 if (insn_entry[i].is_store)
33888 if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
33890 *special = SH_NOSWAP_ST;
33891 return 1;
33893 else
33894 return 0;
33897 /* Otherwise check the operands for vector lane violations. */
33898 return rtx_is_swappable_p (body, special);
33901 enum chain_purpose { FOR_LOADS, FOR_STORES };
33903 /* Return true if the UD or DU chain headed by LINK is non-empty,
33904 and every entry on the chain references an insn that is a
33905 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
33906 register swap must have only permuting loads as reaching defs.
33907 If PURPOSE is FOR_STORES, each such register swap must have only
33908 register swaps or permuting stores as reached uses. */
33909 static bool
33910 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
33911 enum chain_purpose purpose)
33913 if (!link)
33914 return false;
33916 for (; link; link = link->next)
33918 if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
33919 continue;
33921 if (DF_REF_IS_ARTIFICIAL (link->ref))
33922 return false;
33924 rtx reached_insn = DF_REF_INSN (link->ref);
33925 unsigned uid = INSN_UID (reached_insn);
33926 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
33928 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
33929 || insn_entry[uid].is_store)
33930 return false;
33932 if (purpose == FOR_LOADS)
33934 df_ref use;
33935 FOR_EACH_INSN_INFO_USE (use, insn_info)
33937 struct df_link *swap_link = DF_REF_CHAIN (use);
33939 while (swap_link)
33941 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
33942 return false;
33944 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
33945 unsigned uid2 = INSN_UID (swap_def_insn);
33947 /* Only permuting loads are allowed. */
33948 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
33949 return false;
33951 swap_link = swap_link->next;
33955 else if (purpose == FOR_STORES)
33957 df_ref def;
33958 FOR_EACH_INSN_INFO_DEF (def, insn_info)
33960 struct df_link *swap_link = DF_REF_CHAIN (def);
33962 while (swap_link)
33964 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
33965 return false;
33967 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
33968 unsigned uid2 = INSN_UID (swap_use_insn);
33970 /* Permuting stores or register swaps are allowed. */
33971 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
33972 return false;
33974 swap_link = swap_link->next;
33980 return true;
33983 /* Mark the xxswapdi instructions associated with permuting loads and
33984 stores for removal. Note that we only flag them for deletion here,
33985 as there is a possibility of a swap being reached from multiple
33986 loads, etc. */
33987 static void
33988 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
33990 rtx insn = insn_entry[i].insn;
33991 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
33993 if (insn_entry[i].is_load)
33995 df_ref def;
33996 FOR_EACH_INSN_INFO_DEF (def, insn_info)
33998 struct df_link *link = DF_REF_CHAIN (def);
34000 /* We know by now that these are swaps, so we can delete
34001 them confidently. */
34002 while (link)
34004 rtx use_insn = DF_REF_INSN (link->ref);
34005 insn_entry[INSN_UID (use_insn)].will_delete = 1;
34006 link = link->next;
34010 else if (insn_entry[i].is_store)
34012 df_ref use;
34013 FOR_EACH_INSN_INFO_USE (use, insn_info)
34015 /* Ignore uses for addressability. */
34016 enum machine_mode mode = GET_MODE (DF_REF_REG (use));
34017 if (!VECTOR_MODE_P (mode))
34018 continue;
34020 struct df_link *link = DF_REF_CHAIN (use);
34022 /* We know by now that these are swaps, so we can delete
34023 them confidently. */
34024 while (link)
34026 rtx def_insn = DF_REF_INSN (link->ref);
34027 insn_entry[INSN_UID (def_insn)].will_delete = 1;
34028 link = link->next;
34034 /* OP is either a CONST_VECTOR or an expression containing one.
34035 Swap the first half of the vector with the second in the first
34036 case. Recurse to find it in the second. */
34037 static void
34038 swap_const_vector_halves (rtx op)
34040 int i;
34041 enum rtx_code code = GET_CODE (op);
34042 if (GET_CODE (op) == CONST_VECTOR)
34044 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
34045 for (i = 0; i < half_units; ++i)
34047 rtx temp = CONST_VECTOR_ELT (op, i);
34048 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
34049 CONST_VECTOR_ELT (op, i + half_units) = temp;
34052 else
34054 int j;
34055 const char *fmt = GET_RTX_FORMAT (code);
34056 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34057 if (fmt[i] == 'e' || fmt[i] == 'u')
34058 swap_const_vector_halves (XEXP (op, i));
34059 else if (fmt[i] == 'E')
34060 for (j = 0; j < XVECLEN (op, i); ++j)
34061 swap_const_vector_halves (XVECEXP (op, i, j));
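/* For example, swap_const_vector_halves rewrites the V4SI constant
   { 0, 1, 2, 3 } as { 2, 3, 0, 1 }, exchanging element i with element
   i + half_units for each i in the first half of the lanes.  */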
34065 /* Find all subregs of a vector expression that perform a narrowing,
34066 and adjust the subreg index to account for doubleword swapping. */
34067 static void
34068 adjust_subreg_index (rtx op)
34070 enum rtx_code code = GET_CODE (op);
34071 if (code == SUBREG
34072 && (GET_MODE_SIZE (GET_MODE (op))
34073 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
34075 unsigned int index = SUBREG_BYTE (op);
34076 if (index < 8)
34077 index += 8;
34078 else
34079 index -= 8;
34080 SUBREG_BYTE (op) = index;
34083 const char *fmt = GET_RTX_FORMAT (code);
34084 int i,j;
34085 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34086 if (fmt[i] == 'e' || fmt[i] == 'u')
34087 adjust_subreg_index (XEXP (op, i));
34088 else if (fmt[i] == 'E')
34089 for (j = 0; j < XVECLEN (op, i); ++j)
34090 adjust_subreg_index (XVECEXP (op, i, j));
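/* For example, on a 16-byte vector the narrowing subreg

     (subreg:DI (reg:V2DI 32) 0)

   is rewritten by the routine above to

     (subreg:DI (reg:V2DI 32) 8)

   so that it still selects the same doubleword once the register
   contents are swapped (the register number is illustrative).  */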
34093 /* Convert the non-permuting load INSN to a permuting one. */
34094 static void
34095 permute_load (rtx_insn *insn)
34097 rtx body = PATTERN (insn);
34098 rtx mem_op = SET_SRC (body);
34099 rtx tgt_reg = SET_DEST (body);
34100 enum machine_mode mode = GET_MODE (tgt_reg);
34101 int n_elts = GET_MODE_NUNITS (mode);
34102 int half_elts = n_elts / 2;
34103 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34104 int i, j;
34105 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34106 XVECEXP (par, 0, i) = GEN_INT (j);
34107 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34108 XVECEXP (par, 0, i) = GEN_INT (j);
34109 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
34110 SET_SRC (body) = sel;
34111 INSN_CODE (insn) = -1; /* Force re-recognition. */
34112 df_insn_rescan (insn);
34114 if (dump_file)
34115 fprintf (dump_file, "Replacing load %d with permuted load\n",
34116 INSN_UID (insn));
34119 /* Convert the non-permuting store INSN to a permuting one. */
34120 static void
34121 permute_store (rtx_insn *insn)
34123 rtx body = PATTERN (insn);
34124 rtx src_reg = SET_SRC (body);
34125 enum machine_mode mode = GET_MODE (src_reg);
34126 int n_elts = GET_MODE_NUNITS (mode);
34127 int half_elts = n_elts / 2;
34128 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34129 int i, j;
34130 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34131 XVECEXP (par, 0, i) = GEN_INT (j);
34132 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34133 XVECEXP (par, 0, i) = GEN_INT (j);
34134 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
34135 SET_SRC (body) = sel;
34136 INSN_CODE (insn) = -1; /* Force re-recognition. */
34137 df_insn_rescan (insn);
34139 if (dump_file)
34140 fprintf (dump_file, "Replacing store %d with permuted store\n",
34141 INSN_UID (insn));
34144 /* Given INSN that contains a vector extract operation, adjust the index
34145 of the extracted lane to account for the doubleword swap. */
34146 static void
34147 adjust_extract (rtx_insn *insn)
34149 rtx src = SET_SRC (PATTERN (insn));
34150 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
34151 account for that. */
34152 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
34153 rtx par = XEXP (sel, 1);
34154 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
34155 int lane = INTVAL (XVECEXP (par, 0, 0));
34156 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34157 XVECEXP (par, 0, 0) = GEN_INT (lane);
34158 INSN_CODE (insn) = -1; /* Force re-recognition. */
34159 df_insn_rescan (insn);
34161 if (dump_file)
34162 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
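/* For example, an extract of lane 0 from a V2DI register,

     (vec_select:DI (reg:V2DI 32) (parallel [(const_int 0)]))

   has its lane changed to (const_int 1), since half_elts is 1 and the
   requested element now lives in the other doubleword.  */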
34165 /* Given INSN that contains a vector direct-splat operation, adjust the index
34166 of the source lane to account for the doubleword swap. */
34167 static void
34168 adjust_splat (rtx_insn *insn)
34170 rtx body = PATTERN (insn);
34171 rtx unspec = XEXP (body, 1);
34172 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
34173 int lane = INTVAL (XVECEXP (unspec, 0, 1));
34174 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34175 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
34176 INSN_CODE (insn) = -1; /* Force re-recognition. */
34177 df_insn_rescan (insn);
34179 if (dump_file)
34180 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
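/* The splat case is analogous: for an UNSPEC_VSPLT_DIRECT over V4SI,
   half_elts is 2, so a source lane of 1 is rewritten to 3 and a
   source lane of 3 to 1.  */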
34183 /* The insn described by INSN_ENTRY[I] can be swapped, but only
34184 with special handling. Take care of that here. */
34185 static void
34186 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
34188 rtx_insn *insn = insn_entry[i].insn;
34189 rtx body = PATTERN (insn);
34191 switch (insn_entry[i].special_handling)
34193 default:
34194 gcc_unreachable ();
34195 case SH_CONST_VECTOR:
34197 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
34198 gcc_assert (GET_CODE (body) == SET);
34199 rtx rhs = SET_SRC (body);
34200 swap_const_vector_halves (rhs);
34201 if (dump_file)
34202 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
34203 break;
34205 case SH_SUBREG:
34206 /* A subreg of the same size is already safe. For subregs that
34207 select a smaller portion of a reg, adjust the index for
34208 swapped doublewords. */
34209 adjust_subreg_index (body);
34210 if (dump_file)
34211 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
34212 break;
34213 case SH_NOSWAP_LD:
34214 /* Convert a non-permuting load to a permuting one. */
34215 permute_load (insn);
34216 break;
34217 case SH_NOSWAP_ST:
34218 /* Convert a non-permuting store to a permuting one. */
34219 permute_store (insn);
34220 break;
34221 case SH_EXTRACT:
34222 /* Change the lane on an extract operation. */
34223 adjust_extract (insn);
34224 break;
34225 case SH_SPLAT:
34226 /* Change the lane on a direct-splat operation. */
34227 adjust_splat (insn);
34228 break;
34232 /* Find the insn from the Ith table entry, which is known to be a
34233 register swap Y = SWAP(X). Replace it with a copy Y = X. */
34234 static void
34235 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
34237 rtx_insn *insn = insn_entry[i].insn;
34238 rtx body = PATTERN (insn);
34239 rtx src_reg = XEXP (SET_SRC (body), 0);
34240 rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg);
34241 rtx_insn *new_insn = emit_insn_before (copy, insn);
34242 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
34243 df_insn_rescan (new_insn);
34245 if (dump_file)
34247 unsigned int new_uid = INSN_UID (new_insn);
34248 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
34251 df_insn_delete (insn);
34252 remove_insn (insn);
34253 insn->set_deleted ();
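/* For example, the V4SI register swap

     (set (reg:V4SI 32)
          (vec_select:V4SI (reg:V4SI 33)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   becomes the plain copy

     (set (reg:V4SI 32) (reg:V4SI 33))

   which the subsequent forward-propagation passes can usually
   eliminate entirely.  */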
34256 /* Dump the swap table to DUMP_FILE. */
34257 static void
34258 dump_swap_insn_table (swap_web_entry *insn_entry)
34260 int e = get_max_uid ();
34261 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
34263 for (int i = 0; i < e; ++i)
34264 if (insn_entry[i].is_relevant)
34266 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
34267 fprintf (dump_file, "%6d %6d ", i,
34268 pred_entry && pred_entry->insn
34269 ? INSN_UID (pred_entry->insn) : 0);
34270 if (insn_entry[i].is_load)
34271 fputs ("load ", dump_file);
34272 if (insn_entry[i].is_store)
34273 fputs ("store ", dump_file);
34274 if (insn_entry[i].is_swap)
34275 fputs ("swap ", dump_file);
34276 if (insn_entry[i].is_live_in)
34277 fputs ("live-in ", dump_file);
34278 if (insn_entry[i].is_live_out)
34279 fputs ("live-out ", dump_file);
34280 if (insn_entry[i].contains_subreg)
34281 fputs ("subreg ", dump_file);
34282 if (insn_entry[i].is_128_int)
34283 fputs ("int128 ", dump_file);
34284 if (insn_entry[i].is_call)
34285 fputs ("call ", dump_file);
34286 if (insn_entry[i].is_swappable)
34288 fputs ("swappable ", dump_file);
34289 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
34290 fputs ("special:constvec ", dump_file);
34291 else if (insn_entry[i].special_handling == SH_SUBREG)
34292 fputs ("special:subreg ", dump_file);
34293 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
34294 fputs ("special:load ", dump_file);
34295 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
34296 fputs ("special:store ", dump_file);
34297 else if (insn_entry[i].special_handling == SH_EXTRACT)
34298 fputs ("special:extract ", dump_file);
34299 else if (insn_entry[i].special_handling == SH_SPLAT)
34300 fputs ("special:splat ", dump_file);
34302 if (insn_entry[i].web_not_optimizable)
34303 fputs ("unoptimizable ", dump_file);
34304 if (insn_entry[i].will_delete)
34305 fputs ("delete ", dump_file);
34306 fputs ("\n", dump_file);
34308 fputs ("\n", dump_file);
34311 /* Main entry point for this pass. */
34312 unsigned int
34313 rs6000_analyze_swaps (function *fun)
34315 swap_web_entry *insn_entry;
34316 basic_block bb;
34317 rtx_insn *insn;
34319 /* Dataflow analysis for use-def chains. */
34320 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
34321 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
34322 df_analyze ();
34323 df_set_flags (DF_DEFER_INSN_RESCAN);
34325 /* Allocate structure to represent webs of insns. */
34326 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
34328 /* Walk the insns to gather basic data. */
34329 FOR_ALL_BB_FN (bb, fun)
34330 FOR_BB_INSNS (bb, insn)
34332 unsigned int uid = INSN_UID (insn);
34333 if (NONDEBUG_INSN_P (insn))
34335 insn_entry[uid].insn = insn;
34337 if (GET_CODE (insn) == CALL_INSN)
34338 insn_entry[uid].is_call = 1;
34340 /* Walk the uses and defs to see if we mention vector regs.
34341 Record any constraints on optimization of such mentions. */
34342 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34343 df_ref mention;
34344 FOR_EACH_INSN_INFO_USE (mention, insn_info)
34346 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34347 enum machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34349 /* If a use gets its value from a call insn, it will be
34350 a hard register and will look like (reg:V4SI 3 3).
34351 The df analysis creates two mentions for GPR3 and GPR4,
34352 both DImode. We must recognize this and treat it as a
34353 vector mention to ensure the call is unioned with this
34354 use. */
34355 if (mode == DImode && DF_REF_INSN_INFO (mention))
34357 rtx feeder = DF_REF_INSN (mention);
34358 /* FIXME: It is pretty hard to get from the df mention
34359 to the mode of the use in the insn. We arbitrarily
34360 pick a vector mode here, even though the use might
34361 be a real DImode. We can be too conservative
34362 (create a web larger than necessary) because of
34363 this, so consider eventually fixing this. */
34364 if (GET_CODE (feeder) == CALL_INSN)
34365 mode = V4SImode;
34368 if (VECTOR_MODE_P (mode))
34370 insn_entry[uid].is_relevant = 1;
34371 if (mode == TImode || mode == V1TImode)
34372 insn_entry[uid].is_128_int = 1;
34373 if (DF_REF_INSN_INFO (mention))
34374 insn_entry[uid].contains_subreg
34375 = !rtx_equal_p (DF_REF_REG (mention),
34376 DF_REF_REAL_REG (mention));
34377 union_defs (insn_entry, insn, mention);
34380 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
34382 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34383 enum machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34385 /* If we're loading up a hard vector register for a call,
34386 it looks like (set (reg:V4SI 9 9) (...)). The df
34387 analysis creates two mentions for GPR9 and GPR10, both
34388 DImode. So relying on the mode from the mentions
34389 isn't sufficient to ensure we union the call into the
34390 web with the parameter setup code. */
34391 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
34392 && VECTOR_MODE_P (GET_MODE (SET_DEST (PATTERN (insn)))))
34393 mode = GET_MODE (SET_DEST (PATTERN (insn)));
34395 if (VECTOR_MODE_P (mode))
34397 insn_entry[uid].is_relevant = 1;
34398 if (mode == TImode || mode == V1TImode)
34399 insn_entry[uid].is_128_int = 1;
34400 if (DF_REF_INSN_INFO (mention))
34401 insn_entry[uid].contains_subreg
34402 = !rtx_equal_p (DF_REF_REG (mention),
34403 DF_REF_REAL_REG (mention));
34404 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
34405 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
34406 insn_entry[uid].is_live_out = 1;
34407 union_uses (insn_entry, insn, mention);
34411 if (insn_entry[uid].is_relevant)
34413 /* Determine if this is a load or store. */
34414 insn_entry[uid].is_load = insn_is_load_p (insn);
34415 insn_entry[uid].is_store = insn_is_store_p (insn);
34417 /* Determine if this is a doubleword swap. If not,
34418 determine whether it can legally be swapped. */
34419 if (insn_is_swap_p (insn))
34420 insn_entry[uid].is_swap = 1;
34421 else
34423 unsigned int special = SH_NONE;
34424 insn_entry[uid].is_swappable
34425 = insn_is_swappable_p (insn_entry, insn, &special);
34426 if (special != SH_NONE && insn_entry[uid].contains_subreg)
34427 insn_entry[uid].is_swappable = 0;
34428 else if (special != SH_NONE)
34429 insn_entry[uid].special_handling = special;
34430 else if (insn_entry[uid].contains_subreg)
34431 insn_entry[uid].special_handling = SH_SUBREG;
34437 if (dump_file)
34439 fprintf (dump_file, "\nSwap insn entry table when first built\n");
34440 dump_swap_insn_table (insn_entry);
34443 /* Record unoptimizable webs. */
34444 unsigned e = get_max_uid (), i;
34445 for (i = 0; i < e; ++i)
34447 if (!insn_entry[i].is_relevant)
34448 continue;
34450 swap_web_entry *root
34451 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
34453 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
34454 || (insn_entry[i].contains_subreg
34455 && insn_entry[i].special_handling != SH_SUBREG)
34456 || insn_entry[i].is_128_int || insn_entry[i].is_call
34457 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
34458 root->web_not_optimizable = 1;
34460 /* If we have loads or stores that aren't permuting then the
34461 optimization isn't appropriate. */
34462 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
34463 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
34464 root->web_not_optimizable = 1;
34466 /* If we have permuting loads or stores that are not accompanied
34467 by a register swap, the optimization isn't appropriate. */
34468 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
34470 rtx insn = insn_entry[i].insn;
34471 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34472 df_ref def;
34474 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34476 struct df_link *link = DF_REF_CHAIN (def);
34478 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
34480 root->web_not_optimizable = 1;
34481 break;
34485 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
34487 rtx insn = insn_entry[i].insn;
34488 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34489 df_ref use;
34491 FOR_EACH_INSN_INFO_USE (use, insn_info)
34493 struct df_link *link = DF_REF_CHAIN (use);
34495 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
34497 root->web_not_optimizable = 1;
34498 break;
34504 if (dump_file)
34506 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
34507 dump_swap_insn_table (insn_entry);
34510 /* For each load and store in an optimizable web (which implies
34511 the loads and stores are permuting), find the associated
34512 register swaps and mark them for removal. Due to various
34513 optimizations we may mark the same swap more than once. Also
34514 perform special handling for swappable insns that require it. */
34515 for (i = 0; i < e; ++i)
34516 if ((insn_entry[i].is_load || insn_entry[i].is_store)
34517 && insn_entry[i].is_swap)
34519 swap_web_entry* root_entry
34520 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34521 if (!root_entry->web_not_optimizable)
34522 mark_swaps_for_removal (insn_entry, i);
34524 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
34526 swap_web_entry* root_entry
34527 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34528 if (!root_entry->web_not_optimizable)
34529 handle_special_swappables (insn_entry, i);
34532 /* Now delete the swaps marked for removal. */
34533 for (i = 0; i < e; ++i)
34534 if (insn_entry[i].will_delete)
34535 replace_swap_with_copy (insn_entry, i);
34537 /* Clean up. */
34538 free (insn_entry);
34539 return 0;
34542 const pass_data pass_data_analyze_swaps =
34544 RTL_PASS, /* type */
34545 "swaps", /* name */
34546 OPTGROUP_NONE, /* optinfo_flags */
34547 TV_NONE, /* tv_id */
34548 0, /* properties_required */
34549 0, /* properties_provided */
34550 0, /* properties_destroyed */
34551 0, /* todo_flags_start */
34552 TODO_df_finish, /* todo_flags_finish */
34555 class pass_analyze_swaps : public rtl_opt_pass
34557 public:
34558 pass_analyze_swaps(gcc::context *ctxt)
34559 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
34562 /* opt_pass methods: */
34563 virtual bool gate (function *)
34565 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
34566 && rs6000_optimize_swaps);
34569 virtual unsigned int execute (function *fun)
34571 return rs6000_analyze_swaps (fun);
34574 }; // class pass_analyze_swaps
34576 rtl_opt_pass *
34577 make_pass_analyze_swaps (gcc::context *ctxt)
34579 return new pass_analyze_swaps (ctxt);
34582 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
34584 static void
34585 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
34587 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
34588 return;
34590 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
34591 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
34592 tree call_mffs = build_call_expr (mffs, 0);
34594 /* Generates the equivalent of feholdexcept (&fenv_var)
34596 *fenv_var = __builtin_mffs ();
34597 double fenv_hold;
34598 *(uint64_t*)&fenv_hold = *(uint64_t*)fenv_var & 0xffffffff00000007LL;
34599 __builtin_mtfsf (0xff, fenv_hold); */
34601 /* Mask to clear everything except for the rounding modes and non-IEEE
34602 arithmetic flag. */
34603 const unsigned HOST_WIDE_INT hold_exception_mask =
34604 HOST_WIDE_INT_C (0xffffffff00000007);
34606 tree fenv_var = create_tmp_var (double_type_node, NULL);
34608 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
34610 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
34611 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
34612 build_int_cst (uint64_type_node,
34613 hold_exception_mask));
34615 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
34616 fenv_llu_and);
34618 tree hold_mtfsf = build_call_expr (mtfsf, 2,
34619 build_int_cst (unsigned_type_node, 0xff),
34620 fenv_hold_mtfsf);
34622 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
34624 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
34626 double fenv_clear = __builtin_mffs ();
34627 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
34628 __builtin_mtfsf (0xff, fenv_clear); */
34630 /* Mask to clear the entire low word of the FPSCR image, including
34631 all exception and status bits as well as the rounding modes. */
34632 const unsigned HOST_WIDE_INT clear_exception_mask =
34633 HOST_WIDE_INT_C (0xffffffff00000000);
34635 tree fenv_clear = create_tmp_var (double_type_node, NULL);
34637 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
34639 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
34640 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
34641 fenv_clean_llu,
34642 build_int_cst (uint64_type_node,
34643 clear_exception_mask));
34645 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
34646 fenv_clear_llu_and);
34648 tree clear_mtfsf = build_call_expr (mtfsf, 2,
34649 build_int_cst (unsigned_type_node, 0xff),
34650 fenv_clear_mtfsf);
34652 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
34654 /* Generates the equivalent of feupdateenv (&fenv_var)
34656 double old_fenv = __builtin_mffs ();
34657 double fenv_update;
34658 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
34659 (*(uint64_t*)fenv_var & 0x1ff80fff);
34660 __builtin_mtfsf (0xff, fenv_update); */
34662 const unsigned HOST_WIDE_INT update_exception_mask =
34663 HOST_WIDE_INT_C (0xffffffff1fffff00);
34664 const unsigned HOST_WIDE_INT new_exception_mask =
34665 HOST_WIDE_INT_C (0x1ff80fff);
34667 tree old_fenv = create_tmp_var (double_type_node, NULL);
34668 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
34670 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
34671 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
34672 build_int_cst (uint64_type_node,
34673 update_exception_mask));
34675 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
34676 build_int_cst (uint64_type_node,
34677 new_exception_mask));
34679 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
34680 old_llu_and, new_llu_and);
34682 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
34683 new_llu_mask);
34685 tree update_mtfsf = build_call_expr (mtfsf, 2,
34686 build_int_cst (unsigned_type_node, 0xff),
34687 fenv_update_mtfsf);
34689 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
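/* A sketch of how the three sequences are used (the variable name is
   hypothetical): for a C11 atomic compound assignment such as

     _Atomic double d;
     d += 1.0;

   the HOLD sequence is evaluated before the compare-and-exchange
   loop, CLEAR is evaluated when a compare-and-exchange fails (before
   retrying), and UPDATE is evaluated once the store finally
   succeeds.  */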
34693 struct gcc_target targetm = TARGET_INITIALIZER;
34695 #include "gt-rs6000.h"