/* Subroutines used for code generation on IBM RS/6000.
   Copyright (C) 1991-2014 Free Software Foundation, Inc.
   Contributed by Richard Kenner (kenner@vlsi1.ultra.nyu.edu)

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "print-tree.h"
#include "varasm.h"
#include "expr.h"
#include "insn-codes.h"
#include "optabs.h"
#include "except.h"
#include "hashtab.h"
#include "hash-set.h"
#include "vec.h"
#include "machmode.h"
#include "input.h"
#include "function.h"
#include "output.h"
#include "dbxout.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "toplev.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "common/common-target.h"
#include "langhooks.h"
#include "reload.h"
#include "cfgloop.h"
#include "sched-int.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimple-walk.h"
#include "intl.h"
#include "params.h"
#include "tm-constrs.h"
#include "ira.h"
#include "opts.h"
#include "tree-vectorizer.h"
#include "dumpfile.h"
#include "hash-map.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "target-globals.h"
#include "builtins.h"
#include "context.h"
#include "tree-pass.h"
#include "real.h"
#if TARGET_XCOFF
#include "xcoffout.h"  /* get declarations of xcoff_*_section_name */
#endif
#if TARGET_MACHO
#include "gstab.h"  /* for N_SLINE */
#endif

#ifndef TARGET_NO_PROTOTYPE
#define TARGET_NO_PROTOTYPE 0
#endif

#define min(A,B)  ((A) < (B) ? (A) : (B))
#define max(A,B)  ((A) > (B) ? (A) : (B))
/* Structure used to define the rs6000 stack.  */
typedef struct rs6000_stack {
  int reload_completed;          /* stack info won't change from here on */
  int first_gp_reg_save;         /* first callee saved GP register used */
  int first_fp_reg_save;         /* first callee saved FP register used */
  int first_altivec_reg_save;    /* first callee saved AltiVec register used */
  int lr_save_p;                 /* true if the link reg needs to be saved */
  int cr_save_p;                 /* true if the CR reg needs to be saved */
  unsigned int vrsave_mask;      /* mask of vec registers to save */
  int push_p;                    /* true if we need to allocate stack space */
  int calls_p;                   /* true if the function makes any calls */
  int world_save_p;              /* true if we're saving *everything*:
                                    r13-r31, cr, f14-f31, vrsave, v20-v31  */
  enum rs6000_abi abi;           /* which ABI to use */
  int gp_save_offset;            /* offset to save GP regs from initial SP */
  int fp_save_offset;            /* offset to save FP regs from initial SP */
  int altivec_save_offset;       /* offset to save AltiVec regs from initial SP */
  int lr_save_offset;            /* offset to save LR from initial SP */
  int cr_save_offset;            /* offset to save CR from initial SP */
  int vrsave_save_offset;        /* offset to save VRSAVE from initial SP */
  int spe_gp_save_offset;        /* offset to save spe 64-bit gprs  */
  int varargs_save_offset;       /* offset to save the varargs registers */
  int ehrd_offset;               /* offset to EH return data */
  int ehcr_offset;               /* offset to EH CR field data */
  int reg_size;                  /* register size (4 or 8) */
  HOST_WIDE_INT vars_size;       /* variable save area size */
  int parm_size;                 /* outgoing parameter size */
  int save_size;                 /* save area size */
  int fixed_size;                /* fixed size of stack frame */
  int gp_size;                   /* size of saved GP registers */
  int fp_size;                   /* size of saved FP registers */
  int altivec_size;              /* size of saved AltiVec registers */
  int cr_size;                   /* size to hold CR if not in save_size */
  int vrsave_size;               /* size to hold VRSAVE if not in save_size */
  int altivec_padding_size;      /* size of altivec alignment padding if
                                    not in save_size */
  int spe_gp_size;               /* size of 64-bit GPR save size for SPE */
  int spe_padding_size;
  HOST_WIDE_INT total_size;      /* total bytes allocated for stack */
  int spe_64bit_regs_used;
  int savres_strategy;
} rs6000_stack_t;
/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
typedef struct GTY(()) machine_function
{
  /* Whether the instruction chain has been scanned already.  */
  int insn_chain_scanned_p;
  /* Flags if __builtin_return_address (n) with n >= 1 was used.  */
  int ra_needs_full_frame;
  /* Flags if __builtin_return_address (0) was used.  */
  int ra_need_lr;
  /* Cache lr_save_p after expansion of builtin_eh_return.  */
  int lr_save_state;
  /* Whether we need to save the TOC to the reserved stack location in the
     function prologue.  */
  bool save_toc_in_prologue;
  /* Offset from virtual_stack_vars_rtx to the start of the ABI_V4
     varargs save area.  */
  HOST_WIDE_INT varargs_save_offset;
  /* Temporary stack slot to use for SDmode copies.  This slot is
     64-bits wide and is allocated early enough so that the offset
     does not overflow the 16-bit load/store offset field.  */
  rtx sdmode_stack_slot;
  /* Flag if r2 setup is needed with ELFv2 ABI.  */
  bool r2_setup_needed;
} machine_function;
/* Support targetm.vectorize.builtin_mask_for_load.  */
static GTY(()) tree altivec_builtin_mask_for_load;

/* Set to nonzero once AIX common-mode calls have been defined.  */
static GTY(()) int common_mode_defined;

/* Label number of the label created for -mrelocatable, which is called
   so we can get the address of the GOT section.  */
static int rs6000_pic_labelno;

#ifdef USING_ELFOS_H
/* Counter for labels which are to be placed in .fixup.  */
int fixuplabelno = 0;
#endif

/* Whether to use the variant of the AIX ABI for PowerPC64 Linux.  */
int dot_symbols;

/* Specify the machine mode that pointers have.  After generation of rtl, the
   compiler makes no further distinction between pointers and any other objects
   of this machine mode.  The type is unsigned since not all things that
   include rs6000.h also include machmode.h.  */
unsigned rs6000_pmode;

/* Width in bits of a pointer.  */
unsigned rs6000_pointer_size;

#ifdef HAVE_AS_GNU_ATTRIBUTE
/* Flag whether floating point values have been passed/returned.  */
static bool rs6000_passes_float;
/* Flag whether vector values have been passed/returned.  */
static bool rs6000_passes_vector;
/* Flag whether small (<= 8 byte) structures have been returned.  */
static bool rs6000_returns_struct;
#endif

/* Value is TRUE if register/mode pair is acceptable.  */
bool rs6000_hard_regno_mode_ok_p[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Maximum number of registers needed for a given register class and mode.  */
unsigned char rs6000_class_max_nregs[NUM_MACHINE_MODES][LIM_REG_CLASSES];

/* How many registers are needed for a given register and mode.  */
unsigned char rs6000_hard_regno_nregs[NUM_MACHINE_MODES][FIRST_PSEUDO_REGISTER];

/* Map register number to register class.  */
enum reg_class rs6000_regno_regclass[FIRST_PSEUDO_REGISTER];

static int dbg_cost_ctrl;

/* Built in types.  */
tree rs6000_builtin_types[RS6000_BTI_MAX];
tree rs6000_builtin_decls[RS6000_BUILTIN_COUNT];

/* Flag to say the TOC is initialized.  */
int toc_initialized;
char toc_label_name[10];

/* Cached value of rs6000_variable_issue.  This is cached in the
   rs6000_variable_issue hook and returned from rs6000_sched_reorder2.  */
static short cached_can_issue_more;

static GTY(()) section *read_only_data_section;
static GTY(()) section *private_data_section;
static GTY(()) section *tls_data_section;
static GTY(()) section *tls_private_data_section;
static GTY(()) section *read_only_private_data_section;
static GTY(()) section *sdata2_section;
static GTY(()) section *toc_section;
struct builtin_description
{
  const HOST_WIDE_INT mask;
  const enum insn_code icode;
  const char *const name;
  const enum rs6000_builtins code;
};

/* Describe the vector unit used for modes.  */
enum rs6000_vector rs6000_vector_unit[NUM_MACHINE_MODES];
enum rs6000_vector rs6000_vector_mem[NUM_MACHINE_MODES];

/* Register classes for various constraints that are based on the target
   switches.  */
enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];

/* Describe the alignment of a vector.  */
int rs6000_vector_align[NUM_MACHINE_MODES];

/* Map selected modes to types for builtins.  */
static GTY(()) tree builtin_mode_to_type[MAX_MACHINE_MODE][2];

/* What modes to automatically generate reciprocal divide estimate (fre) and
   reciprocal sqrt (frsqrte) for.  */
unsigned char rs6000_recip_bits[MAX_MACHINE_MODE];

/* Masks to determine which reciprocal estimate instructions to generate
   automatically.  */
enum rs6000_recip_mask {
  RECIP_SF_DIV          = 0x001,        /* Use divide estimate.  */
  RECIP_DF_DIV          = 0x002,
  RECIP_V4SF_DIV        = 0x004,
  RECIP_V2DF_DIV        = 0x008,

  RECIP_SF_RSQRT        = 0x010,        /* Use reciprocal sqrt estimate.  */
  RECIP_DF_RSQRT        = 0x020,
  RECIP_V4SF_RSQRT      = 0x040,
  RECIP_V2DF_RSQRT      = 0x080,

  /* Various combinations of flags for -mrecip=xxx.  */
  RECIP_NONE            = 0,
  RECIP_ALL             = (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                           | RECIP_V2DF_DIV | RECIP_SF_RSQRT | RECIP_DF_RSQRT
                           | RECIP_V4SF_RSQRT | RECIP_V2DF_RSQRT),

  RECIP_HIGH_PRECISION  = RECIP_ALL,

  /* On low precision machines like the power5, don't enable double precision
     reciprocal square root estimate, since it isn't accurate enough.  */
  RECIP_LOW_PRECISION   = (RECIP_ALL & ~(RECIP_DF_RSQRT | RECIP_V2DF_RSQRT))
};
/* -mrecip options.  */
static struct
{
  const char *string;           /* option name */
  unsigned int mask;            /* mask bits to set */
} recip_options[] = {
  { "all",       RECIP_ALL },
  { "none",      RECIP_NONE },
  { "div",       (RECIP_SF_DIV | RECIP_DF_DIV | RECIP_V4SF_DIV
                  | RECIP_V2DF_DIV) },
  { "divf",      (RECIP_SF_DIV | RECIP_V4SF_DIV) },
  { "divd",      (RECIP_DF_DIV | RECIP_V2DF_DIV) },
  { "rsqrt",     (RECIP_SF_RSQRT | RECIP_DF_RSQRT | RECIP_V4SF_RSQRT
                  | RECIP_V2DF_RSQRT) },
  { "rsqrtf",    (RECIP_SF_RSQRT | RECIP_V4SF_RSQRT) },
  { "rsqrtd",    (RECIP_DF_RSQRT | RECIP_V2DF_RSQRT) },
};
/* Pointer to function (in rs6000-c.c) that can define or undefine target
   macros that have changed.  Languages that don't support the preprocessor
   don't link in rs6000-c.c, so we can't call it directly.  */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);

/* Simplify register classes into simpler classifications.  We assume
   GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
   check for standard register classes (gpr/floating/altivec/vsx) and
   floating/vector classes (float/altivec/vsx).  */

enum rs6000_reg_type {
  NO_REG_TYPE,
  PSEUDO_REG_TYPE,
  GPR_REG_TYPE,
  VSX_REG_TYPE,
  ALTIVEC_REG_TYPE,
  FPR_REG_TYPE,
  SPR_REG_TYPE,
  CR_REG_TYPE,
  SPE_ACC_TYPE,
  SPEFSCR_REG_TYPE
};

/* Map register class to register type.  */
static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];

/* First/last register type for the 'normal' register types (i.e. general
   purpose, floating point, altivec, and VSX registers).  */
#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)

#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
/* Register classes we care about in secondary reload or when checking for a
   legitimate address.  We only need to worry about GPR, FPR, and Altivec
   registers here, along with an ANY field that is the OR of the three
   register classes.  */

enum rs6000_reload_reg_type {
  RELOAD_REG_GPR,               /* General purpose registers.  */
  RELOAD_REG_FPR,               /* Traditional floating point regs.  */
  RELOAD_REG_VMX,               /* Altivec (VMX) registers.  */
  RELOAD_REG_ANY,               /* OR of GPR, FPR, Altivec masks.  */
  N_RELOAD_REG
};

/* For setting up register classes, loop through the 3 register classes mapping
   into real registers, and skip the ANY class, which is just an OR of the
   bits.  */
#define FIRST_RELOAD_REG_CLASS  RELOAD_REG_GPR
#define LAST_RELOAD_REG_CLASS   RELOAD_REG_VMX

/* Map reload register type to a register in the register class.  */
struct reload_reg_map_type {
  const char *name;             /* Register class name.  */
  int reg;                      /* Register in the register class.  */
};

static const struct reload_reg_map_type reload_reg_map[N_RELOAD_REG] = {
  { "Gpr", FIRST_GPR_REGNO },           /* RELOAD_REG_GPR.  */
  { "Fpr", FIRST_FPR_REGNO },           /* RELOAD_REG_FPR.  */
  { "VMX", FIRST_ALTIVEC_REGNO },       /* RELOAD_REG_VMX.  */
  { "Any", -1 },                        /* RELOAD_REG_ANY.  */
};
/* Mask bits for each register class, indexed per mode.  Historically the
   compiler has been more restrictive about which types can do PRE_MODIFY
   instead of PRE_INC and PRE_DEC, so keep track of separate bits for these
   two.  */
typedef unsigned char addr_mask_type;

#define RELOAD_REG_VALID        0x01    /* Mode valid in register.  */
#define RELOAD_REG_MULTIPLE     0x02    /* Mode takes multiple registers.  */
#define RELOAD_REG_INDEXED      0x04    /* Reg+reg addressing.  */
#define RELOAD_REG_OFFSET       0x08    /* Reg+offset addressing.  */
#define RELOAD_REG_PRE_INCDEC   0x10    /* PRE_INC/PRE_DEC valid.  */
#define RELOAD_REG_PRE_MODIFY   0x20    /* PRE_MODIFY valid.  */
#define RELOAD_REG_AND_M16      0x40    /* AND -16 addressing.  */

/* Masks of the valid addressing modes, indexed by register type.  */
struct rs6000_reg_addr {
  enum insn_code reload_load;     /* INSN to reload for loading.  */
  enum insn_code reload_store;    /* INSN to reload for storing.  */
  enum insn_code reload_fpr_gpr;  /* INSN to move from FPR to GPR.  */
  enum insn_code reload_gpr_vsx;  /* INSN to move from GPR to VSX.  */
  enum insn_code reload_vsx_gpr;  /* INSN to move from VSX to GPR.  */
  addr_mask_type addr_mask[(int)N_RELOAD_REG];  /* Valid address masks.  */
  bool scalar_in_vmx_p;           /* Scalar value can go in VMX.  */
};

static struct rs6000_reg_addr reg_addr[NUM_MACHINE_MODES];

/* Helper function to say whether a mode supports PRE_INC or PRE_DEC.  */
static inline bool
mode_supports_pre_incdec_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_INCDEC)
          != 0);
}

/* Helper function to say whether a mode supports PRE_MODIFY.  */
static inline bool
mode_supports_pre_modify_p (machine_mode mode)
{
  return ((reg_addr[mode].addr_mask[RELOAD_REG_ANY] & RELOAD_REG_PRE_MODIFY)
          != 0);
}
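
/* Illustrative sketch only (the real masks are filled in elsewhere in this
   file from the target flags, not hard-coded): if the RELOAD_REG_ANY entry
   for DFmode were (RELOAD_REG_VALID | RELOAD_REG_OFFSET
   | RELOAD_REG_PRE_INCDEC), then mode_supports_pre_incdec_p (DFmode) would
   return true while mode_supports_pre_modify_p (DFmode) would return
   false.  */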
/* Target cpu costs.  */

struct processor_costs {
  const int mulsi;        /* cost of SImode multiplication.  */
  const int mulsi_const;  /* cost of SImode multiplication by constant.  */
  const int mulsi_const9; /* cost of SImode mult by short constant.  */
  const int muldi;        /* cost of DImode multiplication.  */
  const int divsi;        /* cost of SImode division.  */
  const int divdi;        /* cost of DImode division.  */
  const int fp;           /* cost of simple SFmode and DFmode insns.  */
  const int dmul;         /* cost of DFmode multiplication (and fmadd).  */
  const int sdiv;         /* cost of SFmode division (fdivs).  */
  const int ddiv;         /* cost of DFmode division (fdiv).  */
  const int cache_line_size;    /* cache line size in bytes.  */
  const int l1_cache_size;      /* size of l1 cache, in kilobytes.  */
  const int l2_cache_size;      /* size of l2 cache, in kilobytes.  */
  const int simultaneous_prefetches; /* number of parallel prefetch
                                        operations.  */
};

const struct processor_costs *rs6000_cost;
/* Processor costs (relative to an add).  */

/* Instruction size costs on 32-bit processors.  */
static const
struct processor_costs size32_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  32,                   /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
};

/* Instruction size costs on 64-bit processors.  */
static const
struct processor_costs size64_cost = {
  COSTS_N_INSNS (1),    /* mulsi */
  COSTS_N_INSNS (1),    /* mulsi_const */
  COSTS_N_INSNS (1),    /* mulsi_const9 */
  COSTS_N_INSNS (1),    /* muldi */
  COSTS_N_INSNS (1),    /* divsi */
  COSTS_N_INSNS (1),    /* divdi */
  COSTS_N_INSNS (1),    /* fp */
  COSTS_N_INSNS (1),    /* dmul */
  COSTS_N_INSNS (1),    /* sdiv */
  COSTS_N_INSNS (1),    /* ddiv */
  128,                  /* cache line size */
  0,                    /* l1 cache */
  0,                    /* l2 cache */
  0,                    /* streams */
};
/* Instruction costs on RS64A processors.  */
static const
struct processor_costs rs64a_cost = {
  COSTS_N_INSNS (20),   /* mulsi */
  COSTS_N_INSNS (12),   /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (34),   /* muldi */
  COSTS_N_INSNS (65),   /* divsi */
  COSTS_N_INSNS (67),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (31),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  128,                  /* cache line size */
  128,                  /* l1 cache */
  2048,                 /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on MPCCORE processors.  */
static const
struct processor_costs mpccore_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (6),    /* divsi */
  COSTS_N_INSNS (6),    /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (10),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
};
/* Instruction costs on PPC403 processors.  */
static const
struct processor_costs ppc403_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (33),   /* divsi */
  COSTS_N_INSNS (33),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  4,                    /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC405 processors.  */
static const
struct processor_costs ppc405_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (35),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (11),   /* fp */
  COSTS_N_INSNS (11),   /* dmul */
  COSTS_N_INSNS (11),   /* sdiv */
  COSTS_N_INSNS (11),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC440 processors.  */
static const
struct processor_costs ppc440_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (34),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
};
/* Instruction costs on PPC476 processors.  */
static const
struct processor_costs ppc476_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (11),   /* divsi */
  COSTS_N_INSNS (11),   /* divdi */
  COSTS_N_INSNS (6),    /* fp */
  COSTS_N_INSNS (6),    /* dmul */
  COSTS_N_INSNS (19),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* l1 cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC601 processors.  */
static const
struct processor_costs ppc601_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (36),   /* divsi */
  COSTS_N_INSNS (36),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC603 processors.  */
static const
struct processor_costs ppc603_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (37),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  8,                    /* l1 cache */
  64,                   /* l2 cache */
  1,                    /* streams */
};
/* Instruction costs on PPC604 processors.  */
static const
struct processor_costs ppc604_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC604e processors.  */
static const
struct processor_costs ppc604e_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (20),   /* divsi */
  COSTS_N_INSNS (20),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC620 processors.  */
static const
struct processor_costs ppc620_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (32),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
};
/* Instruction costs on PPC630 processors.  */
static const
struct processor_costs ppc630_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (7),    /* muldi */
  COSTS_N_INSNS (21),   /* divsi */
  COSTS_N_INSNS (37),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (21),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on Cell processor.  */
/* COSTS_N_INSNS (1) ~ one add.  */
static const
struct processor_costs ppccell_cost = {
  COSTS_N_INSNS (9/2)+2,    /* mulsi */
  COSTS_N_INSNS (6/2),      /* mulsi_const */
  COSTS_N_INSNS (6/2),      /* mulsi_const9 */
  COSTS_N_INSNS (15/2)+2,   /* muldi */
  COSTS_N_INSNS (38/2),     /* divsi */
  COSTS_N_INSNS (70/2),     /* divdi */
  COSTS_N_INSNS (10/2),     /* fp */
  COSTS_N_INSNS (10/2),     /* dmul */
  COSTS_N_INSNS (74/2),     /* sdiv */
  COSTS_N_INSNS (74/2),     /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  6,                    /* streams */
};

/* Instruction costs on PPC750 and PPC7400 processors.  */
static const
struct processor_costs ppc750_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (17),   /* divsi */
  COSTS_N_INSNS (17),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (31),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* streams */
};
/* Instruction costs on PPC7450 processors.  */
static const
struct processor_costs ppc7450_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (23),   /* divsi */
  COSTS_N_INSNS (23),   /* divdi */
  COSTS_N_INSNS (5),    /* fp */
  COSTS_N_INSNS (5),    /* dmul */
  COSTS_N_INSNS (21),   /* sdiv */
  COSTS_N_INSNS (35),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  1,                    /* streams */
};

/* Instruction costs on PPC8540 processors.  */
static const
struct processor_costs ppc8540_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (29),   /* sdiv */
  COSTS_N_INSNS (29),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  1,                    /* prefetch streams */
};
/* Instruction costs on E300C2 and E300C3 cores.  */
static const
struct processor_costs ppce300c2c3_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (19),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (4),    /* dmul */
  COSTS_N_INSNS (18),   /* sdiv */
  COSTS_N_INSNS (33),   /* ddiv */
  32,                   /* cache line size */
  16,                   /* l1 cache */
  16,                   /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on PPCE500MC processors.  */
static const
struct processor_costs ppce500mc_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (8),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on PPCE500MC64 processors.  */
static const
struct processor_costs ppce500mc64_cost = {
  COSTS_N_INSNS (4),    /* mulsi */
  COSTS_N_INSNS (4),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (4),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
};
/* Instruction costs on PPCE5500 processors.  */
static const
struct processor_costs ppce5500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on PPCE6500 processors.  */
static const
struct processor_costs ppce6500_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (4),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (14),   /* divsi */
  COSTS_N_INSNS (14),   /* divdi */
  COSTS_N_INSNS (7),    /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (36),   /* sdiv */
  COSTS_N_INSNS (66),   /* ddiv */
  64,                   /* cache line size */
  32,                   /* l1 cache */
  128,                  /* l2 cache */
  1,                    /* prefetch streams */
};

/* Instruction costs on AppliedMicro Titan processors.  */
static const
struct processor_costs titan_cost = {
  COSTS_N_INSNS (5),    /* mulsi */
  COSTS_N_INSNS (5),    /* mulsi_const */
  COSTS_N_INSNS (5),    /* mulsi_const9 */
  COSTS_N_INSNS (5),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (18),   /* divdi */
  COSTS_N_INSNS (10),   /* fp */
  COSTS_N_INSNS (10),   /* dmul */
  COSTS_N_INSNS (46),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  32,                   /* cache line size */
  32,                   /* l1 cache */
  512,                  /* l2 cache */
  1,                    /* prefetch streams */
};
/* Instruction costs on POWER4 and POWER5 processors.  */
static const
struct processor_costs power4_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (4),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (17),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  1024,                 /* l2 cache */
  8,                    /* prefetch streams */
};

/* Instruction costs on POWER6 processors.  */
static const
struct processor_costs power6_cost = {
  COSTS_N_INSNS (8),    /* mulsi */
  COSTS_N_INSNS (8),    /* mulsi_const */
  COSTS_N_INSNS (8),    /* mulsi_const9 */
  COSTS_N_INSNS (8),    /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  64,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
};

/* Instruction costs on POWER7 processors.  */
static const
struct processor_costs power7_cost = {
  COSTS_N_INSNS (2),    /* mulsi */
  COSTS_N_INSNS (2),    /* mulsi_const */
  COSTS_N_INSNS (2),    /* mulsi_const9 */
  COSTS_N_INSNS (2),    /* muldi */
  COSTS_N_INSNS (18),   /* divsi */
  COSTS_N_INSNS (34),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (13),   /* sdiv */
  COSTS_N_INSNS (16),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
};
/* Instruction costs on POWER8 processors.  */
static const
struct processor_costs power8_cost = {
  COSTS_N_INSNS (3),    /* mulsi */
  COSTS_N_INSNS (3),    /* mulsi_const */
  COSTS_N_INSNS (3),    /* mulsi_const9 */
  COSTS_N_INSNS (3),    /* muldi */
  COSTS_N_INSNS (19),   /* divsi */
  COSTS_N_INSNS (35),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (14),   /* sdiv */
  COSTS_N_INSNS (17),   /* ddiv */
  128,                  /* cache line size */
  32,                   /* l1 cache */
  256,                  /* l2 cache */
  12,                   /* prefetch streams */
};

/* Instruction costs on POWER A2 processors.  */
static const
struct processor_costs ppca2_cost = {
  COSTS_N_INSNS (16),   /* mulsi */
  COSTS_N_INSNS (16),   /* mulsi_const */
  COSTS_N_INSNS (16),   /* mulsi_const9 */
  COSTS_N_INSNS (16),   /* muldi */
  COSTS_N_INSNS (22),   /* divsi */
  COSTS_N_INSNS (28),   /* divdi */
  COSTS_N_INSNS (3),    /* fp */
  COSTS_N_INSNS (3),    /* dmul */
  COSTS_N_INSNS (59),   /* sdiv */
  COSTS_N_INSNS (72),   /* ddiv */
  64,                   /* cache line size */
  16,                   /* l1 cache */
  2048,                 /* l2 cache */
  16,                   /* prefetch streams */
};
/* Table that classifies rs6000 builtin functions (pure, const, etc.).  */
#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X

#define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

#define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE) \
  { NAME, ICODE, MASK, ATTR },

struct rs6000_builtin_info_type {
  const char *name;
  const enum insn_code icode;
  const HOST_WIDE_INT mask;
  const unsigned attr;
};

static const struct rs6000_builtin_info_type rs6000_builtin_info[] =
{
#include "rs6000-builtin.def"
};

#undef RS6000_BUILTIN_1
#undef RS6000_BUILTIN_2
#undef RS6000_BUILTIN_3
#undef RS6000_BUILTIN_A
#undef RS6000_BUILTIN_D
#undef RS6000_BUILTIN_E
#undef RS6000_BUILTIN_H
#undef RS6000_BUILTIN_P
#undef RS6000_BUILTIN_Q
#undef RS6000_BUILTIN_S
#undef RS6000_BUILTIN_X
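
/* Illustrative sketch of the X-macro expansion above; the entry shown is
   hypothetical, not copied from rs6000-builtin.def.  A definition such as

     RS6000_BUILTIN_2 (VEC_FOO, "__builtin_vec_foo", MASK, ATTR, CODE_FOR_foo)

   expands, with the macro definitions in effect while rs6000-builtin.def is
   included, to the table initializer

     { "__builtin_vec_foo", CODE_FOR_foo, MASK, ATTR },

   so rs6000_builtin_info ends up with one row per builtin, in the order the
   builtins are defined.  */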
/* Support for -mveclibabi=<xxx> to control which vector library to use.  */
static tree (*rs6000_veclib_handler) (tree, tree, tree);

static bool rs6000_debug_legitimate_address_p (machine_mode, rtx, bool);
static bool spe_func_has_64bit_regs_p (void);
static struct machine_function * rs6000_init_machine_status (void);
static int rs6000_ra_ever_killed (void);
static tree rs6000_handle_longcall_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_altivec_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_handle_struct_attribute (tree *, tree, tree, int, bool *);
static tree rs6000_builtin_vectorized_libmass (tree, tree, tree);
static void rs6000_emit_set_long_const (rtx, HOST_WIDE_INT);
static int rs6000_memory_move_cost (machine_mode, reg_class_t, bool);
static bool rs6000_debug_rtx_costs (rtx, int, int, int, int *, bool);
static int rs6000_debug_address_cost (rtx, machine_mode, addr_space_t,
                                      bool);
static int rs6000_debug_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
static bool is_microcoded_insn (rtx_insn *);
static bool is_nonpipeline_insn (rtx_insn *);
static bool is_cracked_insn (rtx_insn *);
static bool is_load_insn (rtx, rtx *);
static bool is_store_insn (rtx, rtx *);
static bool set_to_load_agen (rtx_insn *, rtx_insn *);
static bool insn_terminates_group_p (rtx_insn *, enum group_termination);
static bool insn_must_be_first_in_group (rtx_insn *);
static bool insn_must_be_last_in_group (rtx_insn *);
static void altivec_init_builtins (void);
static tree builtin_function_type (machine_mode, machine_mode,
                                   machine_mode, machine_mode,
                                   enum rs6000_builtins, const char *name);
static void rs6000_common_init_builtins (void);
static void paired_init_builtins (void);
static rtx paired_expand_predicate_builtin (enum insn_code, tree, rtx);
static void spe_init_builtins (void);
static void htm_init_builtins (void);
static rtx spe_expand_predicate_builtin (enum insn_code, tree, rtx);
static rtx spe_expand_evsel_builtin (enum insn_code, tree, rtx);
static int rs6000_emit_int_cmove (rtx, rtx, rtx, rtx);
static rs6000_stack_t *rs6000_stack_info (void);
static void is_altivec_return_reg (rtx, void *);
int easy_vector_constant (rtx, machine_mode);
static rtx rs6000_debug_legitimize_address (rtx, rtx, machine_mode);
static rtx rs6000_legitimize_tls_address (rtx, enum tls_model);
static rtx rs6000_darwin64_record_arg (CUMULATIVE_ARGS *, const_tree,
                                       bool, bool);
#if TARGET_MACHO
static void macho_branch_islands (void);
#endif
static rtx rs6000_legitimize_reload_address (rtx, machine_mode, int, int,
                                             int, int *);
static rtx rs6000_debug_legitimize_reload_address (rtx, machine_mode, int,
                                                   int, int, int *);
static bool rs6000_mode_dependent_address (const_rtx);
static bool rs6000_debug_mode_dependent_address (const_rtx);
static enum reg_class rs6000_secondary_reload_class (enum reg_class,
                                                     machine_mode, rtx);
static enum reg_class rs6000_debug_secondary_reload_class (enum reg_class,
                                                           machine_mode,
                                                           rtx);
static enum reg_class rs6000_preferred_reload_class (rtx, enum reg_class);
static enum reg_class rs6000_debug_preferred_reload_class (rtx,
                                                           enum reg_class);
static bool rs6000_secondary_memory_needed (enum reg_class, enum reg_class,
                                            machine_mode);
static bool rs6000_debug_secondary_memory_needed (enum reg_class,
                                                  enum reg_class,
                                                  machine_mode);
static bool rs6000_cannot_change_mode_class (machine_mode,
                                             machine_mode,
                                             enum reg_class);
static bool rs6000_debug_cannot_change_mode_class (machine_mode,
                                                   machine_mode,
                                                   enum reg_class);
static bool rs6000_save_toc_in_prologue_p (void);

rtx (*rs6000_legitimize_reload_address_ptr) (rtx, machine_mode, int, int,
                                             int, int *)
  = rs6000_legitimize_reload_address;

static bool (*rs6000_mode_dependent_address_ptr) (const_rtx)
  = rs6000_mode_dependent_address;

enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
                                                     machine_mode, rtx)
  = rs6000_secondary_reload_class;

enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class)
  = rs6000_preferred_reload_class;

bool (*rs6000_secondary_memory_needed_ptr) (enum reg_class, enum reg_class,
                                            machine_mode)
  = rs6000_secondary_memory_needed;

bool (*rs6000_cannot_change_mode_class_ptr) (machine_mode,
                                             machine_mode,
                                             enum reg_class)
  = rs6000_cannot_change_mode_class;

const int INSN_NOT_AVAILABLE = -1;

static void rs6000_print_isa_options (FILE *, int, const char *,
                                      HOST_WIDE_INT);
static void rs6000_print_builtin_options (FILE *, int, const char *,
                                          HOST_WIDE_INT);

static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
                                          enum rs6000_reg_type,
                                          machine_mode,
                                          secondary_reload_info *,
                                          bool);
rtl_opt_pass *make_pass_analyze_swaps (gcc::context*);
/* Hash table stuff for keeping track of TOC entries.  */

struct GTY((for_user)) toc_hash_struct
{
  /* `key' will satisfy CONSTANT_P; in fact, it will satisfy
     ASM_OUTPUT_SPECIAL_POOL_ENTRY_P.  */
  rtx key;
  machine_mode key_mode;
  int labelno;
};

struct toc_hasher : ggc_hasher<toc_hash_struct *>
{
  static hashval_t hash (toc_hash_struct *);
  static bool equal (toc_hash_struct *, toc_hash_struct *);
};

static GTY (()) hash_table<toc_hasher> *toc_hash_table;

/* Hash table to keep track of the argument types for builtin functions.  */

struct GTY((for_user)) builtin_hash_struct
{
  tree type;
  machine_mode mode[4];         /* return value + 3 arguments.  */
  unsigned char uns_p[4];       /* and whether the types are unsigned.  */
};

struct builtin_hasher : ggc_hasher<builtin_hash_struct *>
{
  static hashval_t hash (builtin_hash_struct *);
  static bool equal (builtin_hash_struct *, builtin_hash_struct *);
};

static GTY (()) hash_table<builtin_hasher> *builtin_hash_table;
/* Default register names.  */
char rs6000_reg_names[][8] =
{
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "mq", "lr", "ctr", "ap",
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
  "ca",
  /* AltiVec registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
   "0",  "1",  "2",  "3",  "4",  "5",  "6",  "7",
   "8",  "9", "10", "11", "12", "13", "14", "15",
  "16", "17", "18", "19", "20", "21", "22", "23",
  "24", "25", "26", "27", "28", "29", "30", "31"
};

#ifdef TARGET_REGNAMES
static const char alt_reg_names[][8] =
{
   "%r0",  "%r1",  "%r2",  "%r3",  "%r4",  "%r5",  "%r6",  "%r7",
   "%r8",  "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15",
  "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23",
  "%r24", "%r25", "%r26", "%r27", "%r28", "%r29", "%r30", "%r31",
   "%f0",  "%f1",  "%f2",  "%f3",  "%f4",  "%f5",  "%f6",  "%f7",
   "%f8",  "%f9", "%f10", "%f11", "%f12", "%f13", "%f14", "%f15",
  "%f16", "%f17", "%f18", "%f19", "%f20", "%f21", "%f22", "%f23",
  "%f24", "%f25", "%f26", "%f27", "%f28", "%f29", "%f30", "%f31",
    "mq",   "lr",  "ctr",   "ap",
  "%cr0", "%cr1", "%cr2", "%cr3", "%cr4", "%cr5", "%cr6", "%cr7",
   "ca",
  /* AltiVec registers.  */
   "%v0",  "%v1",  "%v2",  "%v3",  "%v4",  "%v5",  "%v6",  "%v7",
   "%v8",  "%v9", "%v10", "%v11", "%v12", "%v13", "%v14", "%v15",
  "%v16", "%v17", "%v18", "%v19", "%v20", "%v21", "%v22", "%v23",
  "%v24", "%v25", "%v26", "%v27", "%v28", "%v29", "%v30", "%v31",
  "vrsave", "vscr",
  /* SPE registers.  */
  "spe_acc", "spefscr",
  /* Soft frame pointer.  */
  "sfp",
  /* HTM SPR registers.  */
  "tfhar", "tfiar", "texasr",
  /* SPE High registers.  */
  "%rh0",  "%rh1",  "%rh2",  "%rh3",  "%rh4",  "%rh5",  "%rh6",  "%rh7",
  "%rh8",  "%rh9",  "%rh10", "%rh11", "%rh12", "%rh13", "%rh14", "%rh15",
  "%rh16", "%rh17", "%rh18", "%rh19", "%rh20", "%rh21", "%rh22", "%rh23",
  "%rh24", "%rh25", "%rh26", "%rh27", "%rh28", "%rh29", "%rh30", "%rh31"
};
#endif
/* Table of valid machine attributes.  */

static const struct attribute_spec rs6000_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "altivec",   1, 1, false, true,  false, rs6000_handle_altivec_attribute,
    false },
  { "longcall",  0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "shortcall", 0, 0, false, true,  true,  rs6000_handle_longcall_attribute,
    false },
  { "ms_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
  { "gcc_struct", 0, 0, false, false, false, rs6000_handle_struct_attribute,
    false },
#ifdef SUBTARGET_ATTRIBUTE_TABLE
  SUBTARGET_ATTRIBUTE_TABLE,
#endif
  { NULL,        0, 0, false, false, false, NULL, false }
};
#ifndef TARGET_PROFILE_KERNEL
#define TARGET_PROFILE_KERNEL 0
#endif

/* The VRSAVE bitmask puts bit %v0 as the most significant bit.  */
#define ALTIVEC_REG_BIT(REGNO) (0x80000000 >> ((REGNO) - FIRST_ALTIVEC_REGNO))
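
/* For example, ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO) is 0x80000000 (the %v0
   bit) and ALTIVEC_REG_BIT (FIRST_ALTIVEC_REGNO + 31) is 0x00000001 (the
   %v31 bit).  */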
/* Initialize the GCC target structure.  */
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE rs6000_attribute_table
#undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES rs6000_set_default_type_attributes
#undef TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P
#define TARGET_ATTRIBUTE_TAKES_IDENTIFIER_P rs6000_attribute_takes_identifier_p

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP DOUBLE_INT_ASM_OP

/* Default unaligned ops are only provided for ELF.  Find the ops needed
   for non-ELF systems.  */
#ifndef OBJECT_FORMAT_ELF
#if TARGET_XCOFF
/* For XCOFF.  rs6000_assemble_integer will handle unaligned DIs on
   64-bit targets.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.vbyte\t2,"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.vbyte\t4,"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.vbyte\t8,"
#else
/* For Darwin.  */
#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.short\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.quad\t"
#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
#endif
#endif

/* This hook deals with fixups for relocatable code and DI-mode objects
   in 64-bit code.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER rs6000_assemble_integer

#if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
#undef TARGET_ASM_ASSEMBLE_VISIBILITY
#define TARGET_ASM_ASSEMBLE_VISIBILITY rs6000_assemble_visibility
#endif

#undef TARGET_SET_UP_BY_PROLOGUE
#define TARGET_SET_UP_BY_PROLOGUE rs6000_set_up_by_prologue

#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS HAVE_AS_TLS

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM rs6000_cannot_force_const_mem

#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS rs6000_delegitimize_address

#undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
#define TARGET_CONST_NOT_OK_FOR_DEBUG_P rs6000_const_not_ok_for_debug_p

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE rs6000_output_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE rs6000_output_function_epilogue

#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA rs6000_output_addr_const_extra

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS rs6000_legitimize_address

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE rs6000_variable_issue

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE rs6000_issue_rate
#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST rs6000_adjust_cost
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY rs6000_adjust_priority
#undef TARGET_SCHED_IS_COSTLY_DEPENDENCE
#define TARGET_SCHED_IS_COSTLY_DEPENDENCE rs6000_is_costly_dependence
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT rs6000_sched_init
#undef TARGET_SCHED_FINISH
#define TARGET_SCHED_FINISH rs6000_sched_finish
#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER rs6000_sched_reorder
#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 rs6000_sched_reorder2

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD rs6000_use_sched_lookahead

#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD rs6000_use_sched_lookahead_guard

#undef TARGET_SCHED_ALLOC_SCHED_CONTEXT
#define TARGET_SCHED_ALLOC_SCHED_CONTEXT rs6000_alloc_sched_context
#undef TARGET_SCHED_INIT_SCHED_CONTEXT
#define TARGET_SCHED_INIT_SCHED_CONTEXT rs6000_init_sched_context
#undef TARGET_SCHED_SET_SCHED_CONTEXT
#define TARGET_SCHED_SET_SCHED_CONTEXT rs6000_set_sched_context
#undef TARGET_SCHED_FREE_SCHED_CONTEXT
#define TARGET_SCHED_FREE_SCHED_CONTEXT rs6000_free_sched_context

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD rs6000_builtin_mask_for_load
#undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
#define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
  rs6000_builtin_support_vector_misalignment
#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE rs6000_vector_alignment_reachable
#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
  rs6000_builtin_vectorization_cost
#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE \
  rs6000_preferred_simd_mode
#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST rs6000_init_cost
#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST rs6000_add_stmt_cost
#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST rs6000_finish_cost
#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA rs6000_destroy_cost_data
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS rs6000_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL rs6000_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN rs6000_expand_builtin

#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE rs6000_mangle_type

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS rs6000_init_libfuncs

#if TARGET_MACHO
#undef TARGET_BINDS_LOCAL_P
#define TARGET_BINDS_LOCAL_P darwin_binds_local_p
#endif

#undef TARGET_MS_BITFIELD_LAYOUT_P
#define TARGET_MS_BITFIELD_LAYOUT_P rs6000_ms_bitfield_layout_p

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK rs6000_output_mi_thunk

#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL rs6000_function_ok_for_sibcall

#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST rs6000_register_move_cost
#undef TARGET_MEMORY_MOVE_COST
#define TARGET_MEMORY_MOVE_COST rs6000_memory_move_cost
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_DWARF_REGISTER_SPAN
#define TARGET_DWARF_REGISTER_SPAN rs6000_dwarf_register_span

#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra

#undef TARGET_MEMBER_TYPE_FORCES_BLK
#define TARGET_MEMBER_TYPE_FORCES_BLK rs6000_member_type_forces_blk

/* On rs6000, function arguments are promoted, as are function return
   values.  */
#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY rs6000_return_in_memory

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB rs6000_return_in_msb

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs

/* Always strict argument naming on rs6000.  */
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
#define TARGET_PRETEND_OUTGOING_VARARGS_NAMED hook_bool_CUMULATIVE_ARGS_true
#undef TARGET_SPLIT_COMPLEX_ARG
#define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK rs6000_must_pass_in_stack
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE rs6000_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES rs6000_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE rs6000_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG rs6000_function_arg
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY rs6000_function_arg_boundary

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST rs6000_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START rs6000_va_start

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR rs6000_gimplify_va_arg

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE rs6000_eh_return_filter_mode

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P rs6000_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P rs6000_vector_mode_supported_p

#undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
#define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN invalid_arg_for_unprototyped_fn

#undef TARGET_ASM_LOOP_ALIGN_MAX_SKIP
#define TARGET_ASM_LOOP_ALIGN_MAX_SKIP rs6000_loop_align_max_skip

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE rs6000_option_override

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
#define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
  rs6000_builtin_vectorized_function

#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL rs6000_stack_protect_fail
#endif

/* MPC604EUM 3.5.2 Weak Consistency between Multiple Processors
   The PowerPC architecture requires only weak consistency among
   processors--that is, memory accesses between processors need not be
   sequentially consistent and memory accesses among processors can occur
   in any order.  The ability to order memory accesses weakly provides
   opportunities for more efficient use of the system bus.  Unless a
   dependency exists, the 604e allows read operations to precede store
   operations.  */
#undef TARGET_RELAXED_ORDERING
#define TARGET_RELAXED_ORDERING true
1582 #ifdef HAVE_AS_TLS
1583 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1584 #define TARGET_ASM_OUTPUT_DWARF_DTPREL rs6000_output_dwarf_dtprel
1585 #endif
1587 /* Use a 32-bit anchor range. This leads to sequences like:
1589 addis tmp,anchor,high
1590 add dest,tmp,low
1592 where tmp itself acts as an anchor, and can be shared between
1593 accesses to the same 64k page. */
1594 #undef TARGET_MIN_ANCHOR_OFFSET
1595 #define TARGET_MIN_ANCHOR_OFFSET -0x7fffffff - 1
1596 #undef TARGET_MAX_ANCHOR_OFFSET
1597 #define TARGET_MAX_ANCHOR_OFFSET 0x7fffffff
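/* Illustrative sketch (not from the original source): splitting a 32-bit
   anchor offset into the high/low halves consumed by the addis/add pair
   shown above.  The low half is used as a sign-extended 16-bit value, so
   the high half must be rounded to compensate; this assumes the
   compensated high half still fits in the signed 16-bit addis immediate.  */
static inline void
example_split_anchor_offset (int offset, int *high, int *low)
{
  *low = (short) (offset & 0xffff);	/* sign-extended low 16 bits */
  *high = (int) (((long long) offset - *low) >> 16); /* rounded high half */
}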
1598 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1599 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P rs6000_use_blocks_for_constant_p
1600 #undef TARGET_USE_BLOCKS_FOR_DECL_P
1601 #define TARGET_USE_BLOCKS_FOR_DECL_P rs6000_use_blocks_for_decl_p
1603 #undef TARGET_BUILTIN_RECIPROCAL
1604 #define TARGET_BUILTIN_RECIPROCAL rs6000_builtin_reciprocal
1606 #undef TARGET_EXPAND_TO_RTL_HOOK
1607 #define TARGET_EXPAND_TO_RTL_HOOK rs6000_alloc_sdmode_stack_slot
1609 #undef TARGET_INSTANTIATE_DECLS
1610 #define TARGET_INSTANTIATE_DECLS rs6000_instantiate_decls
1612 #undef TARGET_SECONDARY_RELOAD
1613 #define TARGET_SECONDARY_RELOAD rs6000_secondary_reload
1615 #undef TARGET_LEGITIMATE_ADDRESS_P
1616 #define TARGET_LEGITIMATE_ADDRESS_P rs6000_legitimate_address_p
1618 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
1619 #define TARGET_MODE_DEPENDENT_ADDRESS_P rs6000_mode_dependent_address_p
1621 #undef TARGET_LRA_P
1622 #define TARGET_LRA_P rs6000_lra_p
1624 #undef TARGET_CAN_ELIMINATE
1625 #define TARGET_CAN_ELIMINATE rs6000_can_eliminate
1627 #undef TARGET_CONDITIONAL_REGISTER_USAGE
1628 #define TARGET_CONDITIONAL_REGISTER_USAGE rs6000_conditional_register_usage
1630 #undef TARGET_TRAMPOLINE_INIT
1631 #define TARGET_TRAMPOLINE_INIT rs6000_trampoline_init
1633 #undef TARGET_FUNCTION_VALUE
1634 #define TARGET_FUNCTION_VALUE rs6000_function_value
1636 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
1637 #define TARGET_OPTION_VALID_ATTRIBUTE_P rs6000_valid_attribute_p
1639 #undef TARGET_OPTION_SAVE
1640 #define TARGET_OPTION_SAVE rs6000_function_specific_save
1642 #undef TARGET_OPTION_RESTORE
1643 #define TARGET_OPTION_RESTORE rs6000_function_specific_restore
1645 #undef TARGET_OPTION_PRINT
1646 #define TARGET_OPTION_PRINT rs6000_function_specific_print
1648 #undef TARGET_CAN_INLINE_P
1649 #define TARGET_CAN_INLINE_P rs6000_can_inline_p
1651 #undef TARGET_SET_CURRENT_FUNCTION
1652 #define TARGET_SET_CURRENT_FUNCTION rs6000_set_current_function
1654 #undef TARGET_LEGITIMATE_CONSTANT_P
1655 #define TARGET_LEGITIMATE_CONSTANT_P rs6000_legitimate_constant_p
1657 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
1658 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK rs6000_vectorize_vec_perm_const_ok
1660 #undef TARGET_CAN_USE_DOLOOP_P
1661 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
1663 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
1664 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV rs6000_atomic_assign_expand_fenv
1667 /* Processor table. */
1668 struct rs6000_ptt
1670 const char *const name; /* Canonical processor name. */
1671 const enum processor_type processor; /* Processor type enum value. */
1672 const HOST_WIDE_INT target_enable; /* Target flags to enable. */
1675 static struct rs6000_ptt const processor_target_table[] =
1677 #define RS6000_CPU(NAME, CPU, FLAGS) { NAME, CPU, FLAGS },
1678 #include "rs6000-cpus.def"
1679 #undef RS6000_CPU
1682 /* Look up a processor name for -mcpu=xxx and -mtune=xxx. Return -1 if the
1683 name is invalid. */
1685 static int
1686 rs6000_cpu_name_lookup (const char *name)
1688 size_t i;
1690 if (name != NULL)
1692 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
1693 if (! strcmp (name, processor_target_table[i].name))
1694 return (int)i;
1697 return -1;
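/* Usage sketch (illustrative, not from the original source): validating a
   -mcpu= string against the table.  "power8" is assumed here to be one of
   the names pulled in from rs6000-cpus.def.

     int idx = rs6000_cpu_name_lookup ("power8");
     if (idx < 0)
       error ("unknown cpu");  */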
1701 /* Return number of consecutive hard regs needed starting at reg REGNO
1702 to hold something of mode MODE.
1703 This is ordinarily the length in words of a value of mode MODE
1704 but can be less for certain modes in special long registers.
1706 For the SPE, GPRs are 64 bits but only 32 bits are visible in
1707 scalar instructions. The upper 32 bits are only available to the
1708 SIMD instructions.
1710 POWER and PowerPC GPRs hold 32 bits worth;
1711 PowerPC64 GPRs and FPRs hold 64 bits worth. */
1713 static int
1714 rs6000_hard_regno_nregs_internal (int regno, machine_mode mode)
1716 unsigned HOST_WIDE_INT reg_size;
1718 /* TF/TD modes are special in that they always take 2 registers. */
1719 if (FP_REGNO_P (regno))
1720 reg_size = ((VECTOR_MEM_VSX_P (mode) && mode != TDmode && mode != TFmode)
1721 ? UNITS_PER_VSX_WORD
1722 : UNITS_PER_FP_WORD);
1724 else if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1725 reg_size = UNITS_PER_SPE_WORD;
1727 else if (ALTIVEC_REGNO_P (regno))
1728 reg_size = UNITS_PER_ALTIVEC_WORD;
1730 /* The value returned for SCmode in the E500 double case is 2 for
1731 ABI compatibility; storing an SCmode value in a single register
1732 would require function_arg and rs6000_spe_function_arg to handle
1733 SCmode so as to pass the value correctly in a pair of
1734 registers. */
1735 else if (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode) && mode != SCmode
1736 && !DECIMAL_FLOAT_MODE_P (mode) && SPE_SIMD_REGNO_P (regno))
1737 reg_size = UNITS_PER_FP_WORD;
1739 else
1740 reg_size = UNITS_PER_WORD;
1742 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
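/* As a worked example of the rounding above: with 32-bit GPRs
   (reg_size == 4), a 16-byte TImode value needs (16 + 4 - 1) / 4 == 4
   consecutive registers, while on PowerPC64 (reg_size == 8) it needs only
   2; the round-up form makes a partial trailing word claim a whole
   register.  */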
1745 /* Value is 1 if hard register REGNO can hold a value of machine-mode
1746 MODE. */
1747 static int
1748 rs6000_hard_regno_mode_ok (int regno, machine_mode mode)
1750 int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
1752 /* PTImode can only go in GPRs.  We use PTImode wherever we need to deal
1753 with quad word memory operations, which require even/odd register
1754 combinations.  Don't allow quad words in the argument or frame
1755 pointer registers, just registers 0..31. */
1756 if (mode == PTImode)
1757 return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1758 && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
1759 && ((regno & 1) == 0));
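/* E.g. a PTImode value starting at an even GPR such as r10 passes the
   (regno & 1) check above, while one starting at r11 is rejected,
   matching the even/odd pairing that quad word memory operations
   require.  */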
1761 /* VSX registers that overlap the FPR registers are larger than for non-VSX
1762 implementations. Don't allow an item to be split between a FP register
1763 and an Altivec register. Allow TImode in all VSX registers if the user
1764 asked for it. */
1765 if (TARGET_VSX && VSX_REGNO_P (regno)
1766 && (VECTOR_MEM_VSX_P (mode)
1767 || reg_addr[mode].scalar_in_vmx_p
1768 || (TARGET_VSX_TIMODE && mode == TImode)
1769 || (TARGET_VADDUQM && mode == V1TImode)))
1771 if (FP_REGNO_P (regno))
1772 return FP_REGNO_P (last_regno);
1774 if (ALTIVEC_REGNO_P (regno))
1776 if (GET_MODE_SIZE (mode) != 16 && !reg_addr[mode].scalar_in_vmx_p)
1777 return 0;
1779 return ALTIVEC_REGNO_P (last_regno);
1783 /* The GPRs can hold any mode, but values bigger than one register
1784 cannot go past R31. */
1785 if (INT_REGNO_P (regno))
1786 return INT_REGNO_P (last_regno);
1788 /* The float registers (except for VSX vector modes) can only hold floating
1789 modes and DImode. */
1790 if (FP_REGNO_P (regno))
1792 if (SCALAR_FLOAT_MODE_P (mode)
1793 && (mode != TDmode || (regno % 2) == 0)
1794 && FP_REGNO_P (last_regno))
1795 return 1;
1797 if (GET_MODE_CLASS (mode) == MODE_INT
1798 && GET_MODE_SIZE (mode) == UNITS_PER_FP_WORD)
1799 return 1;
1801 if (PAIRED_SIMD_REGNO_P (regno) && TARGET_PAIRED_FLOAT
1802 && PAIRED_VECTOR_MODE (mode))
1803 return 1;
1805 return 0;
1808 /* The CR register can only hold CC modes. */
1809 if (CR_REGNO_P (regno))
1810 return GET_MODE_CLASS (mode) == MODE_CC;
1812 if (CA_REGNO_P (regno))
1813 return mode == Pmode || mode == SImode;
1815 /* AltiVec modes can go only in AltiVec registers. */
1816 if (ALTIVEC_REGNO_P (regno))
1817 return (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode)
1818 || mode == V1TImode);
1820 /* ...but GPRs can hold SIMD data on the SPE in one register. */
1821 if (SPE_SIMD_REGNO_P (regno) && TARGET_SPE && SPE_VECTOR_MODE (mode))
1822 return 1;
1824 /* We cannot put non-VSX TImode or PTImode anywhere except the general
1825 registers, and the value must be able to fit within the register set. */
1827 return GET_MODE_SIZE (mode) <= UNITS_PER_WORD;
1830 /* Print interesting facts about registers. */
1831 static void
1832 rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
1834 int r, m;
1836 for (r = first_regno; r <= last_regno; ++r)
1838 const char *comma = "";
1839 int len;
1841 if (first_regno == last_regno)
1842 fprintf (stderr, "%s:\t", reg_name);
1843 else
1844 fprintf (stderr, "%s%d:\t", reg_name, r - first_regno);
1846 len = 8;
1847 for (m = 0; m < NUM_MACHINE_MODES; ++m)
1848 if (rs6000_hard_regno_mode_ok_p[m][r] && rs6000_hard_regno_nregs[m][r])
1850 if (len > 70)
1852 fprintf (stderr, ",\n\t");
1853 len = 8;
1854 comma = "";
1857 if (rs6000_hard_regno_nregs[m][r] > 1)
1858 len += fprintf (stderr, "%s%s/%d", comma, GET_MODE_NAME (m),
1859 rs6000_hard_regno_nregs[m][r]);
1860 else
1861 len += fprintf (stderr, "%s%s", comma, GET_MODE_NAME (m));
1863 comma = ", ";
1866 if (call_used_regs[r])
1868 if (len > 70)
1870 fprintf (stderr, ",\n\t");
1871 len = 8;
1872 comma = "";
1875 len += fprintf (stderr, "%s%s", comma, "call-used");
1876 comma = ", ";
1879 if (fixed_regs[r])
1881 if (len > 70)
1883 fprintf (stderr, ",\n\t");
1884 len = 8;
1885 comma = "";
1888 len += fprintf (stderr, "%s%s", comma, "fixed");
1889 comma = ", ";
1892 if (len > 70)
1894 fprintf (stderr, ",\n\t");
1895 comma = "";
1898 len += fprintf (stderr, "%sreg-class = %s", comma,
1899 reg_class_names[(int)rs6000_regno_regclass[r]]);
1900 comma = ", ";
1902 if (len > 70)
1904 fprintf (stderr, ",\n\t");
1905 comma = "";
1908 fprintf (stderr, "%sregno = %d\n", comma, r);
1912 static const char *
1913 rs6000_debug_vector_unit (enum rs6000_vector v)
1915 const char *ret;
1917 switch (v)
1919 case VECTOR_NONE: ret = "none"; break;
1920 case VECTOR_ALTIVEC: ret = "altivec"; break;
1921 case VECTOR_VSX: ret = "vsx"; break;
1922 case VECTOR_P8_VECTOR: ret = "p8_vector"; break;
1923 case VECTOR_PAIRED: ret = "paired"; break;
1924 case VECTOR_SPE: ret = "spe"; break;
1925 case VECTOR_OTHER: ret = "other"; break;
1926 default: ret = "unknown"; break;
1929 return ret;
1932 /* Inner function printing just the address mask for a particular reload
1933 register class. */
1934 DEBUG_FUNCTION char *
1935 rs6000_debug_addr_mask (addr_mask_type mask, bool keep_spaces)
1937 static char ret[8];
1938 char *p = ret;
1940 if ((mask & RELOAD_REG_VALID) != 0)
1941 *p++ = 'v';
1942 else if (keep_spaces)
1943 *p++ = ' ';
1945 if ((mask & RELOAD_REG_MULTIPLE) != 0)
1946 *p++ = 'm';
1947 else if (keep_spaces)
1948 *p++ = ' ';
1950 if ((mask & RELOAD_REG_INDEXED) != 0)
1951 *p++ = 'i';
1952 else if (keep_spaces)
1953 *p++ = ' ';
1955 if ((mask & RELOAD_REG_OFFSET) != 0)
1956 *p++ = 'o';
1957 else if (keep_spaces)
1958 *p++ = ' ';
1960 if ((mask & RELOAD_REG_PRE_INCDEC) != 0)
1961 *p++ = '+';
1962 else if (keep_spaces)
1963 *p++ = ' ';
1965 if ((mask & RELOAD_REG_PRE_MODIFY) != 0)
1966 *p++ = '+';
1967 else if (keep_spaces)
1968 *p++ = ' ';
1970 if ((mask & RELOAD_REG_AND_M16) != 0)
1971 *p++ = '&';
1972 else if (keep_spaces)
1973 *p++ = ' ';
1975 *p = '\0';
1977 return ret;
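/* For illustration: a mask with RELOAD_REG_VALID, RELOAD_REG_INDEXED,
   RELOAD_REG_OFFSET and both the pre-increment and pre-modify bits set
   prints as "vio++", or as "v io++ " when KEEP_SPACES pads the slots for
   the missing multiple and and-masking flags.  */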
1980 /* Print the address masks in a human readable fashion. */
1981 DEBUG_FUNCTION void
1982 rs6000_debug_print_mode (ssize_t m)
1984 ssize_t rc;
1986 fprintf (stderr, "Mode: %-5s", GET_MODE_NAME (m));
1987 for (rc = 0; rc < N_RELOAD_REG; rc++)
1988 fprintf (stderr, " %s: %s", reload_reg_map[rc].name,
1989 rs6000_debug_addr_mask (reg_addr[m].addr_mask[rc], true));
1991 if (rs6000_vector_unit[m] != VECTOR_NONE
1992 || rs6000_vector_mem[m] != VECTOR_NONE
1993 || (reg_addr[m].reload_store != CODE_FOR_nothing)
1994 || (reg_addr[m].reload_load != CODE_FOR_nothing)
1995 || reg_addr[m].scalar_in_vmx_p)
1997 fprintf (stderr,
1998 " Vector-arith=%-10s Vector-mem=%-10s Reload=%c%c Upper=%c",
1999 rs6000_debug_vector_unit (rs6000_vector_unit[m]),
2000 rs6000_debug_vector_unit (rs6000_vector_mem[m]),
2001 (reg_addr[m].reload_store != CODE_FOR_nothing) ? 's' : '*',
2002 (reg_addr[m].reload_load != CODE_FOR_nothing) ? 'l' : '*',
2003 (reg_addr[m].scalar_in_vmx_p) ? 'y' : 'n');
2006 fputs ("\n", stderr);
2009 #define DEBUG_FMT_ID "%-32s= "
2010 #define DEBUG_FMT_D DEBUG_FMT_ID "%d\n"
2011 #define DEBUG_FMT_WX DEBUG_FMT_ID "%#.12" HOST_WIDE_INT_PRINT "x: "
2012 #define DEBUG_FMT_S DEBUG_FMT_ID "%s\n"
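/* Example of the format in use: fprintf (stderr, DEBUG_FMT_D, "tls_size", 16)
   left-justifies the name in a 32-column field and prints:

   tls_size                        = 16  */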
2014 /* Print various interesting information with -mdebug=reg. */
2015 static void
2016 rs6000_debug_reg_global (void)
2018 static const char *const tf[2] = { "false", "true" };
2019 const char *nl = (const char *)0;
2020 int m;
2021 size_t m1, m2, v;
2022 char costly_num[20];
2023 char nop_num[20];
2024 char flags_buffer[40];
2025 const char *costly_str;
2026 const char *nop_str;
2027 const char *trace_str;
2028 const char *abi_str;
2029 const char *cmodel_str;
2030 struct cl_target_option cl_opts;
2032 /* Modes we want tieable information on. */
2033 static const machine_mode print_tieable_modes[] = {
2034 QImode,
2035 HImode,
2036 SImode,
2037 DImode,
2038 TImode,
2039 PTImode,
2040 SFmode,
2041 DFmode,
2042 TFmode,
2043 SDmode,
2044 DDmode,
2045 TDmode,
2046 V8QImode,
2047 V4HImode,
2048 V2SImode,
2049 V16QImode,
2050 V8HImode,
2051 V4SImode,
2052 V2DImode,
2053 V1TImode,
2054 V32QImode,
2055 V16HImode,
2056 V8SImode,
2057 V4DImode,
2058 V2TImode,
2059 V2SFmode,
2060 V4SFmode,
2061 V2DFmode,
2062 V8SFmode,
2063 V4DFmode,
2064 CCmode,
2065 CCUNSmode,
2066 CCEQmode,
2069 /* Virtual regs we are interested in. */
2070 const static struct {
2071 int regno; /* register number. */
2072 const char *name; /* register name. */
2073 } virtual_regs[] = {
2074 { STACK_POINTER_REGNUM, "stack pointer:" },
2075 { TOC_REGNUM, "toc: " },
2076 { STATIC_CHAIN_REGNUM, "static chain: " },
2077 { RS6000_PIC_OFFSET_TABLE_REGNUM, "pic offset: " },
2078 { HARD_FRAME_POINTER_REGNUM, "hard frame: " },
2079 { ARG_POINTER_REGNUM, "arg pointer: " },
2080 { FRAME_POINTER_REGNUM, "frame pointer:" },
2081 { FIRST_PSEUDO_REGISTER, "first pseudo: " },
2082 { FIRST_VIRTUAL_REGISTER, "first virtual:" },
2083 { VIRTUAL_INCOMING_ARGS_REGNUM, "incoming_args:" },
2084 { VIRTUAL_STACK_VARS_REGNUM, "stack_vars: " },
2085 { VIRTUAL_STACK_DYNAMIC_REGNUM, "stack_dynamic:" },
2086 { VIRTUAL_OUTGOING_ARGS_REGNUM, "outgoing_args:" },
2087 { VIRTUAL_CFA_REGNUM, "cfa (frame): " },
2088 { VIRTUAL_PREFERRED_STACK_BOUNDARY_REGNUM, "stack boundary:" },
2089 { LAST_VIRTUAL_REGISTER, "last virtual: " },
2092 fputs ("\nHard register information:\n", stderr);
2093 rs6000_debug_reg_print (FIRST_GPR_REGNO, LAST_GPR_REGNO, "gr");
2094 rs6000_debug_reg_print (FIRST_FPR_REGNO, LAST_FPR_REGNO, "fp");
2095 rs6000_debug_reg_print (FIRST_ALTIVEC_REGNO,
2096 LAST_ALTIVEC_REGNO,
2097 "vs");
2098 rs6000_debug_reg_print (LR_REGNO, LR_REGNO, "lr");
2099 rs6000_debug_reg_print (CTR_REGNO, CTR_REGNO, "ctr");
2100 rs6000_debug_reg_print (CR0_REGNO, CR7_REGNO, "cr");
2101 rs6000_debug_reg_print (CA_REGNO, CA_REGNO, "ca");
2102 rs6000_debug_reg_print (VRSAVE_REGNO, VRSAVE_REGNO, "vrsave");
2103 rs6000_debug_reg_print (VSCR_REGNO, VSCR_REGNO, "vscr");
2104 rs6000_debug_reg_print (SPE_ACC_REGNO, SPE_ACC_REGNO, "spe_a");
2105 rs6000_debug_reg_print (SPEFSCR_REGNO, SPEFSCR_REGNO, "spe_f");
2107 fputs ("\nVirtual/stack/frame registers:\n", stderr);
2108 for (v = 0; v < ARRAY_SIZE (virtual_regs); v++)
2109 fprintf (stderr, "%s regno = %3d\n", virtual_regs[v].name, virtual_regs[v].regno);
2111 fprintf (stderr,
2112 "\n"
2113 "d reg_class = %s\n"
2114 "f reg_class = %s\n"
2115 "v reg_class = %s\n"
2116 "wa reg_class = %s\n"
2117 "wd reg_class = %s\n"
2118 "wf reg_class = %s\n"
2119 "wg reg_class = %s\n"
2120 "wh reg_class = %s\n"
2121 "wi reg_class = %s\n"
2122 "wj reg_class = %s\n"
2123 "wk reg_class = %s\n"
2124 "wl reg_class = %s\n"
2125 "wm reg_class = %s\n"
2126 "wr reg_class = %s\n"
2127 "ws reg_class = %s\n"
2128 "wt reg_class = %s\n"
2129 "wu reg_class = %s\n"
2130 "wv reg_class = %s\n"
2131 "ww reg_class = %s\n"
2132 "wx reg_class = %s\n"
2133 "wy reg_class = %s\n"
2134 "wz reg_class = %s\n"
2135 "\n",
2136 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_d]],
2137 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_f]],
2138 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_v]],
2139 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wa]],
2140 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wd]],
2141 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
2142 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
2143 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wh]],
2144 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wi]],
2145 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wj]],
2146 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wk]],
2147 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
2148 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
2149 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
2150 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
2151 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
2152 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wu]],
2153 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
2154 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ww]],
2155 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
2156 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wy]],
2157 reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
2159 nl = "\n";
2160 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2161 rs6000_debug_print_mode (m);
2163 fputs ("\n", stderr);
2165 for (m1 = 0; m1 < ARRAY_SIZE (print_tieable_modes); m1++)
2167 machine_mode mode1 = print_tieable_modes[m1];
2168 bool first_time = true;
2170 nl = (const char *)0;
2171 for (m2 = 0; m2 < ARRAY_SIZE (print_tieable_modes); m2++)
2173 machine_mode mode2 = print_tieable_modes[m2];
2174 if (mode1 != mode2 && MODES_TIEABLE_P (mode1, mode2))
2176 if (first_time)
2178 fprintf (stderr, "Tieable modes %s:", GET_MODE_NAME (mode1));
2179 nl = "\n";
2180 first_time = false;
2183 fprintf (stderr, " %s", GET_MODE_NAME (mode2));
2187 if (!first_time)
2188 fputs ("\n", stderr);
2191 if (nl)
2192 fputs (nl, stderr);
2194 if (rs6000_recip_control)
2196 fprintf (stderr, "\nReciprocal mask = 0x%x\n", rs6000_recip_control);
2198 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2199 if (rs6000_recip_bits[m])
2201 fprintf (stderr,
2202 "Reciprocal estimate mode: %-5s divide: %s rsqrt: %s\n",
2203 GET_MODE_NAME (m),
2204 (RS6000_RECIP_AUTO_RE_P (m)
2205 ? "auto"
2206 : (RS6000_RECIP_HAVE_RE_P (m) ? "have" : "none")),
2207 (RS6000_RECIP_AUTO_RSQRTE_P (m)
2208 ? "auto"
2209 : (RS6000_RECIP_HAVE_RSQRTE_P (m) ? "have" : "none")));
2212 fputs ("\n", stderr);
2215 if (rs6000_cpu_index >= 0)
2217 const char *name = processor_target_table[rs6000_cpu_index].name;
2218 HOST_WIDE_INT flags
2219 = processor_target_table[rs6000_cpu_index].target_enable;
2221 sprintf (flags_buffer, "-mcpu=%s flags", name);
2222 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2224 else
2225 fprintf (stderr, DEBUG_FMT_S, "cpu", "<none>");
2227 if (rs6000_tune_index >= 0)
2229 const char *name = processor_target_table[rs6000_tune_index].name;
2230 HOST_WIDE_INT flags
2231 = processor_target_table[rs6000_tune_index].target_enable;
2233 sprintf (flags_buffer, "-mtune=%s flags", name);
2234 rs6000_print_isa_options (stderr, 0, flags_buffer, flags);
2236 else
2237 fprintf (stderr, DEBUG_FMT_S, "tune", "<none>");
2239 cl_target_option_save (&cl_opts, &global_options);
2240 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags",
2241 rs6000_isa_flags);
2243 rs6000_print_isa_options (stderr, 0, "rs6000_isa_flags_explicit",
2244 rs6000_isa_flags_explicit);
2246 rs6000_print_builtin_options (stderr, 0, "rs6000_builtin_mask",
2247 rs6000_builtin_mask);
2249 rs6000_print_isa_options (stderr, 0, "TARGET_DEFAULT", TARGET_DEFAULT);
2251 fprintf (stderr, DEBUG_FMT_S, "--with-cpu default",
2252 OPTION_TARGET_CPU_DEFAULT ? OPTION_TARGET_CPU_DEFAULT : "<none>");
2254 switch (rs6000_sched_costly_dep)
2256 case max_dep_latency:
2257 costly_str = "max_dep_latency";
2258 break;
2260 case no_dep_costly:
2261 costly_str = "no_dep_costly";
2262 break;
2264 case all_deps_costly:
2265 costly_str = "all_deps_costly";
2266 break;
2268 case true_store_to_load_dep_costly:
2269 costly_str = "true_store_to_load_dep_costly";
2270 break;
2272 case store_to_load_dep_costly:
2273 costly_str = "store_to_load_dep_costly";
2274 break;
2276 default:
2277 costly_str = costly_num;
2278 sprintf (costly_num, "%d", (int)rs6000_sched_costly_dep);
2279 break;
2282 fprintf (stderr, DEBUG_FMT_S, "sched_costly_dep", costly_str);
2284 switch (rs6000_sched_insert_nops)
2286 case sched_finish_regroup_exact:
2287 nop_str = "sched_finish_regroup_exact";
2288 break;
2290 case sched_finish_pad_groups:
2291 nop_str = "sched_finish_pad_groups";
2292 break;
2294 case sched_finish_none:
2295 nop_str = "sched_finish_none";
2296 break;
2298 default:
2299 nop_str = nop_num;
2300 sprintf (nop_num, "%d", (int)rs6000_sched_insert_nops);
2301 break;
2304 fprintf (stderr, DEBUG_FMT_S, "sched_insert_nops", nop_str);
2306 switch (rs6000_sdata)
2308 default:
2309 case SDATA_NONE:
2310 break;
2312 case SDATA_DATA:
2313 fprintf (stderr, DEBUG_FMT_S, "sdata", "data");
2314 break;
2316 case SDATA_SYSV:
2317 fprintf (stderr, DEBUG_FMT_S, "sdata", "sysv");
2318 break;
2320 case SDATA_EABI:
2321 fprintf (stderr, DEBUG_FMT_S, "sdata", "eabi");
2322 break;
2326 switch (rs6000_traceback)
2328 case traceback_default: trace_str = "default"; break;
2329 case traceback_none: trace_str = "none"; break;
2330 case traceback_part: trace_str = "part"; break;
2331 case traceback_full: trace_str = "full"; break;
2332 default: trace_str = "unknown"; break;
2335 fprintf (stderr, DEBUG_FMT_S, "traceback", trace_str);
2337 switch (rs6000_current_cmodel)
2339 case CMODEL_SMALL: cmodel_str = "small"; break;
2340 case CMODEL_MEDIUM: cmodel_str = "medium"; break;
2341 case CMODEL_LARGE: cmodel_str = "large"; break;
2342 default: cmodel_str = "unknown"; break;
2345 fprintf (stderr, DEBUG_FMT_S, "cmodel", cmodel_str);
2347 switch (rs6000_current_abi)
2349 case ABI_NONE: abi_str = "none"; break;
2350 case ABI_AIX: abi_str = "aix"; break;
2351 case ABI_ELFv2: abi_str = "ELFv2"; break;
2352 case ABI_V4: abi_str = "V4"; break;
2353 case ABI_DARWIN: abi_str = "darwin"; break;
2354 default: abi_str = "unknown"; break;
2357 fprintf (stderr, DEBUG_FMT_S, "abi", abi_str);
2359 if (rs6000_altivec_abi)
2360 fprintf (stderr, DEBUG_FMT_S, "altivec_abi", "true");
2362 if (rs6000_spe_abi)
2363 fprintf (stderr, DEBUG_FMT_S, "spe_abi", "true");
2365 if (rs6000_darwin64_abi)
2366 fprintf (stderr, DEBUG_FMT_S, "darwin64_abi", "true");
2368 if (rs6000_float_gprs)
2369 fprintf (stderr, DEBUG_FMT_S, "float_gprs", "true");
2371 fprintf (stderr, DEBUG_FMT_S, "fprs",
2372 (TARGET_FPRS ? "true" : "false"));
2374 fprintf (stderr, DEBUG_FMT_S, "single_float",
2375 (TARGET_SINGLE_FLOAT ? "true" : "false"));
2377 fprintf (stderr, DEBUG_FMT_S, "double_float",
2378 (TARGET_DOUBLE_FLOAT ? "true" : "false"));
2380 fprintf (stderr, DEBUG_FMT_S, "soft_float",
2381 (TARGET_SOFT_FLOAT ? "true" : "false"));
2383 fprintf (stderr, DEBUG_FMT_S, "e500_single",
2384 (TARGET_E500_SINGLE ? "true" : "false"));
2386 fprintf (stderr, DEBUG_FMT_S, "e500_double",
2387 (TARGET_E500_DOUBLE ? "true" : "false"));
2389 if (TARGET_LINK_STACK)
2390 fprintf (stderr, DEBUG_FMT_S, "link_stack", "true");
2392 if (targetm.lra_p ())
2393 fprintf (stderr, DEBUG_FMT_S, "lra", "true");
2395 if (TARGET_P8_FUSION)
2396 fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
2397 (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
2399 fprintf (stderr, DEBUG_FMT_S, "plt-format",
2400 TARGET_SECURE_PLT ? "secure" : "bss");
2401 fprintf (stderr, DEBUG_FMT_S, "struct-return",
2402 aix_struct_return ? "aix" : "sysv");
2403 fprintf (stderr, DEBUG_FMT_S, "always_hint", tf[!!rs6000_always_hint]);
2404 fprintf (stderr, DEBUG_FMT_S, "sched_groups", tf[!!rs6000_sched_groups]);
2405 fprintf (stderr, DEBUG_FMT_S, "align_branch",
2406 tf[!!rs6000_align_branch_targets]);
2407 fprintf (stderr, DEBUG_FMT_D, "tls_size", rs6000_tls_size);
2408 fprintf (stderr, DEBUG_FMT_D, "long_double_size",
2409 rs6000_long_double_type_size);
2410 fprintf (stderr, DEBUG_FMT_D, "sched_restricted_insns_priority",
2411 (int)rs6000_sched_restricted_insns_priority);
2412 fprintf (stderr, DEBUG_FMT_D, "Number of standard builtins",
2413 (int)END_BUILTINS);
2414 fprintf (stderr, DEBUG_FMT_D, "Number of rs6000 builtins",
2415 (int)RS6000_BUILTIN_COUNT);
2417 if (TARGET_VSX)
2418 fprintf (stderr, DEBUG_FMT_D, "VSX easy 64-bit scalar element",
2419 (int)VECTOR_ELEMENT_SCALAR_64BIT);
2423 /* Update the addr mask bits in reg_addr to help secondary reload and the
2424 legitimate address support figure out the appropriate addressing to
2425 use. */
2427 static void
2428 rs6000_setup_reg_addr_masks (void)
2430 ssize_t rc, reg, m, nregs;
2431 addr_mask_type any_addr_mask, addr_mask;
2433 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2435 machine_mode m2 = (machine_mode)m;
2437 /* SDmode is special in that we want to access it only via REG+REG
2438 addressing on power7 and above, since we want to use the LFIWZX and
2439 STFIWZX instructions to load it. */
2440 bool indexed_only_p = (m == SDmode && TARGET_NO_SDMODE_STACK);
2442 any_addr_mask = 0;
2443 for (rc = FIRST_RELOAD_REG_CLASS; rc <= LAST_RELOAD_REG_CLASS; rc++)
2445 addr_mask = 0;
2446 reg = reload_reg_map[rc].reg;
2448 /* Can mode values go in the GPR/FPR/Altivec registers? */
2449 if (reg >= 0 && rs6000_hard_regno_mode_ok_p[m][reg])
2451 nregs = rs6000_hard_regno_nregs[m][reg];
2452 addr_mask |= RELOAD_REG_VALID;
2454 /* Indicate if the mode takes more than 1 physical register. If
2455 it takes a single register, indicate it can do REG+REG
2456 addressing. */
2457 if (nregs > 1 || m == BLKmode)
2458 addr_mask |= RELOAD_REG_MULTIPLE;
2459 else
2460 addr_mask |= RELOAD_REG_INDEXED;
2462 /* Figure out if we can do PRE_INC, PRE_DEC, or PRE_MODIFY
2463 addressing. Restrict addressing on SPE for 64-bit types
2464 because of the SUBREG hackery used to address 64-bit floats in
2465 '32-bit' GPRs. */
2467 if (TARGET_UPDATE
2468 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR)
2469 && GET_MODE_SIZE (m2) <= 8
2470 && !VECTOR_MODE_P (m2)
2471 && !COMPLEX_MODE_P (m2)
2472 && !indexed_only_p
2473 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (m2) == 8))
2475 addr_mask |= RELOAD_REG_PRE_INCDEC;
2477 /* PRE_MODIFY is more restricted than PRE_INC/PRE_DEC in that
2478 we don't allow PRE_MODIFY for some multi-register
2479 operations. */
2480 switch (m)
2482 default:
2483 addr_mask |= RELOAD_REG_PRE_MODIFY;
2484 break;
2486 case DImode:
2487 if (TARGET_POWERPC64)
2488 addr_mask |= RELOAD_REG_PRE_MODIFY;
2489 break;
2491 case DFmode:
2492 case DDmode:
2493 if (TARGET_DF_INSN)
2494 addr_mask |= RELOAD_REG_PRE_MODIFY;
2495 break;
2500 /* GPR and FPR registers can do REG+OFFSET addressing, except
2501 possibly for SDmode. */
2502 if ((addr_mask != 0) && !indexed_only_p
2503 && (rc == RELOAD_REG_GPR || rc == RELOAD_REG_FPR))
2504 addr_mask |= RELOAD_REG_OFFSET;
2506 /* VMX registers can do (REG & -16) and ((REG+REG) & -16)
2507 addressing on 128-bit types. */
2508 if (rc == RELOAD_REG_VMX && GET_MODE_SIZE (m2) == 16)
2509 addr_mask |= RELOAD_REG_AND_M16;
2511 reg_addr[m].addr_mask[rc] = addr_mask;
2512 any_addr_mask |= addr_mask;
2515 reg_addr[m].addr_mask[RELOAD_REG_ANY] = any_addr_mask;
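/* Worked example of the loop above: when TARGET_NO_SDMODE_STACK is set,
   indexed_only_p is true for SDmode, so its GPR and FPR entries receive
   RELOAD_REG_VALID and RELOAD_REG_INDEXED but neither RELOAD_REG_OFFSET
   nor the pre-increment bits, steering SDmode reloads toward the REG+REG
   addressing that the comment at the top of the function intends.  */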
2520 /* Initialize the various global tables that are based on register size. */
2521 static void
2522 rs6000_init_hard_regno_mode_ok (bool global_init_p)
2524 ssize_t r, m, c;
2525 int align64;
2526 int align32;
2528 /* Precalculate REGNO_REG_CLASS. */
2529 rs6000_regno_regclass[0] = GENERAL_REGS;
2530 for (r = 1; r < 32; ++r)
2531 rs6000_regno_regclass[r] = BASE_REGS;
2533 for (r = 32; r < 64; ++r)
2534 rs6000_regno_regclass[r] = FLOAT_REGS;
2536 for (r = 64; r < FIRST_PSEUDO_REGISTER; ++r)
2537 rs6000_regno_regclass[r] = NO_REGS;
2539 for (r = FIRST_ALTIVEC_REGNO; r <= LAST_ALTIVEC_REGNO; ++r)
2540 rs6000_regno_regclass[r] = ALTIVEC_REGS;
2542 rs6000_regno_regclass[CR0_REGNO] = CR0_REGS;
2543 for (r = CR1_REGNO; r <= CR7_REGNO; ++r)
2544 rs6000_regno_regclass[r] = CR_REGS;
2546 rs6000_regno_regclass[LR_REGNO] = LINK_REGS;
2547 rs6000_regno_regclass[CTR_REGNO] = CTR_REGS;
2548 rs6000_regno_regclass[CA_REGNO] = NO_REGS;
2549 rs6000_regno_regclass[VRSAVE_REGNO] = VRSAVE_REGS;
2550 rs6000_regno_regclass[VSCR_REGNO] = VRSAVE_REGS;
2551 rs6000_regno_regclass[SPE_ACC_REGNO] = SPE_ACC_REGS;
2552 rs6000_regno_regclass[SPEFSCR_REGNO] = SPEFSCR_REGS;
2553 rs6000_regno_regclass[TFHAR_REGNO] = SPR_REGS;
2554 rs6000_regno_regclass[TFIAR_REGNO] = SPR_REGS;
2555 rs6000_regno_regclass[TEXASR_REGNO] = SPR_REGS;
2556 rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
2557 rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
2559 /* Precalculate the mapping from register class to the simpler reload
2560 register class.  We don't need all of the register classes that are
2561 combinations of different classes, just the simple ones that have constraint letters. */
2562 for (c = 0; c < N_REG_CLASSES; c++)
2563 reg_class_to_reg_type[c] = NO_REG_TYPE;
2565 reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
2566 reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
2567 reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
2568 reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
2569 reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
2570 reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
2571 reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
2572 reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
2573 reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
2574 reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
2575 reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
2576 reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
2578 if (TARGET_VSX)
2580 reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
2581 reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
2583 else
2585 reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
2586 reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
2589 /* Precalculate the valid memory formats as well as the vector information;
2590 this must be set up before the rs6000_hard_regno_nregs_internal calls
2591 below. */
2592 gcc_assert ((int)VECTOR_NONE == 0);
2593 memset ((void *) &rs6000_vector_unit[0], '\0', sizeof (rs6000_vector_unit));
2594 memset ((void *) &rs6000_vector_mem[0], '\0', sizeof (rs6000_vector_unit));
2596 gcc_assert ((int)CODE_FOR_nothing == 0);
2597 memset ((void *) &reg_addr[0], '\0', sizeof (reg_addr));
2599 gcc_assert ((int)NO_REGS == 0);
2600 memset ((void *) &rs6000_constraints[0], '\0', sizeof (rs6000_constraints));
2602 /* The VSX hardware allows native alignment for vectors, but we control whether
2603 the compiler believes it can use native alignment or must still use 128-bit alignment. */
2604 if (TARGET_VSX && !TARGET_VSX_ALIGN_128)
2606 align64 = 64;
2607 align32 = 32;
2609 else
2611 align64 = 128;
2612 align32 = 128;
2615 /* V2DF mode, VSX only. */
2616 if (TARGET_VSX)
2618 rs6000_vector_unit[V2DFmode] = VECTOR_VSX;
2619 rs6000_vector_mem[V2DFmode] = VECTOR_VSX;
2620 rs6000_vector_align[V2DFmode] = align64;
2623 /* V4SF mode, either VSX or Altivec. */
2624 if (TARGET_VSX)
2626 rs6000_vector_unit[V4SFmode] = VECTOR_VSX;
2627 rs6000_vector_mem[V4SFmode] = VECTOR_VSX;
2628 rs6000_vector_align[V4SFmode] = align32;
2630 else if (TARGET_ALTIVEC)
2632 rs6000_vector_unit[V4SFmode] = VECTOR_ALTIVEC;
2633 rs6000_vector_mem[V4SFmode] = VECTOR_ALTIVEC;
2634 rs6000_vector_align[V4SFmode] = align32;
2637 /* V16QImode, V8HImode, V4SImode are Altivec only, but possibly do VSX loads
2638 and stores. */
2639 if (TARGET_ALTIVEC)
2641 rs6000_vector_unit[V4SImode] = VECTOR_ALTIVEC;
2642 rs6000_vector_unit[V8HImode] = VECTOR_ALTIVEC;
2643 rs6000_vector_unit[V16QImode] = VECTOR_ALTIVEC;
2644 rs6000_vector_align[V4SImode] = align32;
2645 rs6000_vector_align[V8HImode] = align32;
2646 rs6000_vector_align[V16QImode] = align32;
2648 if (TARGET_VSX)
2650 rs6000_vector_mem[V4SImode] = VECTOR_VSX;
2651 rs6000_vector_mem[V8HImode] = VECTOR_VSX;
2652 rs6000_vector_mem[V16QImode] = VECTOR_VSX;
2654 else
2656 rs6000_vector_mem[V4SImode] = VECTOR_ALTIVEC;
2657 rs6000_vector_mem[V8HImode] = VECTOR_ALTIVEC;
2658 rs6000_vector_mem[V16QImode] = VECTOR_ALTIVEC;
2662 /* V2DImode: full support depends on the ISA 2.07 vector instructions, but
2663 allow it under VSX to do insert/splat/extract. Altivec doesn't have 64-bit integer support. */
2664 if (TARGET_VSX)
2666 rs6000_vector_mem[V2DImode] = VECTOR_VSX;
2667 rs6000_vector_unit[V2DImode]
2668 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2669 rs6000_vector_align[V2DImode] = align64;
2671 rs6000_vector_mem[V1TImode] = VECTOR_VSX;
2672 rs6000_vector_unit[V1TImode]
2673 = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
2674 rs6000_vector_align[V1TImode] = 128;
2677 /* DFmode, see if we want to use the VSX unit. Memory is handled
2678 differently, so don't set rs6000_vector_mem. */
2679 if (TARGET_VSX && TARGET_VSX_SCALAR_DOUBLE)
2681 rs6000_vector_unit[DFmode] = VECTOR_VSX;
2682 rs6000_vector_align[DFmode] = 64;
2685 /* SFmode, see if we want to use the VSX unit. */
2686 if (TARGET_P8_VECTOR && TARGET_VSX_SCALAR_FLOAT)
2688 rs6000_vector_unit[SFmode] = VECTOR_VSX;
2689 rs6000_vector_align[SFmode] = 32;
2692 /* Allow TImode in VSX registers and set the VSX memory macros. */
2693 if (TARGET_VSX && TARGET_VSX_TIMODE)
2695 rs6000_vector_mem[TImode] = VECTOR_VSX;
2696 rs6000_vector_align[TImode] = align64;
2699 /* TODO add SPE and paired floating point vector support. */
2701 /* Register class constraints for the constraints that depend on compile
2702 switches. When the VSX code was added, different constraints were added
2703 based on the type (DFmode, V2DFmode, V4SFmode). For the vector types, all
2704 of the VSX registers are used. The register classes for scalar floating
2705 point types are set based on whether we allow that type into the upper
2706 (Altivec) registers. GCC has register classes to target the Altivec
2707 registers for load/store operations, to select using a VSX memory
2708 operation instead of the traditional floating point operation. The
2709 constraints are:
2711 d - Register class to use with traditional DFmode instructions.
2712 f - Register class to use with traditional SFmode instructions.
2713 v - Altivec register.
2714 wa - Any VSX register.
2715 wc - Reserved to represent individual CR bits (used in LLVM).
2716 wd - Preferred register class for V2DFmode.
2717 wf - Preferred register class for V4SFmode.
2718 wg - Float register for power6x move insns.
2719 wh - FP register for direct move instructions.
2720 wi - FP or VSX register to hold 64-bit integers for VSX insns.
2721 wj - FP or VSX register to hold 64-bit integers for direct moves.
2722 wk - FP or VSX register to hold 64-bit doubles for direct moves.
2723 wl - Float register if we can do 32-bit signed int loads.
2724 wm - VSX register for ISA 2.07 direct move operations.
2725 wn - always NO_REGS.
2726 wr - GPR if 64-bit mode is permitted.
2727 ws - Register class to do ISA 2.06 DF operations.
2728 wt - VSX register for TImode in VSX registers.
2729 wu - Altivec register for ISA 2.07 VSX SF/SI load/stores.
2730 wv - Altivec register for ISA 2.06 VSX DF/DI load/stores.
2731 ww - Register class to do SF conversions in with VSX operations.
2732 wx - Float register if we can do 32-bit int stores.
2733 wy - Register class to do ISA 2.07 SF operations.
2734 wz - Float register if we can do 32-bit unsigned int loads. */
2736 if (TARGET_HARD_FLOAT && TARGET_FPRS)
2737 rs6000_constraints[RS6000_CONSTRAINT_f] = FLOAT_REGS; /* SFmode */
2739 if (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
2740 rs6000_constraints[RS6000_CONSTRAINT_d] = FLOAT_REGS; /* DFmode */
2742 if (TARGET_VSX)
2744 rs6000_constraints[RS6000_CONSTRAINT_wa] = VSX_REGS;
2745 rs6000_constraints[RS6000_CONSTRAINT_wd] = VSX_REGS; /* V2DFmode */
2746 rs6000_constraints[RS6000_CONSTRAINT_wf] = VSX_REGS; /* V4SFmode */
2747 rs6000_constraints[RS6000_CONSTRAINT_wi] = FLOAT_REGS; /* DImode */
2749 if (TARGET_VSX_TIMODE)
2750 rs6000_constraints[RS6000_CONSTRAINT_wt] = VSX_REGS; /* TImode */
2752 if (TARGET_UPPER_REGS_DF) /* DFmode */
2754 rs6000_constraints[RS6000_CONSTRAINT_ws] = VSX_REGS;
2755 rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
2757 else
2758 rs6000_constraints[RS6000_CONSTRAINT_ws] = FLOAT_REGS;
2761 /* Add conditional constraints based on various options, to allow us to
2762 collapse multiple insn patterns. */
2763 if (TARGET_ALTIVEC)
2764 rs6000_constraints[RS6000_CONSTRAINT_v] = ALTIVEC_REGS;
2766 if (TARGET_MFPGPR) /* DFmode */
2767 rs6000_constraints[RS6000_CONSTRAINT_wg] = FLOAT_REGS;
2769 if (TARGET_LFIWAX)
2770 rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS; /* DImode */
2772 if (TARGET_DIRECT_MOVE)
2774 rs6000_constraints[RS6000_CONSTRAINT_wh] = FLOAT_REGS;
2775 rs6000_constraints[RS6000_CONSTRAINT_wj] /* DImode */
2776 = rs6000_constraints[RS6000_CONSTRAINT_wi];
2777 rs6000_constraints[RS6000_CONSTRAINT_wk] /* DFmode */
2778 = rs6000_constraints[RS6000_CONSTRAINT_ws];
2779 rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
2782 if (TARGET_POWERPC64)
2783 rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
2785 if (TARGET_P8_VECTOR && TARGET_UPPER_REGS_SF) /* SFmode */
2787 rs6000_constraints[RS6000_CONSTRAINT_wu] = ALTIVEC_REGS;
2788 rs6000_constraints[RS6000_CONSTRAINT_wy] = VSX_REGS;
2789 rs6000_constraints[RS6000_CONSTRAINT_ww] = VSX_REGS;
2791 else if (TARGET_P8_VECTOR)
2793 rs6000_constraints[RS6000_CONSTRAINT_wy] = FLOAT_REGS;
2794 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2796 else if (TARGET_VSX)
2797 rs6000_constraints[RS6000_CONSTRAINT_ww] = FLOAT_REGS;
2799 if (TARGET_STFIWX)
2800 rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS; /* DImode */
2802 if (TARGET_LFIWZX)
2803 rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS; /* DImode */
2805 /* Set up the reload helper and direct move functions. */
2806 if (TARGET_VSX || TARGET_ALTIVEC)
2808 if (TARGET_64BIT)
2810 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_di_store;
2811 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_di_load;
2812 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_di_store;
2813 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_di_load;
2814 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_di_store;
2815 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_di_load;
2816 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_di_store;
2817 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_di_load;
2818 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_di_store;
2819 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_di_load;
2820 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_di_store;
2821 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_di_load;
2822 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_di_store;
2823 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_di_load;
2824 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_di_store;
2825 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_di_load;
2826 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_di_store;
2827 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_di_load;
2828 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_di_store;
2829 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_di_load;
2830 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_di_store;
2831 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_di_load;
2833 if (TARGET_VSX_TIMODE)
2835 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_di_store;
2836 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_di_load;
2839 if (TARGET_DIRECT_MOVE)
2841 reg_addr[TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxti;
2842 reg_addr[V1TImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv1ti;
2843 reg_addr[V2DFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2df;
2844 reg_addr[V2DImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv2di;
2845 reg_addr[V4SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4sf;
2846 reg_addr[V4SImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv4si;
2847 reg_addr[V8HImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv8hi;
2848 reg_addr[V16QImode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxv16qi;
2849 reg_addr[SFmode].reload_gpr_vsx = CODE_FOR_reload_gpr_from_vsxsf;
2851 reg_addr[TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprti;
2852 reg_addr[V1TImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv1ti;
2853 reg_addr[V2DFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2df;
2854 reg_addr[V2DImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv2di;
2855 reg_addr[V4SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4sf;
2856 reg_addr[V4SImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv4si;
2857 reg_addr[V8HImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv8hi;
2858 reg_addr[V16QImode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprv16qi;
2859 reg_addr[SFmode].reload_vsx_gpr = CODE_FOR_reload_vsx_from_gprsf;
2862 else
2864 reg_addr[V16QImode].reload_store = CODE_FOR_reload_v16qi_si_store;
2865 reg_addr[V16QImode].reload_load = CODE_FOR_reload_v16qi_si_load;
2866 reg_addr[V8HImode].reload_store = CODE_FOR_reload_v8hi_si_store;
2867 reg_addr[V8HImode].reload_load = CODE_FOR_reload_v8hi_si_load;
2868 reg_addr[V4SImode].reload_store = CODE_FOR_reload_v4si_si_store;
2869 reg_addr[V4SImode].reload_load = CODE_FOR_reload_v4si_si_load;
2870 reg_addr[V2DImode].reload_store = CODE_FOR_reload_v2di_si_store;
2871 reg_addr[V2DImode].reload_load = CODE_FOR_reload_v2di_si_load;
2872 reg_addr[V1TImode].reload_store = CODE_FOR_reload_v1ti_si_store;
2873 reg_addr[V1TImode].reload_load = CODE_FOR_reload_v1ti_si_load;
2874 reg_addr[V4SFmode].reload_store = CODE_FOR_reload_v4sf_si_store;
2875 reg_addr[V4SFmode].reload_load = CODE_FOR_reload_v4sf_si_load;
2876 reg_addr[V2DFmode].reload_store = CODE_FOR_reload_v2df_si_store;
2877 reg_addr[V2DFmode].reload_load = CODE_FOR_reload_v2df_si_load;
2878 reg_addr[DFmode].reload_store = CODE_FOR_reload_df_si_store;
2879 reg_addr[DFmode].reload_load = CODE_FOR_reload_df_si_load;
2880 reg_addr[DDmode].reload_store = CODE_FOR_reload_dd_si_store;
2881 reg_addr[DDmode].reload_load = CODE_FOR_reload_dd_si_load;
2882 reg_addr[SFmode].reload_store = CODE_FOR_reload_sf_si_store;
2883 reg_addr[SFmode].reload_load = CODE_FOR_reload_sf_si_load;
2884 reg_addr[SDmode].reload_store = CODE_FOR_reload_sd_si_store;
2885 reg_addr[SDmode].reload_load = CODE_FOR_reload_sd_si_load;
2887 if (TARGET_VSX_TIMODE)
2889 reg_addr[TImode].reload_store = CODE_FOR_reload_ti_si_store;
2890 reg_addr[TImode].reload_load = CODE_FOR_reload_ti_si_load;
2893 if (TARGET_DIRECT_MOVE)
2895 reg_addr[DImode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdi;
2896 reg_addr[DDmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdd;
2897 reg_addr[DFmode].reload_fpr_gpr = CODE_FOR_reload_fpr_from_gprdf;
2901 if (TARGET_UPPER_REGS_DF)
2902 reg_addr[DFmode].scalar_in_vmx_p = true;
2904 if (TARGET_UPPER_REGS_SF)
2905 reg_addr[SFmode].scalar_in_vmx_p = true;
2908 /* Precalculate HARD_REGNO_NREGS. */
2909 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2910 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2911 rs6000_hard_regno_nregs[m][r]
2912 = rs6000_hard_regno_nregs_internal (r, (machine_mode)m);
2914 /* Precalculate HARD_REGNO_MODE_OK. */
2915 for (r = 0; r < FIRST_PSEUDO_REGISTER; ++r)
2916 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2917 if (rs6000_hard_regno_mode_ok (r, (machine_mode)m))
2918 rs6000_hard_regno_mode_ok_p[m][r] = true;
2920 /* Precalculate CLASS_MAX_NREGS sizes. */
2921 for (c = 0; c < LIM_REG_CLASSES; ++c)
2923 int reg_size;
2925 if (TARGET_VSX && VSX_REG_CLASS_P (c))
2926 reg_size = UNITS_PER_VSX_WORD;
2928 else if (c == ALTIVEC_REGS)
2929 reg_size = UNITS_PER_ALTIVEC_WORD;
2931 else if (c == FLOAT_REGS)
2932 reg_size = UNITS_PER_FP_WORD;
2934 else
2935 reg_size = UNITS_PER_WORD;
2937 for (m = 0; m < NUM_MACHINE_MODES; ++m)
2939 machine_mode m2 = (machine_mode)m;
2940 int reg_size2 = reg_size;
2942 /* TFmode/TDmode always take 2 registers, even in VSX. */
2943 if (TARGET_VSX && VSX_REG_CLASS_P (c)
2944 && (m == TDmode || m == TFmode))
2945 reg_size2 = UNITS_PER_FP_WORD;
2947 rs6000_class_max_nregs[m][c]
2948 = (GET_MODE_SIZE (m2) + reg_size2 - 1) / reg_size2;
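/* For example, TFmode is 16 bytes; in a VSX register class the override
   above forces reg_size2 to UNITS_PER_FP_WORD (8), giving
   (16 + 8 - 1) / 8 == 2 registers, whereas 16-byte V2DFmode uses the full
   UNITS_PER_VSX_WORD and needs only one.  */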
2952 if (TARGET_E500_DOUBLE)
2953 rs6000_class_max_nregs[DFmode][GENERAL_REGS] = 1;
2955 /* Calculate the modes for which to automatically generate code using the
2956 reciprocal divide and square root instructions. In the future, possibly
2957 automatically generate the instructions even if the user did not specify
2958 -mrecip. The older machines' double precision reciprocal square root
2959 estimate is not accurate enough. */
2960 memset (rs6000_recip_bits, 0, sizeof (rs6000_recip_bits));
2961 if (TARGET_FRES)
2962 rs6000_recip_bits[SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2963 if (TARGET_FRE)
2964 rs6000_recip_bits[DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2965 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2966 rs6000_recip_bits[V4SFmode] = RS6000_RECIP_MASK_HAVE_RE;
2967 if (VECTOR_UNIT_VSX_P (V2DFmode))
2968 rs6000_recip_bits[V2DFmode] = RS6000_RECIP_MASK_HAVE_RE;
2970 if (TARGET_FRSQRTES)
2971 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2972 if (TARGET_FRSQRTE)
2973 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2974 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode))
2975 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2976 if (VECTOR_UNIT_VSX_P (V2DFmode))
2977 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_HAVE_RSQRTE;
2979 if (rs6000_recip_control)
2981 if (!flag_finite_math_only)
2982 warning (0, "-mrecip requires -ffinite-math or -ffast-math");
2983 if (flag_trapping_math)
2984 warning (0, "-mrecip requires -fno-trapping-math or -ffast-math");
2985 if (!flag_reciprocal_math)
2986 warning (0, "-mrecip requires -freciprocal-math or -ffast-math");
2987 if (flag_finite_math_only && !flag_trapping_math && flag_reciprocal_math)
2989 if (RS6000_RECIP_HAVE_RE_P (SFmode)
2990 && (rs6000_recip_control & RECIP_SF_DIV) != 0)
2991 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2993 if (RS6000_RECIP_HAVE_RE_P (DFmode)
2994 && (rs6000_recip_control & RECIP_DF_DIV) != 0)
2995 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
2997 if (RS6000_RECIP_HAVE_RE_P (V4SFmode)
2998 && (rs6000_recip_control & RECIP_V4SF_DIV) != 0)
2999 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3001 if (RS6000_RECIP_HAVE_RE_P (V2DFmode)
3002 && (rs6000_recip_control & RECIP_V2DF_DIV) != 0)
3003 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RE;
3005 if (RS6000_RECIP_HAVE_RSQRTE_P (SFmode)
3006 && (rs6000_recip_control & RECIP_SF_RSQRT) != 0)
3007 rs6000_recip_bits[SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3009 if (RS6000_RECIP_HAVE_RSQRTE_P (DFmode)
3010 && (rs6000_recip_control & RECIP_DF_RSQRT) != 0)
3011 rs6000_recip_bits[DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3013 if (RS6000_RECIP_HAVE_RSQRTE_P (V4SFmode)
3014 && (rs6000_recip_control & RECIP_V4SF_RSQRT) != 0)
3015 rs6000_recip_bits[V4SFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3017 if (RS6000_RECIP_HAVE_RSQRTE_P (V2DFmode)
3018 && (rs6000_recip_control & RECIP_V2DF_RSQRT) != 0)
3019 rs6000_recip_bits[V2DFmode] |= RS6000_RECIP_MASK_AUTO_RSQRTE;
3023 /* Update the addr mask bits in reg_addr to help secondary reload and the
3024 legitimate address support figure out the appropriate addressing to
3025 use. */
3026 rs6000_setup_reg_addr_masks ();
3028 if (global_init_p || TARGET_DEBUG_TARGET)
3030 if (TARGET_DEBUG_REG)
3031 rs6000_debug_reg_global ();
3033 if (TARGET_DEBUG_COST || TARGET_DEBUG_REG)
3034 fprintf (stderr,
3035 "SImode variable mult cost = %d\n"
3036 "SImode constant mult cost = %d\n"
3037 "SImode short constant mult cost = %d\n"
3038 "DImode multipliciation cost = %d\n"
3039 "SImode division cost = %d\n"
3040 "DImode division cost = %d\n"
3041 "Simple fp operation cost = %d\n"
3042 "DFmode multiplication cost = %d\n"
3043 "SFmode division cost = %d\n"
3044 "DFmode division cost = %d\n"
3045 "cache line size = %d\n"
3046 "l1 cache size = %d\n"
3047 "l2 cache size = %d\n"
3048 "simultaneous prefetches = %d\n"
3049 "\n",
3050 rs6000_cost->mulsi,
3051 rs6000_cost->mulsi_const,
3052 rs6000_cost->mulsi_const9,
3053 rs6000_cost->muldi,
3054 rs6000_cost->divsi,
3055 rs6000_cost->divdi,
3056 rs6000_cost->fp,
3057 rs6000_cost->dmul,
3058 rs6000_cost->sdiv,
3059 rs6000_cost->ddiv,
3060 rs6000_cost->cache_line_size,
3061 rs6000_cost->l1_cache_size,
3062 rs6000_cost->l2_cache_size,
3063 rs6000_cost->simultaneous_prefetches);
3067 #if TARGET_MACHO
3068 /* The Darwin version of SUBTARGET_OVERRIDE_OPTIONS. */
3070 static void
3071 darwin_rs6000_override_options (void)
3073 /* The Darwin ABI always includes AltiVec; it can't be (validly) turned
3074 off. */
3075 rs6000_altivec_abi = 1;
3076 TARGET_ALTIVEC_VRSAVE = 1;
3077 rs6000_current_abi = ABI_DARWIN;
3079 if (DEFAULT_ABI == ABI_DARWIN
3080 && TARGET_64BIT)
3081 darwin_one_byte_bool = 1;
3083 if (TARGET_64BIT && ! TARGET_POWERPC64)
3085 rs6000_isa_flags |= OPTION_MASK_POWERPC64;
3086 warning (0, "-m64 requires PowerPC64 architecture, enabling");
3088 if (flag_mkernel)
3090 rs6000_default_long_calls = 1;
3091 rs6000_isa_flags |= OPTION_MASK_SOFT_FLOAT;
3094 /* Make -m64 imply -maltivec. Darwin's 64-bit ABI includes
3095 Altivec. */
3096 if (!flag_mkernel && !flag_apple_kext
3097 && TARGET_64BIT
3098 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC))
3099 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3101 /* Unless the user (not the configurer) has explicitly overridden
3102 it with -mcpu=G3 or -mno-altivec, then 10.5+ targets default to
3103 G4 unless targeting the kernel. */
3104 if (!flag_mkernel
3105 && !flag_apple_kext
3106 && strverscmp (darwin_macosx_version_min, "10.5") >= 0
3107 && ! (rs6000_isa_flags_explicit & OPTION_MASK_ALTIVEC)
3108 && ! global_options_set.x_rs6000_cpu_index)
3110 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3113 #endif
3115 /* If not otherwise specified by a target, make 'long double' equivalent to
3116 'double'. */
3118 #ifndef RS6000_DEFAULT_LONG_DOUBLE_SIZE
3119 #define RS6000_DEFAULT_LONG_DOUBLE_SIZE 64
3120 #endif
3122 /* Return the builtin mask of the various options in use that could affect
3123 which builtins are available. In the past we used target_flags, but we've
3124 run out of bits, and some options like SPE and PAIRED are no longer in
3125 target_flags. */
3127 HOST_WIDE_INT
3128 rs6000_builtin_mask_calculate (void)
3130 return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
3131 | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
3132 | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
3133 | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
3134 | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
3135 | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
3136 | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
3137 | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
3138 | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
3139 | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
3140 | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
3141 | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0)
3142 | ((TARGET_HTM) ? RS6000_BTM_HTM : 0)
3143 | ((TARGET_DFP) ? RS6000_BTM_DFP : 0)
3144 | ((TARGET_HARD_FLOAT) ? RS6000_BTM_HARD_FLOAT : 0)
3145 | ((TARGET_LONG_DOUBLE_128) ? RS6000_BTM_LDBL128 : 0));
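/* Usage sketch (illustrative, not from the original source): callers test
   individual facilities against the returned mask, e.g.

     HOST_WIDE_INT mask = rs6000_builtin_mask_calculate ();
     if ((mask & RS6000_BTM_ALTIVEC) != 0)
       enable_altivec_builtins ();

   where enable_altivec_builtins is a hypothetical helper named only for
   this example.  */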
3148 /* Override command line options. Mostly we process the processor type and
3149 sometimes adjust other TARGET_ options. */
3151 static bool
3152 rs6000_option_override_internal (bool global_init_p)
3154 bool ret = true;
3155 bool have_cpu = false;
3157 /* The default cpu requested at configure time, if any. */
3158 const char *implicit_cpu = OPTION_TARGET_CPU_DEFAULT;
3160 HOST_WIDE_INT set_masks;
3161 int cpu_index;
3162 int tune_index;
3163 struct cl_target_option *main_target_opt
3164 = ((global_init_p || target_option_default_node == NULL)
3165 ? NULL : TREE_TARGET_OPTION (target_option_default_node));
3167 /* Remember the explicit arguments. */
3168 if (global_init_p)
3169 rs6000_isa_flags_explicit = global_options_set.x_rs6000_isa_flags;
3171 /* On 64-bit Darwin, power alignment is ABI-incompatible with some C
3172 library functions, so warn about it. The flag may be useful for
3173 performance studies from time to time though, so don't disable it
3174 entirely. */
3175 if (global_options_set.x_rs6000_alignment_flags
3176 && rs6000_alignment_flags == MASK_ALIGN_POWER
3177 && DEFAULT_ABI == ABI_DARWIN
3178 && TARGET_64BIT)
3179 warning (0, "-malign-power is not supported for 64-bit Darwin;"
3180 " it is incompatible with the installed C and C++ libraries");
3182 /* Numerous experiments show that IRA based loop pressure
3183 calculation works better for RTL loop invariant motion on targets
3184 with enough (>= 32) registers. It is an expensive optimization,
3185 so it is enabled only for peak performance. */
3186 if (optimize >= 3 && global_init_p
3187 && !global_options_set.x_flag_ira_loop_pressure)
3188 flag_ira_loop_pressure = 1;
3190 /* Set the pointer size. */
3191 if (TARGET_64BIT)
3193 rs6000_pmode = (int)DImode;
3194 rs6000_pointer_size = 64;
3196 else
3198 rs6000_pmode = (int)SImode;
3199 rs6000_pointer_size = 32;
3202 /* Some OSs don't support saving the high part of 64-bit registers on context
3203 switch. Other OSs don't support saving Altivec registers. On those OSs,
3204 we don't touch the OPTION_MASK_POWERPC64 or OPTION_MASK_ALTIVEC settings;
3205 if the user wants either, the user must explicitly specify them and we
3206 won't interfere with the user's specification. */
3208 set_masks = POWERPC_MASKS;
3209 #ifdef OS_MISSING_POWERPC64
3210 if (OS_MISSING_POWERPC64)
3211 set_masks &= ~OPTION_MASK_POWERPC64;
3212 #endif
3213 #ifdef OS_MISSING_ALTIVEC
3214 if (OS_MISSING_ALTIVEC)
3215 set_masks &= ~(OPTION_MASK_ALTIVEC | OPTION_MASK_VSX);
3216 #endif
3218 /* Don't override by the processor default if given explicitly. */
3219 set_masks &= ~rs6000_isa_flags_explicit;
3221 /* Process the -mcpu=<xxx> and -mtune=<xxx> arguments. If the user changed
3222 the cpu in a target attribute or pragma, but did not specify a tuning
3223 option, use the cpu for the tuning option rather than the option specified
3224 with -mtune on the command line. Process a '--with-cpu' configuration
3225 request as an implicit --cpu. */
3226 if (rs6000_cpu_index >= 0)
3228 cpu_index = rs6000_cpu_index;
3229 have_cpu = true;
3231 else if (main_target_opt != NULL && main_target_opt->x_rs6000_cpu_index >= 0)
3233 rs6000_cpu_index = cpu_index = main_target_opt->x_rs6000_cpu_index;
3234 have_cpu = true;
3236 else if (implicit_cpu)
3238 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (implicit_cpu);
3239 have_cpu = true;
3241 else
3243 const char *default_cpu = (TARGET_POWERPC64 ? "powerpc64" : "powerpc");
3244 rs6000_cpu_index = cpu_index = rs6000_cpu_name_lookup (default_cpu);
3245 have_cpu = false;
3248 gcc_assert (cpu_index >= 0);
3250 /* If we have a cpu, either through an explicit -mcpu=<xxx> or if the
3251 compiler was configured with --with-cpu=<xxx>, replace all of the ISA bits
3252 with those from the cpu, except for options that were explicitly set. If
3253 we don't have a cpu, do not override the target bits set in
3254 TARGET_DEFAULT. */
3255 if (have_cpu)
3257 rs6000_isa_flags &= ~set_masks;
3258 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3259 & set_masks);
3261 else
3262 rs6000_isa_flags |= (processor_target_table[cpu_index].target_enable
3263 & ~rs6000_isa_flags_explicit);
3265 /* If no -mcpu=<xxx>, inherit any default options that were cleared via
3266 POWERPC_MASKS. Originally, TARGET_DEFAULT was used to initialize
3267 target_flags via the TARGET_DEFAULT_TARGET_FLAGS hook. When we switched
3268 to using rs6000_isa_flags, we need to do the initialization here. */
3269 if (!have_cpu)
3270 rs6000_isa_flags |= (TARGET_DEFAULT & ~rs6000_isa_flags_explicit);
3272 if (rs6000_tune_index >= 0)
3273 tune_index = rs6000_tune_index;
3274 else if (have_cpu)
3275 rs6000_tune_index = tune_index = cpu_index;
3276 else
3278 size_t i;
3279 enum processor_type tune_proc
3280 = (TARGET_POWERPC64 ? PROCESSOR_DEFAULT64 : PROCESSOR_DEFAULT);
3282 tune_index = -1;
3283 for (i = 0; i < ARRAY_SIZE (processor_target_table); i++)
3284 if (processor_target_table[i].processor == tune_proc)
3286 rs6000_tune_index = tune_index = i;
3287 break;
3291 gcc_assert (tune_index >= 0);
3292 rs6000_cpu = processor_target_table[tune_index].processor;
3294 /* Pick defaults for SPE related control flags. Do this early to make sure
3295 that the TARGET_ macros are representative ASAP. */
3297 int spe_capable_cpu =
3298 (rs6000_cpu == PROCESSOR_PPC8540
3299 || rs6000_cpu == PROCESSOR_PPC8548);
3301 if (!global_options_set.x_rs6000_spe_abi)
3302 rs6000_spe_abi = spe_capable_cpu;
3304 if (!global_options_set.x_rs6000_spe)
3305 rs6000_spe = spe_capable_cpu;
3307 if (!global_options_set.x_rs6000_float_gprs)
3308 rs6000_float_gprs =
3309 (rs6000_cpu == PROCESSOR_PPC8540 ? 1
3310 : rs6000_cpu == PROCESSOR_PPC8548 ? 2
3311 : 0);
3314 if (global_options_set.x_rs6000_spe_abi
3315 && rs6000_spe_abi
3316 && !TARGET_SPE_ABI)
3317 error ("not configured for SPE ABI");
3319 if (global_options_set.x_rs6000_spe
3320 && rs6000_spe
3321 && !TARGET_SPE)
3322 error ("not configured for SPE instruction set");
3324 if (main_target_opt != NULL
3325 && ((main_target_opt->x_rs6000_spe_abi != rs6000_spe_abi)
3326 || (main_target_opt->x_rs6000_spe != rs6000_spe)
3327 || (main_target_opt->x_rs6000_float_gprs != rs6000_float_gprs)))
3328 error ("target attribute or pragma changes SPE ABI");
3330 if (rs6000_cpu == PROCESSOR_PPCE300C2 || rs6000_cpu == PROCESSOR_PPCE300C3
3331 || rs6000_cpu == PROCESSOR_PPCE500MC || rs6000_cpu == PROCESSOR_PPCE500MC64
3332 || rs6000_cpu == PROCESSOR_PPCE5500)
3334 if (TARGET_ALTIVEC)
3335 error ("AltiVec not supported in this target");
3336 if (TARGET_SPE)
3337 error ("SPE not supported in this target");
3339 if (rs6000_cpu == PROCESSOR_PPCE6500)
3341 if (TARGET_SPE)
3342 error ("SPE not supported in this target");
3345 /* Disable Cell microcode if we are optimizing for the Cell
3346 and not optimizing for size. */
3347 if (rs6000_gen_cell_microcode == -1)
3348 rs6000_gen_cell_microcode = !(rs6000_cpu == PROCESSOR_CELL
3349 && !optimize_size);
3351 /* If we are optimizing big endian systems for space and it's OK to
3352 use instructions that would be microcoded on the Cell, use the
3353 load/store multiple and string instructions. */
3354 if (BYTES_BIG_ENDIAN && optimize_size && rs6000_gen_cell_microcode)
3355 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & (OPTION_MASK_MULTIPLE
3356 | OPTION_MASK_STRING);
3358 /* Don't allow -mmultiple or -mstring on little endian systems
3359 unless the cpu is a 750, because the hardware doesn't support the
3360 instructions used in little endian mode, and they cause an alignment
3361 trap. The 750 does not cause an alignment trap (except when the
3362 target is unaligned). */
3364 if (!BYTES_BIG_ENDIAN && rs6000_cpu != PROCESSOR_PPC750)
3366 if (TARGET_MULTIPLE)
3368 rs6000_isa_flags &= ~OPTION_MASK_MULTIPLE;
3369 if ((rs6000_isa_flags_explicit & OPTION_MASK_MULTIPLE) != 0)
3370 warning (0, "-mmultiple is not supported on little endian systems");
3373 if (TARGET_STRING)
3375 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3376 if ((rs6000_isa_flags_explicit & OPTION_MASK_STRING) != 0)
3377 warning (0, "-mstring is not supported on little endian systems");
3381 /* If little-endian, default to -mstrict-align on older processors.
3382 Testing for htm matches power8 and later. */
3383 if (!BYTES_BIG_ENDIAN
3384 && !(processor_target_table[tune_index].target_enable & OPTION_MASK_HTM))
3385 rs6000_isa_flags |= ~rs6000_isa_flags_explicit & OPTION_MASK_STRICT_ALIGN;
3387 /* -maltivec={le,be} implies -maltivec. */
3388 if (rs6000_altivec_element_order != 0)
3389 rs6000_isa_flags |= OPTION_MASK_ALTIVEC;
3391 /* Disallow -maltivec=le in big endian mode for now. This is not
3392 known to be useful for anyone. */
3393 if (BYTES_BIG_ENDIAN && rs6000_altivec_element_order == 1)
3395 warning (0, N_("-maltivec=le not allowed for big-endian targets"));
3396 rs6000_altivec_element_order = 0;
3399 /* Add some warnings for VSX. */
3400 if (TARGET_VSX)
3402 const char *msg = NULL;
3403 if (!TARGET_HARD_FLOAT || !TARGET_FPRS
3404 || !TARGET_SINGLE_FLOAT || !TARGET_DOUBLE_FLOAT)
3406 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3407 msg = N_("-mvsx requires hardware floating point");
3408 else
3410 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3411 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3414 else if (TARGET_PAIRED_FLOAT)
3415 msg = N_("-mvsx and -mpaired are incompatible");
3416 else if (TARGET_AVOID_XFORM > 0)
3417 msg = N_("-mvsx needs indexed addressing");
3418 else if (!TARGET_ALTIVEC && (rs6000_isa_flags_explicit
3419 & OPTION_MASK_ALTIVEC))
3421 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX)
3422 msg = N_("-mvsx and -mno-altivec are incompatible");
3423 else
3424 msg = N_("-mno-altivec disables vsx");
3427 if (msg)
3429 warning (0, msg);
3430 rs6000_isa_flags &= ~ OPTION_MASK_VSX;
3431 rs6000_isa_flags_explicit |= OPTION_MASK_VSX;
3435 /* If hard-float/altivec/vsx were explicitly turned off then don't allow
3436 the -mcpu setting to enable options that conflict. */
3437 if ((!TARGET_HARD_FLOAT || !TARGET_ALTIVEC || !TARGET_VSX)
3438 && (rs6000_isa_flags_explicit & (OPTION_MASK_SOFT_FLOAT
3439 | OPTION_MASK_ALTIVEC
3440 | OPTION_MASK_VSX)) != 0)
3441 rs6000_isa_flags &= ~((OPTION_MASK_P8_VECTOR | OPTION_MASK_CRYPTO
3442 | OPTION_MASK_DIRECT_MOVE)
3443 & ~rs6000_isa_flags_explicit);
3445 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3446 rs6000_print_isa_options (stderr, 0, "before defaults", rs6000_isa_flags);
3448 /* For the newer switches (vsx, dfp, etc.) set some of the older options,
3449 unless the user explicitly used the -mno-<option> to disable the code. */
3450 if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
3451 rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3452 else if (TARGET_VSX)
3453 rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3454 else if (TARGET_POPCNTD)
3455 rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3456 else if (TARGET_DFP)
3457 rs6000_isa_flags |= (ISA_2_5_MASKS_SERVER & ~rs6000_isa_flags_explicit);
3458 else if (TARGET_CMPB)
3459 rs6000_isa_flags |= (ISA_2_5_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
3460 else if (TARGET_FPRND)
3461 rs6000_isa_flags |= (ISA_2_4_MASKS & ~rs6000_isa_flags_explicit);
3462 else if (TARGET_POPCNTB)
3463 rs6000_isa_flags |= (ISA_2_2_MASKS & ~rs6000_isa_flags_explicit);
3464 else if (TARGET_ALTIVEC)
3465 rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
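/* Illustrative example (hypothetical options, for exposition): a
   -mcpu=power8 selection turns on OPTION_MASK_P8_VECTOR, so the first arm
   of the chain above ORs in ISA_2_7_MASKS_SERVER, which pulls in the
   older ISA-level options as well.  A user who also passed -mno-vsx keeps
   VSX off, because that bit is in rs6000_isa_flags_explicit and is masked
   out of the OR.  */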
3467 if (TARGET_CRYPTO && !TARGET_ALTIVEC)
3469 if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
3470 error ("-mcrypto requires -maltivec");
3471 rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
3474 if (TARGET_DIRECT_MOVE && !TARGET_VSX)
3476 if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
3477 error ("-mdirect-move requires -mvsx");
3478 rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
3481 if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
3483 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3484 error ("-mpower8-vector requires -maltivec");
3485 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3488 if (TARGET_P8_VECTOR && !TARGET_VSX)
3490 if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
3491 error ("-mpower8-vector requires -mvsx");
3492 rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
3495 if (TARGET_VSX_TIMODE && !TARGET_VSX)
3497 if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
3498 error ("-mvsx-timode requires -mvsx");
3499 rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
3502 if (TARGET_DFP && !TARGET_HARD_FLOAT)
3504 if (rs6000_isa_flags_explicit & OPTION_MASK_DFP)
3505 error ("-mhard-dfp requires -mhard-float");
3506 rs6000_isa_flags &= ~OPTION_MASK_DFP;
3509 /* Allow an explicit -mupper-regs to set both -mupper-regs-df and
3510 -mupper-regs-sf, depending on the cpu, unless the user explicitly also set
3511 the individual option. */
3512 if (TARGET_UPPER_REGS > 0)
3514 if (TARGET_VSX
3515 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3517 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_DF;
3518 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3520 if (TARGET_P8_VECTOR
3521 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3523 rs6000_isa_flags |= OPTION_MASK_UPPER_REGS_SF;
3524 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3527 else if (TARGET_UPPER_REGS == 0)
3529 if (TARGET_VSX
3530 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF))
3532 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3533 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_DF;
3535 if (TARGET_P8_VECTOR
3536 && !(rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF))
3538 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3539 rs6000_isa_flags_explicit |= OPTION_MASK_UPPER_REGS_SF;
3543 if (TARGET_UPPER_REGS_DF && !TARGET_VSX)
3545 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_DF)
3546 error ("-mupper-regs-df requires -mvsx");
3547 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_DF;
3550 if (TARGET_UPPER_REGS_SF && !TARGET_P8_VECTOR)
3552 if (rs6000_isa_flags_explicit & OPTION_MASK_UPPER_REGS_SF)
3553 error ("-mupper-regs-sf requires -mpower8-vector");
3554 rs6000_isa_flags &= ~OPTION_MASK_UPPER_REGS_SF;
3557 /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
3558 silently turn off quad memory mode. */
3559 if ((TARGET_QUAD_MEMORY || TARGET_QUAD_MEMORY_ATOMIC) && !TARGET_POWERPC64)
3561 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3562 warning (0, N_("-mquad-memory requires 64-bit mode"));
3564 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) != 0)
3565 warning (0, N_("-mquad-memory-atomic requires 64-bit mode"));
3567 rs6000_isa_flags &= ~(OPTION_MASK_QUAD_MEMORY
3568 | OPTION_MASK_QUAD_MEMORY_ATOMIC);
3571 /* Non-atomic quad memory loads/stores are disabled for little endian, since
3572 the words are reversed, but atomic operations can still be done by
3573 swapping the words. */
3574 if (TARGET_QUAD_MEMORY && !WORDS_BIG_ENDIAN)
3576 if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
3577 warning (0, N_("-mquad-memory is not available in little endian mode"));
3579 rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
3582 /* Assume that if the user asked for normal quad memory instructions, they
3583 want the atomic versions as well, unless they explicitly told us not to
3584 use quad word atomic instructions. */
3585 if (TARGET_QUAD_MEMORY
3586 && !TARGET_QUAD_MEMORY_ATOMIC
3587 && ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY_ATOMIC) == 0))
3588 rs6000_isa_flags |= OPTION_MASK_QUAD_MEMORY_ATOMIC;
3590 /* Enable power8 fusion if we are tuning for power8, even if we aren't
3591 generating power8 instructions. */
3592 if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION))
3593 rs6000_isa_flags |= (processor_target_table[tune_index].target_enable
3594 & OPTION_MASK_P8_FUSION);
3596 /* Power8 does not fuse sign extended loads with the addis. If we are
3597 optimizing at high levels for speed, convert a sign extended load into a
3598 zero extending load, and an explicit sign extension. */
3599 if (TARGET_P8_FUSION
3600 && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN)
3601 && optimize_function_for_speed_p (cfun)
3602 && optimize >= 3)
3603 rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN;
3605 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3606 rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
3608 /* E500mc does "better" if we inline more aggressively. Respect the
3609 user's opinion, though. */
3610 if (rs6000_block_move_inline_limit == 0
3611 && (rs6000_cpu == PROCESSOR_PPCE500MC
3612 || rs6000_cpu == PROCESSOR_PPCE500MC64
3613 || rs6000_cpu == PROCESSOR_PPCE5500
3614 || rs6000_cpu == PROCESSOR_PPCE6500))
3615 rs6000_block_move_inline_limit = 128;
3617 /* store_one_arg depends on expand_block_move to handle at least the
3618 size of reg_parm_stack_space. */
3619 if (rs6000_block_move_inline_limit < (TARGET_POWERPC64 ? 64 : 32))
3620 rs6000_block_move_inline_limit = (TARGET_POWERPC64 ? 64 : 32);
3622 if (global_init_p)
3624 /* If the appropriate debug option is enabled, replace the target hooks
3625 with debug versions that call the real version and then print
3626 debugging information. */
3627 if (TARGET_DEBUG_COST)
3629 targetm.rtx_costs = rs6000_debug_rtx_costs;
3630 targetm.address_cost = rs6000_debug_address_cost;
3631 targetm.sched.adjust_cost = rs6000_debug_adjust_cost;
3634 if (TARGET_DEBUG_ADDR)
3636 targetm.legitimate_address_p = rs6000_debug_legitimate_address_p;
3637 targetm.legitimize_address = rs6000_debug_legitimize_address;
3638 rs6000_secondary_reload_class_ptr
3639 = rs6000_debug_secondary_reload_class;
3640 rs6000_secondary_memory_needed_ptr
3641 = rs6000_debug_secondary_memory_needed;
3642 rs6000_cannot_change_mode_class_ptr
3643 = rs6000_debug_cannot_change_mode_class;
3644 rs6000_preferred_reload_class_ptr
3645 = rs6000_debug_preferred_reload_class;
3646 rs6000_legitimize_reload_address_ptr
3647 = rs6000_debug_legitimize_reload_address;
3648 rs6000_mode_dependent_address_ptr
3649 = rs6000_debug_mode_dependent_address;
3652 if (rs6000_veclibabi_name)
3654 if (strcmp (rs6000_veclibabi_name, "mass") == 0)
3655 rs6000_veclib_handler = rs6000_builtin_vectorized_libmass;
3656 else
3658 error ("unknown vectorization library ABI type (%s) for "
3659 "-mveclibabi= switch", rs6000_veclibabi_name);
3660 ret = false;
3665 if (!global_options_set.x_rs6000_long_double_type_size)
3667 if (main_target_opt != NULL
3668 && (main_target_opt->x_rs6000_long_double_type_size
3669 != RS6000_DEFAULT_LONG_DOUBLE_SIZE))
3670 error ("target attribute or pragma changes long double size");
3671 else
3672 rs6000_long_double_type_size = RS6000_DEFAULT_LONG_DOUBLE_SIZE;
3675 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
3676 if (!global_options_set.x_rs6000_ieeequad)
3677 rs6000_ieeequad = 1;
3678 #endif
3680 /* Disable VSX and Altivec silently if the user switched cpus to power7 in a
3681 target attribute or pragma which automatically enables both options,
3682 unless the altivec ABI was set. This is set by default for 64-bit, but
3683 not for 32-bit. */
3684 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3685 rs6000_isa_flags &= ~((OPTION_MASK_VSX | OPTION_MASK_ALTIVEC)
3686 & ~rs6000_isa_flags_explicit);
3688 /* Enable Altivec ABI for AIX -maltivec. */
3689 if (TARGET_XCOFF && (TARGET_ALTIVEC || TARGET_VSX))
3691 if (main_target_opt != NULL && !main_target_opt->x_rs6000_altivec_abi)
3692 error ("target attribute or pragma changes AltiVec ABI");
3693 else
3694 rs6000_altivec_abi = 1;
3697 /* The AltiVec ABI is the default for PowerPC-64 GNU/Linux. For
3698 PowerPC-32 GNU/Linux, -maltivec implies the AltiVec ABI. It can
3699 be explicitly overridden in either case. */
3700 if (TARGET_ELF)
3702 if (!global_options_set.x_rs6000_altivec_abi
3703 && (TARGET_64BIT || TARGET_ALTIVEC || TARGET_VSX))
3705 if (main_target_opt != NULL &&
3706 !main_target_opt->x_rs6000_altivec_abi)
3707 error ("target attribute or pragma changes AltiVec ABI");
3708 else
3709 rs6000_altivec_abi = 1;
3713 /* Set the Darwin64 ABI as default for 64-bit Darwin.
3714 So far, the only darwin64 targets are also MACH-O. */
3715 if (TARGET_MACHO
3716 && DEFAULT_ABI == ABI_DARWIN
3717 && TARGET_64BIT)
3719 if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
3720 error ("target attribute or pragma changes darwin64 ABI");
3721 else
3723 rs6000_darwin64_abi = 1;
3724 /* Default to natural alignment, for better performance. */
3725 rs6000_alignment_flags = MASK_ALIGN_NATURAL;
3729 /* Place FP constants in the constant pool instead of the TOC
3730 if section anchors are enabled. */
3731 if (flag_section_anchors
3732 && !global_options_set.x_TARGET_NO_FP_IN_TOC)
3733 TARGET_NO_FP_IN_TOC = 1;
3735 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3736 rs6000_print_isa_options (stderr, 0, "before subtarget", rs6000_isa_flags);
3738 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3739 SUBTARGET_OVERRIDE_OPTIONS;
3740 #endif
3741 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
3742 SUBSUBTARGET_OVERRIDE_OPTIONS;
3743 #endif
3744 #ifdef SUB3TARGET_OVERRIDE_OPTIONS
3745 SUB3TARGET_OVERRIDE_OPTIONS;
3746 #endif
3748 if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
3749 rs6000_print_isa_options (stderr, 0, "after subtarget", rs6000_isa_flags);
3751 /* For the E500 family of cores, reset the single/double FP flags to let us
3752 check that they remain constant across attributes or pragmas. Also,
3753 clear a possible request for string instructions, which are not supported
3754 and which we might have silently enabled above for -Os.
3756 For other families, clear ISEL in case it was set implicitly.
3759 switch (rs6000_cpu)
3761 case PROCESSOR_PPC8540:
3762 case PROCESSOR_PPC8548:
3763 case PROCESSOR_PPCE500MC:
3764 case PROCESSOR_PPCE500MC64:
3765 case PROCESSOR_PPCE5500:
3766 case PROCESSOR_PPCE6500:
3768 rs6000_single_float = TARGET_E500_SINGLE || TARGET_E500_DOUBLE;
3769 rs6000_double_float = TARGET_E500_DOUBLE;
3771 rs6000_isa_flags &= ~OPTION_MASK_STRING;
3773 break;
3775 default:
3777 if (have_cpu && !(rs6000_isa_flags_explicit & OPTION_MASK_ISEL))
3778 rs6000_isa_flags &= ~OPTION_MASK_ISEL;
3780 break;
3783 if (main_target_opt)
3785 if (main_target_opt->x_rs6000_single_float != rs6000_single_float)
3786 error ("target attribute or pragma changes single precision floating "
3787 "point");
3788 if (main_target_opt->x_rs6000_double_float != rs6000_double_float)
3789 error ("target attribute or pragma changes double precision floating "
3790 "point");
3793 /* Detect invalid option combinations with E500. */
3794 CHECK_E500_OPTIONS;
3796 rs6000_always_hint = (rs6000_cpu != PROCESSOR_POWER4
3797 && rs6000_cpu != PROCESSOR_POWER5
3798 && rs6000_cpu != PROCESSOR_POWER6
3799 && rs6000_cpu != PROCESSOR_POWER7
3800 && rs6000_cpu != PROCESSOR_POWER8
3801 && rs6000_cpu != PROCESSOR_PPCA2
3802 && rs6000_cpu != PROCESSOR_CELL
3803 && rs6000_cpu != PROCESSOR_PPC476);
3804 rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
3805 || rs6000_cpu == PROCESSOR_POWER5
3806 || rs6000_cpu == PROCESSOR_POWER7
3807 || rs6000_cpu == PROCESSOR_POWER8);
3808 rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
3809 || rs6000_cpu == PROCESSOR_POWER5
3810 || rs6000_cpu == PROCESSOR_POWER6
3811 || rs6000_cpu == PROCESSOR_POWER7
3812 || rs6000_cpu == PROCESSOR_POWER8
3813 || rs6000_cpu == PROCESSOR_PPCE500MC
3814 || rs6000_cpu == PROCESSOR_PPCE500MC64
3815 || rs6000_cpu == PROCESSOR_PPCE5500
3816 || rs6000_cpu == PROCESSOR_PPCE6500);
3818 /* Allow debug switches to override the above settings. These are set to -1
3819 in rs6000.opt to indicate the user hasn't directly set the switch. */
3820 if (TARGET_ALWAYS_HINT >= 0)
3821 rs6000_always_hint = TARGET_ALWAYS_HINT;
3823 if (TARGET_SCHED_GROUPS >= 0)
3824 rs6000_sched_groups = TARGET_SCHED_GROUPS;
3826 if (TARGET_ALIGN_BRANCH_TARGETS >= 0)
3827 rs6000_align_branch_targets = TARGET_ALIGN_BRANCH_TARGETS;
3829 rs6000_sched_restricted_insns_priority
3830 = (rs6000_sched_groups ? 1 : 0);
3832 /* Handle -msched-costly-dep option. */
3833 rs6000_sched_costly_dep
3834 = (rs6000_sched_groups ? true_store_to_load_dep_costly : no_dep_costly);
3836 if (rs6000_sched_costly_dep_str)
3838 if (! strcmp (rs6000_sched_costly_dep_str, "no"))
3839 rs6000_sched_costly_dep = no_dep_costly;
3840 else if (! strcmp (rs6000_sched_costly_dep_str, "all"))
3841 rs6000_sched_costly_dep = all_deps_costly;
3842 else if (! strcmp (rs6000_sched_costly_dep_str, "true_store_to_load"))
3843 rs6000_sched_costly_dep = true_store_to_load_dep_costly;
3844 else if (! strcmp (rs6000_sched_costly_dep_str, "store_to_load"))
3845 rs6000_sched_costly_dep = store_to_load_dep_costly;
3846 else
3847 rs6000_sched_costly_dep = ((enum rs6000_dependence_cost)
3848 atoi (rs6000_sched_costly_dep_str));
3851 /* Handle -minsert-sched-nops option. */
3852 rs6000_sched_insert_nops
3853 = (rs6000_sched_groups ? sched_finish_regroup_exact : sched_finish_none);
3855 if (rs6000_sched_insert_nops_str)
3857 if (! strcmp (rs6000_sched_insert_nops_str, "no"))
3858 rs6000_sched_insert_nops = sched_finish_none;
3859 else if (! strcmp (rs6000_sched_insert_nops_str, "pad"))
3860 rs6000_sched_insert_nops = sched_finish_pad_groups;
3861 else if (! strcmp (rs6000_sched_insert_nops_str, "regroup_exact"))
3862 rs6000_sched_insert_nops = sched_finish_regroup_exact;
3863 else
3864 rs6000_sched_insert_nops = ((enum rs6000_nop_insertion)
3865 atoi (rs6000_sched_insert_nops_str));
3868 if (global_init_p)
3870 #ifdef TARGET_REGNAMES
3871 /* If the user desires alternate register names, copy in the
3872 alternate names now. */
3873 if (TARGET_REGNAMES)
3874 memcpy (rs6000_reg_names, alt_reg_names, sizeof (rs6000_reg_names));
3875 #endif
3877 /* Set aix_struct_return last, after the ABI is determined.
3878 If -maix-struct-return or -msvr4-struct-return was explicitly
3879 used, don't override with the ABI default. */
3880 if (!global_options_set.x_aix_struct_return)
3881 aix_struct_return = (DEFAULT_ABI != ABI_V4 || DRAFT_V4_STRUCT_RET);
3883 #if 0
3884 /* IBM XL compiler defaults to unsigned bitfields. */
3885 if (TARGET_XL_COMPAT)
3886 flag_signed_bitfields = 0;
3887 #endif
3889 if (TARGET_LONG_DOUBLE_128 && !TARGET_IEEEQUAD)
3890 REAL_MODE_FORMAT (TFmode) = &ibm_extended_format;
3892 if (TARGET_TOC)
3893 ASM_GENERATE_INTERNAL_LABEL (toc_label_name, "LCTOC", 1);
3895 /* We can only guarantee the availability of DI pseudo-ops when
3896 assembling for 64-bit targets. */
3897 if (!TARGET_64BIT)
3899 targetm.asm_out.aligned_op.di = NULL;
3900 targetm.asm_out.unaligned_op.di = NULL;
3904 /* Set branch target alignment, if not optimizing for size. */
3905 if (!optimize_size)
3907 /* Cell wants to be aligned to 8 bytes for dual issue. Titan wants to be
3908 aligned to 8 bytes to avoid misprediction by the branch predictor. */
3909 if (rs6000_cpu == PROCESSOR_TITAN
3910 || rs6000_cpu == PROCESSOR_CELL)
3912 if (align_functions <= 0)
3913 align_functions = 8;
3914 if (align_jumps <= 0)
3915 align_jumps = 8;
3916 if (align_loops <= 0)
3917 align_loops = 8;
3919 if (rs6000_align_branch_targets)
3921 if (align_functions <= 0)
3922 align_functions = 16;
3923 if (align_jumps <= 0)
3924 align_jumps = 16;
3925 if (align_loops <= 0)
3927 can_override_loop_align = 1;
3928 align_loops = 16;
3931 if (align_jumps_max_skip <= 0)
3932 align_jumps_max_skip = 15;
3933 if (align_loops_max_skip <= 0)
3934 align_loops_max_skip = 15;
3937 /* Arrange to save and restore machine status around nested functions. */
3938 init_machine_status = rs6000_init_machine_status;
3940 /* We should always be splitting complex arguments, but we can't break
3941 Linux and Darwin ABIs at the moment. For now, only AIX is fixed. */
3942 if (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN)
3943 targetm.calls.split_complex_arg = NULL;
3946 /* Initialize rs6000_cost with the appropriate target costs. */
3947 if (optimize_size)
3948 rs6000_cost = TARGET_POWERPC64 ? &size64_cost : &size32_cost;
3949 else
3950 switch (rs6000_cpu)
3952 case PROCESSOR_RS64A:
3953 rs6000_cost = &rs64a_cost;
3954 break;
3956 case PROCESSOR_MPCCORE:
3957 rs6000_cost = &mpccore_cost;
3958 break;
3960 case PROCESSOR_PPC403:
3961 rs6000_cost = &ppc403_cost;
3962 break;
3964 case PROCESSOR_PPC405:
3965 rs6000_cost = &ppc405_cost;
3966 break;
3968 case PROCESSOR_PPC440:
3969 rs6000_cost = &ppc440_cost;
3970 break;
3972 case PROCESSOR_PPC476:
3973 rs6000_cost = &ppc476_cost;
3974 break;
3976 case PROCESSOR_PPC601:
3977 rs6000_cost = &ppc601_cost;
3978 break;
3980 case PROCESSOR_PPC603:
3981 rs6000_cost = &ppc603_cost;
3982 break;
3984 case PROCESSOR_PPC604:
3985 rs6000_cost = &ppc604_cost;
3986 break;
3988 case PROCESSOR_PPC604e:
3989 rs6000_cost = &ppc604e_cost;
3990 break;
3992 case PROCESSOR_PPC620:
3993 rs6000_cost = &ppc620_cost;
3994 break;
3996 case PROCESSOR_PPC630:
3997 rs6000_cost = &ppc630_cost;
3998 break;
4000 case PROCESSOR_CELL:
4001 rs6000_cost = &ppccell_cost;
4002 break;
4004 case PROCESSOR_PPC750:
4005 case PROCESSOR_PPC7400:
4006 rs6000_cost = &ppc750_cost;
4007 break;
4009 case PROCESSOR_PPC7450:
4010 rs6000_cost = &ppc7450_cost;
4011 break;
4013 case PROCESSOR_PPC8540:
4014 case PROCESSOR_PPC8548:
4015 rs6000_cost = &ppc8540_cost;
4016 break;
4018 case PROCESSOR_PPCE300C2:
4019 case PROCESSOR_PPCE300C3:
4020 rs6000_cost = &ppce300c2c3_cost;
4021 break;
4023 case PROCESSOR_PPCE500MC:
4024 rs6000_cost = &ppce500mc_cost;
4025 break;
4027 case PROCESSOR_PPCE500MC64:
4028 rs6000_cost = &ppce500mc64_cost;
4029 break;
4031 case PROCESSOR_PPCE5500:
4032 rs6000_cost = &ppce5500_cost;
4033 break;
4035 case PROCESSOR_PPCE6500:
4036 rs6000_cost = &ppce6500_cost;
4037 break;
4039 case PROCESSOR_TITAN:
4040 rs6000_cost = &titan_cost;
4041 break;
4043 case PROCESSOR_POWER4:
4044 case PROCESSOR_POWER5:
4045 rs6000_cost = &power4_cost;
4046 break;
4048 case PROCESSOR_POWER6:
4049 rs6000_cost = &power6_cost;
4050 break;
4052 case PROCESSOR_POWER7:
4053 rs6000_cost = &power7_cost;
4054 break;
4056 case PROCESSOR_POWER8:
4057 rs6000_cost = &power8_cost;
4058 break;
4060 case PROCESSOR_PPCA2:
4061 rs6000_cost = &ppca2_cost;
4062 break;
4064 default:
4065 gcc_unreachable ();
4068 if (global_init_p)
4070 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
4071 rs6000_cost->simultaneous_prefetches,
4072 global_options.x_param_values,
4073 global_options_set.x_param_values);
4074 maybe_set_param_value (PARAM_L1_CACHE_SIZE, rs6000_cost->l1_cache_size,
4075 global_options.x_param_values,
4076 global_options_set.x_param_values);
4077 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
4078 rs6000_cost->cache_line_size,
4079 global_options.x_param_values,
4080 global_options_set.x_param_values);
4081 maybe_set_param_value (PARAM_L2_CACHE_SIZE, rs6000_cost->l2_cache_size,
4082 global_options.x_param_values,
4083 global_options_set.x_param_values);
4085 /* Increase loop peeling limits based on performance analysis. */
4086 maybe_set_param_value (PARAM_MAX_PEELED_INSNS, 400,
4087 global_options.x_param_values,
4088 global_options_set.x_param_values);
4089 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 400,
4090 global_options.x_param_values,
4091 global_options_set.x_param_values);
4093 /* If using typedef char *va_list, signal that
4094 __builtin_va_start (&ap, 0) can be optimized to
4095 ap = __builtin_next_arg (0). */
4096 if (DEFAULT_ABI != ABI_V4)
4097 targetm.expand_builtin_va_start = NULL;
4100 /* Set up single/double float flags.
4101 If TARGET_HARD_FLOAT is set, but neither single nor double is set,
4102 then set both flags. */
4103 if (TARGET_HARD_FLOAT && TARGET_FPRS
4104 && rs6000_single_float == 0 && rs6000_double_float == 0)
4105 rs6000_single_float = rs6000_double_float = 1;
4107 /* If not explicitly specified via option, decide whether to generate indexed
4108 load/store instructions. */
4109 if (TARGET_AVOID_XFORM == -1)
4110 /* Avoid indexed addressing when targeting Power6 in order to avoid the
4111 DERAT mispredict penalty. However the LVE and STVE altivec instructions
4112 need indexed accesses and the type used is the scalar type of the element
4113 being loaded or stored. */
4114 TARGET_AVOID_XFORM = (rs6000_cpu == PROCESSOR_POWER6 && TARGET_CMPB
4115 && !TARGET_ALTIVEC);
4117 /* Set the -mrecip options. */
4118 if (rs6000_recip_name)
4120 char *p = ASTRDUP (rs6000_recip_name);
4121 char *q;
4122 unsigned int mask, i;
4123 bool invert;
4125 while ((q = strtok (p, ",")) != NULL)
4127 p = NULL;
4128 if (*q == '!')
4130 invert = true;
4131 q++;
4133 else
4134 invert = false;
4136 if (!strcmp (q, "default"))
4137 mask = ((TARGET_RECIP_PRECISION)
4138 ? RECIP_HIGH_PRECISION : RECIP_LOW_PRECISION);
4139 else
4141 for (i = 0; i < ARRAY_SIZE (recip_options); i++)
4142 if (!strcmp (q, recip_options[i].string))
4144 mask = recip_options[i].mask;
4145 break;
4148 if (i == ARRAY_SIZE (recip_options))
4150 error ("unknown option for -mrecip=%s", q);
4151 invert = false;
4152 mask = 0;
4153 ret = false;
4157 if (invert)
4158 rs6000_recip_control &= ~mask;
4159 else
4160 rs6000_recip_control |= mask;
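/* Worked example (hypothetical option string, for exposition): a
   -mrecip=all,!rsqrtd is tokenized by strtok into "all" and "!rsqrtd";
   the first token ORs the RECIP_ALL bits into rs6000_recip_control, and
   the leading '!' on the second token clears the double-precision rsqrt
   bit again.  */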
4164 /* Set the builtin mask from the various options that could affect which
4165 builtins are available. In the past we used target_flags, but we've run
4166 out of bits, and some options like SPE and PAIRED are no longer in
4167 target_flags. */
4168 rs6000_builtin_mask = rs6000_builtin_mask_calculate ();
4169 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
4171 fprintf (stderr,
4172 "new builtin mask = " HOST_WIDE_INT_PRINT_HEX ", ",
4173 rs6000_builtin_mask);
4174 rs6000_print_builtin_options (stderr, 0, NULL, rs6000_builtin_mask);
4177 /* Initialize all of the registers. */
4178 rs6000_init_hard_regno_mode_ok (global_init_p);
4180 /* Save the initial options in case the user uses function-specific options. */
4181 if (global_init_p)
4182 target_option_default_node = target_option_current_node
4183 = build_target_option_node (&global_options);
4185 /* If not explicitly specified via option, decide whether to generate the
4186 extra blr's required to preserve the link stack on some cpus (e.g., the 476). */
4187 if (TARGET_LINK_STACK == -1)
4188 SET_TARGET_LINK_STACK (rs6000_cpu == PROCESSOR_PPC476 && flag_pic);
4190 return ret;
4193 /* Implement TARGET_OPTION_OVERRIDE. On the RS/6000 this is used to
4194 define the target cpu type. */
4196 static void
4197 rs6000_option_override (void)
4199 (void) rs6000_option_override_internal (true);
4201 /* Register machine-specific passes. This needs to be done at start-up.
4202 It's convenient to do it here (like i386 does). */
4203 opt_pass *pass_analyze_swaps = make_pass_analyze_swaps (g);
4205 struct register_pass_info analyze_swaps_info
4206 = { pass_analyze_swaps, "cse1", 1, PASS_POS_INSERT_BEFORE };
4208 register_pass (&analyze_swaps_info);
4212 /* Implement targetm.vectorize.builtin_mask_for_load. */
4213 static tree
4214 rs6000_builtin_mask_for_load (void)
4216 if (TARGET_ALTIVEC || TARGET_VSX)
4217 return altivec_builtin_mask_for_load;
4218 else
4219 return 0;
4222 /* Implement LOOP_ALIGN. */
4224 rs6000_loop_align (rtx label)
4226 basic_block bb;
4227 int ninsns;
4229 /* Don't override loop alignment if -falign-loops was specified. */
4230 if (!can_override_loop_align)
4231 return align_loops_log;
4233 bb = BLOCK_FOR_INSN (label);
4234 ninsns = num_loop_insns(bb->loop_father);
4236 /* Align small loops to 32 bytes to fit in an icache sector, otherwise return the default. */
4237 if (ninsns > 4 && ninsns <= 8
4238 && (rs6000_cpu == PROCESSOR_POWER4
4239 || rs6000_cpu == PROCESSOR_POWER5
4240 || rs6000_cpu == PROCESSOR_POWER6
4241 || rs6000_cpu == PROCESSOR_POWER7
4242 || rs6000_cpu == PROCESSOR_POWER8))
4243 return 5;
4244 else
4245 return align_loops_log;
4248 /* Implement TARGET_LOOP_ALIGN_MAX_SKIP. */
4249 static int
4250 rs6000_loop_align_max_skip (rtx_insn *label)
4252 return (1 << rs6000_loop_align (label)) - 1;
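/* Worked example (hypothetical loop, for exposition): for a loop of 6
   insns on a POWER7, rs6000_loop_align returns 5, i.e. a 2**5 = 32 byte
   alignment, and rs6000_loop_align_max_skip then allows up to
   (1 << 5) - 1 = 31 bytes of padding to reach it.  */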
4255 /* Return true iff a data reference of TYPE can reach vector alignment (16)
4256 after applying N iterations. This routine does not determine
4257 how many iterations are required to reach the desired alignment. */
4259 static bool
4260 rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
4262 if (is_packed)
4263 return false;
4265 if (TARGET_32BIT)
4267 if (rs6000_alignment_flags == MASK_ALIGN_NATURAL)
4268 return true;
4270 if (rs6000_alignment_flags == MASK_ALIGN_POWER)
4271 return true;
4273 return false;
4275 else
4277 if (TARGET_MACHO)
4278 return false;
4280 /* Assume that all other types are naturally aligned. CHECKME! */
4281 return true;
4285 /* Return true if the vector misalignment factor is supported by the
4286 target. */
4287 static bool
4288 rs6000_builtin_support_vector_misalignment (machine_mode mode,
4289 const_tree type,
4290 int misalignment,
4291 bool is_packed)
4293 if (TARGET_VSX)
4295 /* Return false if the movmisalign pattern is not supported for this mode. */
4296 if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing)
4297 return false;
4299 if (misalignment == -1)
4301 /* Misalignment factor is unknown at compile time but we know
4302 it's word aligned. */
4303 if (rs6000_vector_alignment_reachable (type, is_packed))
4305 int element_size = TREE_INT_CST_LOW (TYPE_SIZE (type));
4307 if (element_size == 64 || element_size == 32)
4308 return true;
4311 return false;
4314 /* VSX supports word-aligned vectors. */
4315 if (misalignment % 4 == 0)
4316 return true;
4318 return false;
4321 /* Implement targetm.vectorize.builtin_vectorization_cost. */
4322 static int
4323 rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
4324 tree vectype, int misalign)
4326 unsigned elements;
4327 tree elem_type;
4329 switch (type_of_cost)
4331 case scalar_stmt:
4332 case scalar_load:
4333 case scalar_store:
4334 case vector_stmt:
4335 case vector_load:
4336 case vector_store:
4337 case vec_to_scalar:
4338 case scalar_to_vec:
4339 case cond_branch_not_taken:
4340 return 1;
4342 case vec_perm:
4343 if (TARGET_VSX)
4344 return 3;
4345 else
4346 return 1;
4348 case vec_promote_demote:
4349 if (TARGET_VSX)
4350 return 4;
4351 else
4352 return 1;
4354 case cond_branch_taken:
4355 return 3;
4357 case unaligned_load:
4358 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4360 elements = TYPE_VECTOR_SUBPARTS (vectype);
4361 if (elements == 2)
4362 /* Double word aligned. */
4363 return 2;
4365 if (elements == 4)
4367 switch (misalign)
4369 case 8:
4370 /* Double word aligned. */
4371 return 2;
4373 case -1:
4374 /* Unknown misalignment. */
4375 case 4:
4376 case 12:
4377 /* Word aligned. */
4378 return 22;
4380 default:
4381 gcc_unreachable ();
4386 if (TARGET_ALTIVEC)
4387 /* Misaligned loads are not supported. */
4388 gcc_unreachable ();
4390 return 2;
4392 case unaligned_store:
4393 if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN)
4395 elements = TYPE_VECTOR_SUBPARTS (vectype);
4396 if (elements == 2)
4397 /* Double word aligned. */
4398 return 2;
4400 if (elements == 4)
4402 switch (misalign)
4404 case 8:
4405 /* Double word aligned. */
4406 return 2;
4408 case -1:
4409 /* Unknown misalignment. */
4410 case 4:
4411 case 12:
4412 /* Word aligned. */
4413 return 23;
4415 default:
4416 gcc_unreachable ();
4421 if (TARGET_ALTIVEC)
4422 /* Misaligned stores are not supported. */
4423 gcc_unreachable ();
4425 return 2;
4427 case vec_construct:
4428 elements = TYPE_VECTOR_SUBPARTS (vectype);
4429 elem_type = TREE_TYPE (vectype);
4430 /* 32-bit floats loaded into registers are stored as double
4431 precision, so we need n/2 converts in addition to the usual
4432 n/2 merges to construct a vector of short floats from them. */
4433 if (SCALAR_FLOAT_TYPE_P (elem_type)
4434 && TYPE_PRECISION (elem_type) == 32)
4435 return elements + 1;
4436 else
4437 return elements / 2 + 1;
4439 default:
4440 gcc_unreachable ();
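/* Worked example (for exposition, derived from the cases above): an
   unaligned_load of a V4SF vector under VSX costs 2 when the access is
   double-word aligned (misalign 8), but 22 when it is only word aligned
   (misalign 4 or 12) or the misalignment is unknown (-1), reflecting the
   extra realignment work; a taken conditional branch costs 3.  */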
4444 /* Implement targetm.vectorize.preferred_simd_mode. */
4446 static machine_mode
4447 rs6000_preferred_simd_mode (machine_mode mode)
4449 if (TARGET_VSX)
4450 switch (mode)
4452 case DFmode:
4453 return V2DFmode;
4454 default:;
4456 if (TARGET_ALTIVEC || TARGET_VSX)
4457 switch (mode)
4459 case SFmode:
4460 return V4SFmode;
4461 case TImode:
4462 return V1TImode;
4463 case DImode:
4464 return V2DImode;
4465 case SImode:
4466 return V4SImode;
4467 case HImode:
4468 return V8HImode;
4469 case QImode:
4470 return V16QImode;
4471 default:;
4473 if (TARGET_SPE)
4474 switch (mode)
4476 case SFmode:
4477 return V2SFmode;
4478 case SImode:
4479 return V2SImode;
4480 default:;
4482 if (TARGET_PAIRED_FLOAT
4483 && mode == SFmode)
4484 return V2SFmode;
4485 return word_mode;
4488 typedef struct _rs6000_cost_data
4490 struct loop *loop_info;
4491 unsigned cost[3];
4492 } rs6000_cost_data;
4494 /* Test for likely overcommitment of vector hardware resources. If a
4495 loop iteration is relatively large, and too large a percentage of
4496 instructions in the loop are vectorized, the cost model may not
4497 adequately reflect delays from unavailable vector resources.
4498 Penalize the loop body cost for this case. */
4500 static void
4501 rs6000_density_test (rs6000_cost_data *data)
4503 const int DENSITY_PCT_THRESHOLD = 85;
4504 const int DENSITY_SIZE_THRESHOLD = 70;
4505 const int DENSITY_PENALTY = 10;
4506 struct loop *loop = data->loop_info;
4507 basic_block *bbs = get_loop_body (loop);
4508 int nbbs = loop->num_nodes;
4509 int vec_cost = data->cost[vect_body], not_vec_cost = 0;
4510 int i, density_pct;
4512 for (i = 0; i < nbbs; i++)
4514 basic_block bb = bbs[i];
4515 gimple_stmt_iterator gsi;
4517 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
4519 gimple stmt = gsi_stmt (gsi);
4520 stmt_vec_info stmt_info = vinfo_for_stmt (stmt);
4522 if (!STMT_VINFO_RELEVANT_P (stmt_info)
4523 && !STMT_VINFO_IN_PATTERN_P (stmt_info))
4524 not_vec_cost++;
4528 free (bbs);
4529 density_pct = (vec_cost * 100) / (vec_cost + not_vec_cost);
4531 if (density_pct > DENSITY_PCT_THRESHOLD
4532 && vec_cost + not_vec_cost > DENSITY_SIZE_THRESHOLD)
4534 data->cost[vect_body] = vec_cost * (100 + DENSITY_PENALTY) / 100;
4535 if (dump_enabled_p ())
4536 dump_printf_loc (MSG_NOTE, vect_location,
4537 "density %d%%, cost %d exceeds threshold, penalizing "
4538 "loop body cost by %d%%", density_pct,
4539 vec_cost + not_vec_cost, DENSITY_PENALTY);
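/* Worked example (hypothetical costs, for exposition): with vec_cost = 90
   and not_vec_cost = 5, density_pct = 9000 / 95 = 94, which exceeds
   DENSITY_PCT_THRESHOLD (85) while the size 95 exceeds
   DENSITY_SIZE_THRESHOLD (70), so the body cost becomes
   90 * (100 + 10) / 100 = 99.  */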
4543 /* Implement targetm.vectorize.init_cost. */
4545 static void *
4546 rs6000_init_cost (struct loop *loop_info)
4548 rs6000_cost_data *data = XNEW (struct _rs6000_cost_data);
4549 data->loop_info = loop_info;
4550 data->cost[vect_prologue] = 0;
4551 data->cost[vect_body] = 0;
4552 data->cost[vect_epilogue] = 0;
4553 return data;
4556 /* Implement targetm.vectorize.add_stmt_cost. */
4558 static unsigned
4559 rs6000_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
4560 struct _stmt_vec_info *stmt_info, int misalign,
4561 enum vect_cost_model_location where)
4563 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4564 unsigned retval = 0;
4566 if (flag_vect_cost_model)
4568 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
4569 int stmt_cost = rs6000_builtin_vectorization_cost (kind, vectype,
4570 misalign);
4571 /* Statements in an inner loop relative to the loop being
4572 vectorized are weighted more heavily. The value here is
4573 arbitrary and could potentially be improved with analysis. */
4574 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
4575 count *= 50; /* FIXME. */
4577 retval = (unsigned) (count * stmt_cost);
4578 cost_data->cost[where] += retval;
4581 return retval;
4584 /* Implement targetm.vectorize.finish_cost. */
4586 static void
4587 rs6000_finish_cost (void *data, unsigned *prologue_cost,
4588 unsigned *body_cost, unsigned *epilogue_cost)
4590 rs6000_cost_data *cost_data = (rs6000_cost_data*) data;
4592 if (cost_data->loop_info)
4593 rs6000_density_test (cost_data);
4595 *prologue_cost = cost_data->cost[vect_prologue];
4596 *body_cost = cost_data->cost[vect_body];
4597 *epilogue_cost = cost_data->cost[vect_epilogue];
4600 /* Implement targetm.vectorize.destroy_cost_data. */
4602 static void
4603 rs6000_destroy_cost_data (void *data)
4605 free (data);
4608 /* Handler for the Mathematical Acceleration Subsystem (mass) interface to a
4609 library with vectorized intrinsics. */
4611 static tree
4612 rs6000_builtin_vectorized_libmass (tree fndecl, tree type_out, tree type_in)
4614 char name[32];
4615 const char *suffix = NULL;
4616 tree fntype, new_fndecl, bdecl = NULL_TREE;
4617 int n_args = 1;
4618 const char *bname;
4619 machine_mode el_mode, in_mode;
4620 int n, in_n;
4622 /* Libmass is suitable for unsafe math only, as it does not correctly support
4623 parts of IEEE with the required precision, such as denormals. Only support
4624 it if we have VSX to use the simd d2 or f4 functions.
4625 XXX: Add variable length support. */
4626 if (!flag_unsafe_math_optimizations || !TARGET_VSX)
4627 return NULL_TREE;
4629 el_mode = TYPE_MODE (TREE_TYPE (type_out));
4630 n = TYPE_VECTOR_SUBPARTS (type_out);
4631 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4632 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4633 if (el_mode != in_mode
4634 || n != in_n)
4635 return NULL_TREE;
4637 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4639 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4640 switch (fn)
4642 case BUILT_IN_ATAN2:
4643 case BUILT_IN_HYPOT:
4644 case BUILT_IN_POW:
4645 n_args = 2;
4646 /* fall through */
4648 case BUILT_IN_ACOS:
4649 case BUILT_IN_ACOSH:
4650 case BUILT_IN_ASIN:
4651 case BUILT_IN_ASINH:
4652 case BUILT_IN_ATAN:
4653 case BUILT_IN_ATANH:
4654 case BUILT_IN_CBRT:
4655 case BUILT_IN_COS:
4656 case BUILT_IN_COSH:
4657 case BUILT_IN_ERF:
4658 case BUILT_IN_ERFC:
4659 case BUILT_IN_EXP2:
4660 case BUILT_IN_EXP:
4661 case BUILT_IN_EXPM1:
4662 case BUILT_IN_LGAMMA:
4663 case BUILT_IN_LOG10:
4664 case BUILT_IN_LOG1P:
4665 case BUILT_IN_LOG2:
4666 case BUILT_IN_LOG:
4667 case BUILT_IN_SIN:
4668 case BUILT_IN_SINH:
4669 case BUILT_IN_SQRT:
4670 case BUILT_IN_TAN:
4671 case BUILT_IN_TANH:
4672 bdecl = builtin_decl_implicit (fn);
4673 suffix = "d2"; /* pow -> powd2 */
4674 if (el_mode != DFmode
4675 || n != 2
4676 || !bdecl)
4677 return NULL_TREE;
4678 break;
4680 case BUILT_IN_ATAN2F:
4681 case BUILT_IN_HYPOTF:
4682 case BUILT_IN_POWF:
4683 n_args = 2;
4684 /* fall through */
4686 case BUILT_IN_ACOSF:
4687 case BUILT_IN_ACOSHF:
4688 case BUILT_IN_ASINF:
4689 case BUILT_IN_ASINHF:
4690 case BUILT_IN_ATANF:
4691 case BUILT_IN_ATANHF:
4692 case BUILT_IN_CBRTF:
4693 case BUILT_IN_COSF:
4694 case BUILT_IN_COSHF:
4695 case BUILT_IN_ERFF:
4696 case BUILT_IN_ERFCF:
4697 case BUILT_IN_EXP2F:
4698 case BUILT_IN_EXPF:
4699 case BUILT_IN_EXPM1F:
4700 case BUILT_IN_LGAMMAF:
4701 case BUILT_IN_LOG10F:
4702 case BUILT_IN_LOG1PF:
4703 case BUILT_IN_LOG2F:
4704 case BUILT_IN_LOGF:
4705 case BUILT_IN_SINF:
4706 case BUILT_IN_SINHF:
4707 case BUILT_IN_SQRTF:
4708 case BUILT_IN_TANF:
4709 case BUILT_IN_TANHF:
4710 bdecl = builtin_decl_implicit (fn);
4711 suffix = "4"; /* powf -> powf4 */
4712 if (el_mode != SFmode
4713 || n != 4
4714 || !bdecl)
4715 return NULL_TREE;
4716 break;
4718 default:
4719 return NULL_TREE;
4722 else
4723 return NULL_TREE;
4725 gcc_assert (suffix != NULL);
4726 bname = IDENTIFIER_POINTER (DECL_NAME (bdecl));
4727 if (!bname)
4728 return NULL_TREE;
4730 strcpy (name, bname + sizeof ("__builtin_") - 1);
4731 strcat (name, suffix);
4733 if (n_args == 1)
4734 fntype = build_function_type_list (type_out, type_in, NULL);
4735 else if (n_args == 2)
4736 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
4737 else
4738 gcc_unreachable ();
4740 /* Build a function declaration for the vectorized function. */
4741 new_fndecl = build_decl (BUILTINS_LOCATION,
4742 FUNCTION_DECL, get_identifier (name), fntype);
4743 TREE_PUBLIC (new_fndecl) = 1;
4744 DECL_EXTERNAL (new_fndecl) = 1;
4745 DECL_IS_NOVOPS (new_fndecl) = 1;
4746 TREE_READONLY (new_fndecl) = 1;
4748 return new_fndecl;
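/* Worked example (for exposition): vectorizing __builtin_pow for V2DFmode
   strips the "__builtin_" prefix from the name and appends the "d2"
   suffix, so the declaration built above is for an external function
   "powd2" taking and returning V2DF values.  */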
4751 /* Returns a function decl for a vectorized version of the builtin function
4752 with builtin function code FN and the result vector type TYPE, or NULL_TREE
4753 if it is not available. */
4755 static tree
4756 rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
4757 tree type_in)
4759 machine_mode in_mode, out_mode;
4760 int in_n, out_n;
4762 if (TARGET_DEBUG_BUILTIN)
4763 fprintf (stderr, "rs6000_builtin_vectorized_function (%s, %s, %s)\n",
4764 IDENTIFIER_POINTER (DECL_NAME (fndecl)),
4765 GET_MODE_NAME (TYPE_MODE (type_out)),
4766 GET_MODE_NAME (TYPE_MODE (type_in)));
4768 if (TREE_CODE (type_out) != VECTOR_TYPE
4769 || TREE_CODE (type_in) != VECTOR_TYPE
4770 || !TARGET_VECTORIZE_BUILTINS)
4771 return NULL_TREE;
4773 out_mode = TYPE_MODE (TREE_TYPE (type_out));
4774 out_n = TYPE_VECTOR_SUBPARTS (type_out);
4775 in_mode = TYPE_MODE (TREE_TYPE (type_in));
4776 in_n = TYPE_VECTOR_SUBPARTS (type_in);
4778 if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_NORMAL)
4780 enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
4781 switch (fn)
4783 case BUILT_IN_CLZIMAX:
4784 case BUILT_IN_CLZLL:
4785 case BUILT_IN_CLZL:
4786 case BUILT_IN_CLZ:
4787 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4789 if (out_mode == QImode && out_n == 16)
4790 return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
4791 else if (out_mode == HImode && out_n == 8)
4792 return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
4793 else if (out_mode == SImode && out_n == 4)
4794 return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
4795 else if (out_mode == DImode && out_n == 2)
4796 return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
4798 break;
4799 case BUILT_IN_COPYSIGN:
4800 if (VECTOR_UNIT_VSX_P (V2DFmode)
4801 && out_mode == DFmode && out_n == 2
4802 && in_mode == DFmode && in_n == 2)
4803 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNDP];
4804 break;
4805 case BUILT_IN_COPYSIGNF:
4806 if (out_mode != SFmode || out_n != 4
4807 || in_mode != SFmode || in_n != 4)
4808 break;
4809 if (VECTOR_UNIT_VSX_P (V4SFmode))
4810 return rs6000_builtin_decls[VSX_BUILTIN_CPSGNSP];
4811 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4812 return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
4813 break;
4814 case BUILT_IN_POPCOUNTIMAX:
4815 case BUILT_IN_POPCOUNTLL:
4816 case BUILT_IN_POPCOUNTL:
4817 case BUILT_IN_POPCOUNT:
4818 if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
4820 if (out_mode == QImode && out_n == 16)
4821 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
4822 else if (out_mode == HImode && out_n == 8)
4823 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
4824 else if (out_mode == SImode && out_n == 4)
4825 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
4826 else if (out_mode == DImode && out_n == 2)
4827 return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
4829 break;
4830 case BUILT_IN_SQRT:
4831 if (VECTOR_UNIT_VSX_P (V2DFmode)
4832 && out_mode == DFmode && out_n == 2
4833 && in_mode == DFmode && in_n == 2)
4834 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTDP];
4835 break;
4836 case BUILT_IN_SQRTF:
4837 if (VECTOR_UNIT_VSX_P (V4SFmode)
4838 && out_mode == SFmode && out_n == 4
4839 && in_mode == SFmode && in_n == 4)
4840 return rs6000_builtin_decls[VSX_BUILTIN_XVSQRTSP];
4841 break;
4842 case BUILT_IN_CEIL:
4843 if (VECTOR_UNIT_VSX_P (V2DFmode)
4844 && out_mode == DFmode && out_n == 2
4845 && in_mode == DFmode && in_n == 2)
4846 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIP];
4847 break;
4848 case BUILT_IN_CEILF:
4849 if (out_mode != SFmode || out_n != 4
4850 || in_mode != SFmode || in_n != 4)
4851 break;
4852 if (VECTOR_UNIT_VSX_P (V4SFmode))
4853 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIP];
4854 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4855 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIP];
4856 break;
4857 case BUILT_IN_FLOOR:
4858 if (VECTOR_UNIT_VSX_P (V2DFmode)
4859 && out_mode == DFmode && out_n == 2
4860 && in_mode == DFmode && in_n == 2)
4861 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIM];
4862 break;
4863 case BUILT_IN_FLOORF:
4864 if (out_mode != SFmode || out_n != 4
4865 || in_mode != SFmode || in_n != 4)
4866 break;
4867 if (VECTOR_UNIT_VSX_P (V4SFmode))
4868 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIM];
4869 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4870 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIM];
4871 break;
4872 case BUILT_IN_FMA:
4873 if (VECTOR_UNIT_VSX_P (V2DFmode)
4874 && out_mode == DFmode && out_n == 2
4875 && in_mode == DFmode && in_n == 2)
4876 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDDP];
4877 break;
4878 case BUILT_IN_FMAF:
4879 if (VECTOR_UNIT_VSX_P (V4SFmode)
4880 && out_mode == SFmode && out_n == 4
4881 && in_mode == SFmode && in_n == 4)
4882 return rs6000_builtin_decls[VSX_BUILTIN_XVMADDSP];
4883 else if (VECTOR_UNIT_ALTIVEC_P (V4SFmode)
4884 && out_mode == SFmode && out_n == 4
4885 && in_mode == SFmode && in_n == 4)
4886 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VMADDFP];
4887 break;
4888 case BUILT_IN_TRUNC:
4889 if (VECTOR_UNIT_VSX_P (V2DFmode)
4890 && out_mode == DFmode && out_n == 2
4891 && in_mode == DFmode && in_n == 2)
4892 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIZ];
4893 break;
4894 case BUILT_IN_TRUNCF:
4895 if (out_mode != SFmode || out_n != 4
4896 || in_mode != SFmode || in_n != 4)
4897 break;
4898 if (VECTOR_UNIT_VSX_P (V4SFmode))
4899 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIZ];
4900 if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
4901 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRFIZ];
4902 break;
4903 case BUILT_IN_NEARBYINT:
4904 if (VECTOR_UNIT_VSX_P (V2DFmode)
4905 && flag_unsafe_math_optimizations
4906 && out_mode == DFmode && out_n == 2
4907 && in_mode == DFmode && in_n == 2)
4908 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPI];
4909 break;
4910 case BUILT_IN_NEARBYINTF:
4911 if (VECTOR_UNIT_VSX_P (V4SFmode)
4912 && flag_unsafe_math_optimizations
4913 && out_mode == SFmode && out_n == 4
4914 && in_mode == SFmode && in_n == 4)
4915 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPI];
4916 break;
4917 case BUILT_IN_RINT:
4918 if (VECTOR_UNIT_VSX_P (V2DFmode)
4919 && !flag_trapping_math
4920 && out_mode == DFmode && out_n == 2
4921 && in_mode == DFmode && in_n == 2)
4922 return rs6000_builtin_decls[VSX_BUILTIN_XVRDPIC];
4923 break;
4924 case BUILT_IN_RINTF:
4925 if (VECTOR_UNIT_VSX_P (V4SFmode)
4926 && !flag_trapping_math
4927 && out_mode == SFmode && out_n == 4
4928 && in_mode == SFmode && in_n == 4)
4929 return rs6000_builtin_decls[VSX_BUILTIN_XVRSPIC];
4930 break;
4931 default:
4932 break;
4936 else if (DECL_BUILT_IN_CLASS (fndecl) == BUILT_IN_MD)
4938 enum rs6000_builtins fn
4939 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
4940 switch (fn)
4942 case RS6000_BUILTIN_RSQRTF:
4943 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4944 && out_mode == SFmode && out_n == 4
4945 && in_mode == SFmode && in_n == 4)
4946 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRSQRTFP];
4947 break;
4948 case RS6000_BUILTIN_RSQRT:
4949 if (VECTOR_UNIT_VSX_P (V2DFmode)
4950 && out_mode == DFmode && out_n == 2
4951 && in_mode == DFmode && in_n == 2)
4952 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
4953 break;
4954 case RS6000_BUILTIN_RECIPF:
4955 if (VECTOR_UNIT_ALTIVEC_OR_VSX_P (V4SFmode)
4956 && out_mode == SFmode && out_n == 4
4957 && in_mode == SFmode && in_n == 4)
4958 return rs6000_builtin_decls[ALTIVEC_BUILTIN_VRECIPFP];
4959 break;
4960 case RS6000_BUILTIN_RECIP:
4961 if (VECTOR_UNIT_VSX_P (V2DFmode)
4962 && out_mode == DFmode && out_n == 2
4963 && in_mode == DFmode && in_n == 2)
4964 return rs6000_builtin_decls[VSX_BUILTIN_RECIP_V2DF];
4965 break;
4966 default:
4967 break;
4971 /* Generate calls to libmass if appropriate. */
4972 if (rs6000_veclib_handler)
4973 return rs6000_veclib_handler (fndecl, type_out, type_in);
4975 return NULL_TREE;
4978 /* Default CPU string for rs6000*_file_start functions. */
4979 static const char *rs6000_default_cpu;
4981 /* Do anything needed at the start of the asm file. */
4983 static void
4984 rs6000_file_start (void)
4986 char buffer[80];
4987 const char *start = buffer;
4988 FILE *file = asm_out_file;
4990 rs6000_default_cpu = TARGET_CPU_DEFAULT;
4992 default_file_start ();
4994 if (flag_verbose_asm)
4996 sprintf (buffer, "\n%s rs6000/powerpc options:", ASM_COMMENT_START);
4998 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
5000 fprintf (file, "%s --with-cpu=%s", start, rs6000_default_cpu);
5001 start = "";
5004 if (global_options_set.x_rs6000_cpu_index)
5006 fprintf (file, "%s -mcpu=%s", start,
5007 processor_target_table[rs6000_cpu_index].name);
5008 start = "";
5011 if (global_options_set.x_rs6000_tune_index)
5013 fprintf (file, "%s -mtune=%s", start,
5014 processor_target_table[rs6000_tune_index].name);
5015 start = "";
5018 if (PPC405_ERRATUM77)
5020 fprintf (file, "%s PPC405CR_ERRATUM77", start);
5021 start = "";
5024 #ifdef USING_ELFOS_H
5025 switch (rs6000_sdata)
5027 case SDATA_NONE: fprintf (file, "%s -msdata=none", start); start = ""; break;
5028 case SDATA_DATA: fprintf (file, "%s -msdata=data", start); start = ""; break;
5029 case SDATA_SYSV: fprintf (file, "%s -msdata=sysv", start); start = ""; break;
5030 case SDATA_EABI: fprintf (file, "%s -msdata=eabi", start); start = ""; break;
5033 if (rs6000_sdata && g_switch_value)
5035 fprintf (file, "%s -G %d", start,
5036 g_switch_value);
5037 start = "";
5039 #endif
5041 if (*start == '\0')
5042 putc ('\n', file);
5045 if (DEFAULT_ABI == ABI_ELFv2)
5046 fprintf (file, "\t.abiversion 2\n");
5048 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2
5049 || (TARGET_ELF && flag_pic == 2))
5051 switch_to_section (toc_section);
5052 switch_to_section (text_section);
5057 /* Return nonzero if this function is known to have a null epilogue. */
5060 direct_return (void)
5062 if (reload_completed)
5064 rs6000_stack_t *info = rs6000_stack_info ();
5066 if (info->first_gp_reg_save == 32
5067 && info->first_fp_reg_save == 64
5068 && info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
5069 && ! info->lr_save_p
5070 && ! info->cr_save_p
5071 && info->vrsave_mask == 0
5072 && ! info->push_p)
5073 return 1;
5076 return 0;
5079 /* Return the number of instructions it takes to form a constant in an
5080 integer register. */
5083 num_insns_constant_wide (HOST_WIDE_INT value)
5085 /* signed constant loadable with addi */
5086 if (((unsigned HOST_WIDE_INT) value + 0x8000) < 0x10000)
5087 return 1;
5089 /* constant loadable with addis */
5090 else if ((value & 0xffff) == 0
5091 && (value >> 31 == -1 || value >> 31 == 0))
5092 return 1;
5094 else if (TARGET_POWERPC64)
5096 HOST_WIDE_INT low = ((value & 0xffffffff) ^ 0x80000000) - 0x80000000;
5097 HOST_WIDE_INT high = value >> 31;
5099 if (high == 0 || high == -1)
5100 return 2;
5102 high >>= 1;
5104 if (low == 0)
5105 return num_insns_constant_wide (high) + 1;
5106 else if (high == 0)
5107 return num_insns_constant_wide (low) + 1;
5108 else
5109 return (num_insns_constant_wide (high)
5110 + num_insns_constant_wide (low) + 1);
5113 else
5114 return 2;
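/* Worked example (hypothetical constants, for exposition): 42 fits in a
   signed 16-bit immediate, so it loads in 1 insn (li); 0x12345678 has a
   nonzero low half but a zero high word, so it needs 2 insns, e.g.

       lis r9,0x1234        # load the upper 16 bits
       ori r9,r9,0x5678     # OR in the lower 16 bits

   matching num_insns_constant_wide returning 1 and 2 respectively.  */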
5118 num_insns_constant (rtx op, machine_mode mode)
5120 HOST_WIDE_INT low, high;
5122 switch (GET_CODE (op))
5124 case CONST_INT:
5125 if ((INTVAL (op) >> 31) != 0 && (INTVAL (op) >> 31) != -1
5126 && mask64_operand (op, mode))
5127 return 2;
5128 else
5129 return num_insns_constant_wide (INTVAL (op));
5131 case CONST_WIDE_INT:
5133 int i;
5134 int ins = CONST_WIDE_INT_NUNITS (op) - 1;
5135 for (i = 0; i < CONST_WIDE_INT_NUNITS (op); i++)
5136 ins += num_insns_constant_wide (CONST_WIDE_INT_ELT (op, i));
5137 return ins;
5140 case CONST_DOUBLE:
5141 if (mode == SFmode || mode == SDmode)
5143 long l;
5144 REAL_VALUE_TYPE rv;
5146 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5147 if (DECIMAL_FLOAT_MODE_P (mode))
5148 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
5149 else
5150 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
5151 return num_insns_constant_wide ((HOST_WIDE_INT) l);
5154 long l[2];
5155 REAL_VALUE_TYPE rv;
5157 REAL_VALUE_FROM_CONST_DOUBLE (rv, op);
5158 if (DECIMAL_FLOAT_MODE_P (mode))
5159 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, l);
5160 else
5161 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
5162 high = l[WORDS_BIG_ENDIAN == 0];
5163 low = l[WORDS_BIG_ENDIAN != 0];
5165 if (TARGET_32BIT)
5166 return (num_insns_constant_wide (low)
5167 + num_insns_constant_wide (high));
5168 else
5170 if ((high == 0 && low >= 0)
5171 || (high == -1 && low < 0))
5172 return num_insns_constant_wide (low);
5174 else if (mask64_operand (op, mode))
5175 return 2;
5177 else if (low == 0)
5178 return num_insns_constant_wide (high) + 1;
5180 else
5181 return (num_insns_constant_wide (high)
5182 + num_insns_constant_wide (low) + 1);
5185 default:
5186 gcc_unreachable ();
5190 /* Interpret element ELT of the CONST_VECTOR OP as an integer value.
5191 If the mode of OP is MODE_VECTOR_INT, this simply returns the
5192 corresponding element of the vector, but for V4SFmode and V2SFmode,
5193 the corresponding "float" is interpreted as an SImode integer. */
5195 HOST_WIDE_INT
5196 const_vector_elt_as_int (rtx op, unsigned int elt)
5198 rtx tmp;
5200 /* We can't handle V2DImode and V2DFmode vector constants here yet. */
5201 gcc_assert (GET_MODE (op) != V2DImode
5202 && GET_MODE (op) != V2DFmode);
5204 tmp = CONST_VECTOR_ELT (op, elt);
5205 if (GET_MODE (op) == V4SFmode
5206 || GET_MODE (op) == V2SFmode)
5207 tmp = gen_lowpart (SImode, tmp);
5208 return INTVAL (tmp);
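/* Illustrative example (for exposition): for a V4SFmode constant whose
   element is 1.0f, the gen_lowpart above reads the float's bit pattern as
   an SImode value, so the function returns 0x3f800000 rather than a
   conversion of the float to the integer 1.  */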
5211 /* Return true if OP can be synthesized with a particular vspltisb, vspltish
5212 or vspltisw instruction. OP is a CONST_VECTOR. Which instruction is used
5213 depends on STEP and COPIES, one of which will be 1. If COPIES > 1,
5214 all items are set to the same value and contain COPIES replicas of the
5215 vsplt's operand; if STEP > 1, one in STEP elements is set to the vsplt's
5216 operand and the others are set to the value of the operand's msb. */
5218 static bool
5219 vspltis_constant (rtx op, unsigned step, unsigned copies)
5221 machine_mode mode = GET_MODE (op);
5222 machine_mode inner = GET_MODE_INNER (mode);
5224 unsigned i;
5225 unsigned nunits;
5226 unsigned bitsize;
5227 unsigned mask;
5229 HOST_WIDE_INT val;
5230 HOST_WIDE_INT splat_val;
5231 HOST_WIDE_INT msb_val;
5233 if (mode == V2DImode || mode == V2DFmode || mode == V1TImode)
5234 return false;
5236 nunits = GET_MODE_NUNITS (mode);
5237 bitsize = GET_MODE_BITSIZE (inner);
5238 mask = GET_MODE_MASK (inner);
5240 val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5241 splat_val = val;
5242 msb_val = val >= 0 ? 0 : -1;
5244 /* Construct the value to be splatted, if possible. If not, return false. */
5245 for (i = 2; i <= copies; i *= 2)
5247 HOST_WIDE_INT small_val;
5248 bitsize /= 2;
5249 small_val = splat_val >> bitsize;
5250 mask >>= bitsize;
5251 if (splat_val != ((small_val << bitsize) | (small_val & mask)))
5252 return false;
5253 splat_val = small_val;
5256 /* Check if SPLAT_VAL can really be the operand of a vspltis[bhw]. */
5257 if (EASY_VECTOR_15 (splat_val))
5258 ;
5260 /* Also check if we can splat, and then add the result to itself. Do so if
5261 the value is positive, or if the splat instruction is using OP's mode;
5262 for splat_val < 0, the splat and the add should use the same mode. */
5263 else if (EASY_VECTOR_15_ADD_SELF (splat_val)
5264 && (splat_val >= 0 || (step == 1 && copies == 1)))
5265 ;
5267 /* Also check if we are loading up the most significant bit, which can be
5268 done by loading up -1 and shifting the value left by -1. */
5269 else if (EASY_VECTOR_MSB (splat_val, inner))
5270 ;
5272 else
5273 return false;
5275 /* Check if VAL is present in every STEP-th element, and the
5276 other elements are filled with its most significant bit. */
5277 for (i = 1; i < nunits; ++i)
5279 HOST_WIDE_INT desired_val;
5280 unsigned elt = BYTES_BIG_ENDIAN ? nunits - 1 - i : i;
5281 if ((i & (step - 1)) == 0)
5282 desired_val = val;
5283 else
5284 desired_val = msb_val;
5286 if (desired_val != const_vector_elt_as_int (op, elt))
5287 return false;
5290 return true;
5294 /* Return true if OP is of the given MODE and can be synthesized
5295 with a vspltisb, vspltish or vspltisw. */
5297 bool
5298 easy_altivec_constant (rtx op, machine_mode mode)
5300 unsigned step, copies;
5302 if (mode == VOIDmode)
5303 mode = GET_MODE (op);
5304 else if (mode != GET_MODE (op))
5305 return false;
5307 /* V2DI/V2DF was added with VSX. Only allow 0 and all 1's as easy
5308 constants. */
5309 if (mode == V2DFmode)
5310 return zero_constant (op, mode);
5312 else if (mode == V2DImode)
5314 if (GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
5315 || GET_CODE (CONST_VECTOR_ELT (op, 1)) != CONST_INT)
5316 return false;
5318 if (zero_constant (op, mode))
5319 return true;
5321 if (INTVAL (CONST_VECTOR_ELT (op, 0)) == -1
5322 && INTVAL (CONST_VECTOR_ELT (op, 1)) == -1)
5323 return true;
5325 return false;
5328 /* V1TImode is a special container for TImode. Ignore for now. */
5329 else if (mode == V1TImode)
5330 return false;
5332 /* Start with a vspltisw. */
5333 step = GET_MODE_NUNITS (mode) / 4;
5334 copies = 1;
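/* Each failed attempt below moves to the next narrower element size:
   STEP shrinks toward 1 first, then COPIES grows.  E.g. for V16QImode
   the three attempts use (STEP, COPIES) == (4, 1), (2, 1), (1, 1);
   for V4SImode they use (1, 1), (1, 2), (1, 4).  */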
5336 if (vspltis_constant (op, step, copies))
5337 return true;
5339 /* Then try with a vspltish. */
5340 if (step == 1)
5341 copies <<= 1;
5342 else
5343 step >>= 1;
5345 if (vspltis_constant (op, step, copies))
5346 return true;
5348 /* And finally a vspltisb. */
5349 if (step == 1)
5350 copies <<= 1;
5351 else
5352 step >>= 1;
5354 if (vspltis_constant (op, step, copies))
5355 return true;
5357 return false;
5360 /* Generate a VEC_DUPLICATE representing a vspltis[bhw] instruction whose
5361 result is OP. Abort if it is not possible. */
5363 rtx
5364 gen_easy_altivec_constant (rtx op)
5366 machine_mode mode = GET_MODE (op);
5367 int nunits = GET_MODE_NUNITS (mode);
5368 rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
5369 unsigned step = nunits / 4;
5370 unsigned copies = 1;
5372 /* Start with a vspltisw. */
5373 if (vspltis_constant (op, step, copies))
5374 return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
5376 /* Then try with a vspltish. */
5377 if (step == 1)
5378 copies <<= 1;
5379 else
5380 step >>= 1;
5382 if (vspltis_constant (op, step, copies))
5383 return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
5385 /* And finally a vspltisb. */
5386 if (step == 1)
5387 copies <<= 1;
5388 else
5389 step >>= 1;
5391 if (vspltis_constant (op, step, copies))
5392 return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
5394 gcc_unreachable ();
5397 const char *
5398 output_vec_const_move (rtx *operands)
5400 int cst, cst2;
5401 machine_mode mode;
5402 rtx dest, vec;
5404 dest = operands[0];
5405 vec = operands[1];
5406 mode = GET_MODE (dest);
5408 if (TARGET_VSX)
5410 if (zero_constant (vec, mode))
5411 return "xxlxor %x0,%x0,%x0";
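/* For V2DImode/V1TImode all-ones below, a word splat of -1 suffices:
   the all-ones bit pattern is identical for every element width.  */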
5413 if ((mode == V2DImode || mode == V1TImode)
5414 && INTVAL (CONST_VECTOR_ELT (vec, 0)) == -1
5415 && INTVAL (CONST_VECTOR_ELT (vec, 1)) == -1)
5416 return "vspltisw %0,-1";
5419 if (TARGET_ALTIVEC)
5421 rtx splat_vec;
5422 if (zero_constant (vec, mode))
5423 return "vxor %0,%0,%0";
5425 splat_vec = gen_easy_altivec_constant (vec);
5426 gcc_assert (GET_CODE (splat_vec) == VEC_DUPLICATE);
5427 operands[1] = XEXP (splat_vec, 0);
5428 if (!EASY_VECTOR_15 (INTVAL (operands[1])))
5429 return "#";
5431 switch (GET_MODE (splat_vec))
5433 case V4SImode:
5434 return "vspltisw %0,%1";
5436 case V8HImode:
5437 return "vspltish %0,%1";
5439 case V16QImode:
5440 return "vspltisb %0,%1";
5442 default:
5443 gcc_unreachable ();
5447 gcc_assert (TARGET_SPE);
5449 /* Vector constant 0 is handled by a splitter for V2SI, and in the
5450 patterns for V1DI, V4HI, and V2SF.
5452 FIXME: We should probably return # and add post reload
5453 splitters for these, but this way is so easy ;-). */
5454 cst = INTVAL (CONST_VECTOR_ELT (vec, 0));
5455 cst2 = INTVAL (CONST_VECTOR_ELT (vec, 1));
5456 operands[1] = CONST_VECTOR_ELT (vec, 0);
5457 operands[2] = CONST_VECTOR_ELT (vec, 1);
5458 if (cst == cst2)
5459 return "li %0,%1\n\tevmergelo %0,%0,%0";
5460 else if (WORDS_BIG_ENDIAN)
5461 return "li %0,%1\n\tevmergelo %0,%0,%0\n\tli %0,%2";
5462 else
5463 return "li %0,%2\n\tevmergelo %0,%0,%0\n\tli %0,%1";
5466 /* Initialize the paired-float (V2SF) vector TARGET to VALS. */
5468 void
5469 paired_expand_vector_init (rtx target, rtx vals)
5471 machine_mode mode = GET_MODE (target);
5472 int n_elts = GET_MODE_NUNITS (mode);
5473 int n_var = 0;
5474 rtx x, new_rtx, tmp, constant_op, op1, op2;
5475 int i;
5477 for (i = 0; i < n_elts; ++i)
5479 x = XVECEXP (vals, 0, i);
5480 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5481 ++n_var;
5483 if (n_var == 0)
5485 /* Load from constant pool. */
5486 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5487 return;
5490 if (n_var == 2)
5492 /* The vector is initialized only with non-constants. */
5493 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, XVECEXP (vals, 0, 0),
5494 XVECEXP (vals, 0, 1));
5496 emit_move_insn (target, new_rtx);
5497 return;
5500 /* One field is non-constant and the other one is a constant. Load the
5501 constant from the constant pool and use ps_merge instruction to
5502 construct the whole vector. */
5503 op1 = XVECEXP (vals, 0, 0);
5504 op2 = XVECEXP (vals, 0, 1);
5506 constant_op = (CONSTANT_P (op1)) ? op1 : op2;
5508 tmp = gen_reg_rtx (GET_MODE (constant_op));
5509 emit_move_insn (tmp, constant_op);
5511 if (CONSTANT_P (op1))
5512 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, tmp, op2);
5513 else
5514 new_rtx = gen_rtx_VEC_CONCAT (V2SFmode, op1, tmp);
5516 emit_move_insn (target, new_rtx);
5519 void
5520 paired_expand_vector_move (rtx operands[])
5522 rtx op0 = operands[0], op1 = operands[1];
5524 emit_move_insn (op0, op1);
5527 /* Emit vector compare for code RCODE. DEST is destination, OP1 and
5528 OP2 are two VEC_COND_EXPR operands, CC_OP0 and CC_OP1 are the two
5529 operands for the relation operation COND. This is a recursive
5530 function. */
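/* GE is the base case below: subv2sf3 computes cc_op0 - cc_op1 and
   selv2sf4 then selects op0 where that difference is >= 0 and op1
   otherwise; the other comparison codes reduce to GE (directly or via
   one another) by swapping operands.  */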
5532 static void
5533 paired_emit_vector_compare (enum rtx_code rcode,
5534 rtx dest, rtx op0, rtx op1,
5535 rtx cc_op0, rtx cc_op1)
5537 rtx tmp = gen_reg_rtx (V2SFmode);
5538 rtx tmp1, max, min;
5540 gcc_assert (TARGET_PAIRED_FLOAT);
5541 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5543 switch (rcode)
5545 case LT:
5546 case LTU:
5547 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5548 return;
5549 case GE:
5550 case GEU:
5551 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5552 emit_insn (gen_selv2sf4 (dest, tmp, op0, op1, CONST0_RTX (SFmode)));
5553 return;
5554 case LE:
5555 case LEU:
5556 paired_emit_vector_compare (GE, dest, op0, op1, cc_op1, cc_op0);
5557 return;
5558 case GT:
5559 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5560 return;
5561 case EQ:
5562 tmp1 = gen_reg_rtx (V2SFmode);
5563 max = gen_reg_rtx (V2SFmode);
5564 min = gen_reg_rtx (V2SFmode);
5565 gen_reg_rtx (V2SFmode);
5567 emit_insn (gen_subv2sf3 (tmp, cc_op0, cc_op1));
5568 emit_insn (gen_selv2sf4
5569 (max, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5570 emit_insn (gen_subv2sf3 (tmp, cc_op1, cc_op0));
5571 emit_insn (gen_selv2sf4
5572 (min, tmp, cc_op0, cc_op1, CONST0_RTX (SFmode)));
5573 emit_insn (gen_subv2sf3 (tmp1, min, max));
5574 emit_insn (gen_selv2sf4 (dest, tmp1, op0, op1, CONST0_RTX (SFmode)));
5575 return;
5576 case NE:
5577 paired_emit_vector_compare (EQ, dest, op1, op0, cc_op0, cc_op1);
5578 return;
5579 case UNLE:
5580 paired_emit_vector_compare (LE, dest, op1, op0, cc_op0, cc_op1);
5581 return;
5582 case UNLT:
5583 paired_emit_vector_compare (LT, dest, op1, op0, cc_op0, cc_op1);
5584 return;
5585 case UNGE:
5586 paired_emit_vector_compare (GE, dest, op1, op0, cc_op0, cc_op1);
5587 return;
5588 case UNGT:
5589 paired_emit_vector_compare (GT, dest, op1, op0, cc_op0, cc_op1);
5590 return;
5591 default:
5592 gcc_unreachable ();
5595 return;
5598 /* Emit vector conditional expression.
5599 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5600 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5602 int
5603 paired_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5604 rtx cond, rtx cc_op0, rtx cc_op1)
5606 enum rtx_code rcode = GET_CODE (cond);
5608 if (!TARGET_PAIRED_FLOAT)
5609 return 0;
5611 paired_emit_vector_compare (rcode, dest, op1, op2, cc_op0, cc_op1);
5613 return 1;
5616 /* Initialize vector TARGET to VALS. */
5618 void
5619 rs6000_expand_vector_init (rtx target, rtx vals)
5621 machine_mode mode = GET_MODE (target);
5622 machine_mode inner_mode = GET_MODE_INNER (mode);
5623 int n_elts = GET_MODE_NUNITS (mode);
5624 int n_var = 0, one_var = -1;
5625 bool all_same = true, all_const_zero = true;
5626 rtx x, mem;
5627 int i;
5629 for (i = 0; i < n_elts; ++i)
5631 x = XVECEXP (vals, 0, i);
5632 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
5633 ++n_var, one_var = i;
5634 else if (x != CONST0_RTX (inner_mode))
5635 all_const_zero = false;
5637 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
5638 all_same = false;
5641 if (n_var == 0)
5643 rtx const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
5644 bool int_vector_p = (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
5645 if ((int_vector_p || TARGET_VSX) && all_const_zero)
5647 /* Zero register. */
5648 emit_insn (gen_rtx_SET (VOIDmode, target,
5649 gen_rtx_XOR (mode, target, target)));
5650 return;
5652 else if (int_vector_p && easy_vector_constant (const_vec, mode))
5654 /* Splat immediate. */
5655 emit_insn (gen_rtx_SET (VOIDmode, target, const_vec));
5656 return;
5658 else
5660 /* Load from constant pool. */
5661 emit_move_insn (target, const_vec);
5662 return;
5666 /* Double word values on VSX can use xxpermdi or lxvdsx. */
5667 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5669 rtx op0 = XVECEXP (vals, 0, 0);
5670 rtx op1 = XVECEXP (vals, 0, 1);
5671 if (all_same)
5673 if (!MEM_P (op0) && !REG_P (op0))
5674 op0 = force_reg (inner_mode, op0);
5675 if (mode == V2DFmode)
5676 emit_insn (gen_vsx_splat_v2df (target, op0));
5677 else
5678 emit_insn (gen_vsx_splat_v2di (target, op0));
5680 else
5682 op0 = force_reg (inner_mode, op0);
5683 op1 = force_reg (inner_mode, op1);
5684 if (mode == V2DFmode)
5685 emit_insn (gen_vsx_concat_v2df (target, op0, op1));
5686 else
5687 emit_insn (gen_vsx_concat_v2di (target, op0, op1));
5689 return;
5692 /* With single-precision floating point on VSX, we know that internally
5693 single precision is actually represented as a double, so we either make
5694 two V2DF vectors and convert those to single precision, or do one
5695 conversion and splat the result to the other elements. */
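/* The non-splat path below relies on xvcvdpsp leaving its two results in
   the even word elements of each V4SF destination, so extracting the even
   elements of FLT_EVEN and FLT_ODD restores the op0..op3 order.  */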
5696 if (mode == V4SFmode && VECTOR_MEM_VSX_P (mode))
5698 if (all_same)
5700 rtx freg = gen_reg_rtx (V4SFmode);
5701 rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
5702 rtx cvt = ((TARGET_XSCVDPSPN)
5703 ? gen_vsx_xscvdpspn_scalar (freg, sreg)
5704 : gen_vsx_xscvdpsp_scalar (freg, sreg));
5706 emit_insn (cvt);
5707 emit_insn (gen_vsx_xxspltw_v4sf_direct (target, freg, const0_rtx));
5709 else
5711 rtx dbl_even = gen_reg_rtx (V2DFmode);
5712 rtx dbl_odd = gen_reg_rtx (V2DFmode);
5713 rtx flt_even = gen_reg_rtx (V4SFmode);
5714 rtx flt_odd = gen_reg_rtx (V4SFmode);
5715 rtx op0 = force_reg (SFmode, XVECEXP (vals, 0, 0));
5716 rtx op1 = force_reg (SFmode, XVECEXP (vals, 0, 1));
5717 rtx op2 = force_reg (SFmode, XVECEXP (vals, 0, 2));
5718 rtx op3 = force_reg (SFmode, XVECEXP (vals, 0, 3));
5720 emit_insn (gen_vsx_concat_v2sf (dbl_even, op0, op1));
5721 emit_insn (gen_vsx_concat_v2sf (dbl_odd, op2, op3));
5722 emit_insn (gen_vsx_xvcvdpsp (flt_even, dbl_even));
5723 emit_insn (gen_vsx_xvcvdpsp (flt_odd, dbl_odd));
5724 rs6000_expand_extract_even (target, flt_even, flt_odd);
5726 return;
5729 /* Store value to stack temp. Load vector element. Splat. However, splat
5730 of 64-bit items is not supported on Altivec. */
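/* The UNSPEC_LVE parallel below models an lve[bhw]x load of the single
   element into the vector register; the VEC_DUPLICATE then splats it
   with vsplt[bhw].  */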
5731 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
5733 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5734 emit_move_insn (adjust_address_nv (mem, inner_mode, 0),
5735 XVECEXP (vals, 0, 0));
5736 x = gen_rtx_UNSPEC (VOIDmode,
5737 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5738 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5739 gen_rtvec (2,
5740 gen_rtx_SET (VOIDmode,
5741 target, mem),
5742 x)));
5743 x = gen_rtx_VEC_SELECT (inner_mode, target,
5744 gen_rtx_PARALLEL (VOIDmode,
5745 gen_rtvec (1, const0_rtx)));
5746 emit_insn (gen_rtx_SET (VOIDmode, target,
5747 gen_rtx_VEC_DUPLICATE (mode, x)));
5748 return;
5751 /* One field is non-constant. Load constant then overwrite
5752 varying field. */
5753 if (n_var == 1)
5755 rtx copy = copy_rtx (vals);
5757 /* Load constant part of vector, substitute neighboring value for
5758 varying element. */
5759 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
5760 rs6000_expand_vector_init (target, copy);
5762 /* Insert variable. */
5763 rs6000_expand_vector_set (target, XVECEXP (vals, 0, one_var), one_var);
5764 return;
5767 /* Construct the vector in memory one field at a time
5768 and load the whole vector. */
5769 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5770 for (i = 0; i < n_elts; i++)
5771 emit_move_insn (adjust_address_nv (mem, inner_mode,
5772 i * GET_MODE_SIZE (inner_mode)),
5773 XVECEXP (vals, 0, i));
5774 emit_move_insn (target, mem);
5777 /* Set field ELT of TARGET to VAL. */
5779 void
5780 rs6000_expand_vector_set (rtx target, rtx val, int elt)
5782 machine_mode mode = GET_MODE (target);
5783 machine_mode inner_mode = GET_MODE_INNER (mode);
5784 rtx reg = gen_reg_rtx (mode);
5785 rtx mask, mem, x;
5786 int width = GET_MODE_SIZE (inner_mode);
5787 int i;
5789 if (VECTOR_MEM_VSX_P (mode) && (mode == V2DFmode || mode == V2DImode))
5791 rtx (*set_func) (rtx, rtx, rtx, rtx)
5792 = ((mode == V2DFmode) ? gen_vsx_set_v2df : gen_vsx_set_v2di);
5793 emit_insn (set_func (target, target, val, GEN_INT (elt)));
5794 return;
5797 /* Simplify setting single element vectors like V1TImode. */
5798 if (GET_MODE_SIZE (mode) == GET_MODE_SIZE (inner_mode) && elt == 0)
5800 emit_move_insn (target, gen_lowpart (mode, val));
5801 return;
5804 /* Load single variable value. */
5805 mem = assign_stack_temp (mode, GET_MODE_SIZE (inner_mode));
5806 emit_move_insn (adjust_address_nv (mem, inner_mode, 0), val);
5807 x = gen_rtx_UNSPEC (VOIDmode,
5808 gen_rtvec (1, const0_rtx), UNSPEC_LVE);
5809 emit_insn (gen_rtx_PARALLEL (VOIDmode,
5810 gen_rtvec (2,
5811 gen_rtx_SET (VOIDmode,
5812 reg, mem),
5813 x)));
5815 /* Linear sequence. */
5816 mask = gen_rtx_PARALLEL (V16QImode, rtvec_alloc (16));
5817 for (i = 0; i < 16; ++i)
5818 XVECEXP (mask, 0, i) = GEN_INT (i);
5820 /* Set permute mask to insert element into target. */
5821 for (i = 0; i < width; ++i)
5822 XVECEXP (mask, 0, elt*width + i)
5823 = GEN_INT (i + 0x10);
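/* vperm selector bytes 0..15 pick bytes of the first input (TARGET) and
   16..31 pick bytes of the second (REG).  E.g. for V4SImode and ELT == 2
   the selector is { 0..7, 16..19, 12..15 }: every byte of TARGET is kept
   except element 2, which is taken from REG.  */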
5824 x = gen_rtx_CONST_VECTOR (V16QImode, XVEC (mask, 0));
5826 if (BYTES_BIG_ENDIAN)
5827 x = gen_rtx_UNSPEC (mode,
5828 gen_rtvec (3, target, reg,
5829 force_reg (V16QImode, x)),
5830 UNSPEC_VPERM);
5831 else
5833 /* Invert selector. We prefer to generate VNAND on P8 so
5834 that future fusion opportunities can kick in, but must
5835 generate VNOR elsewhere. */
5836 rtx notx = gen_rtx_NOT (V16QImode, force_reg (V16QImode, x));
5837 rtx iorx = (TARGET_P8_VECTOR
5838 ? gen_rtx_IOR (V16QImode, notx, notx)
5839 : gen_rtx_AND (V16QImode, notx, notx));
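/* (ior (not x) (not x)) matches the vnand pattern and
   (and (not x) (not x)) matches vnor; with identical operands both
   simply compute the inverted selector.  */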
5840 rtx tmp = gen_reg_rtx (V16QImode);
5841 emit_insn (gen_rtx_SET (VOIDmode, tmp, iorx));
5843 /* Permute with operands reversed and adjusted selector. */
5844 x = gen_rtx_UNSPEC (mode, gen_rtvec (3, reg, target, tmp),
5845 UNSPEC_VPERM);
5848 emit_insn (gen_rtx_SET (VOIDmode, target, x));
5851 /* Extract field ELT from VEC into TARGET. */
5853 void
5854 rs6000_expand_vector_extract (rtx target, rtx vec, int elt)
5856 machine_mode mode = GET_MODE (vec);
5857 machine_mode inner_mode = GET_MODE_INNER (mode);
5858 rtx mem;
5860 if (VECTOR_MEM_VSX_P (mode))
5862 switch (mode)
5864 default:
5865 break;
5866 case V1TImode:
5867 gcc_assert (elt == 0 && inner_mode == TImode);
5868 emit_move_insn (target, gen_lowpart (TImode, vec));
5869 break;
5870 case V2DFmode:
5871 emit_insn (gen_vsx_extract_v2df (target, vec, GEN_INT (elt)));
5872 return;
5873 case V2DImode:
5874 emit_insn (gen_vsx_extract_v2di (target, vec, GEN_INT (elt)));
5875 return;
5876 case V4SFmode:
5877 emit_insn (gen_vsx_extract_v4sf (target, vec, GEN_INT (elt)));
5878 return;
5882 /* Allocate mode-sized buffer. */
5883 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
5885 emit_move_insn (mem, vec);
5887 /* Add offset to field within buffer matching vector element. */
5888 mem = adjust_address_nv (mem, inner_mode, elt * GET_MODE_SIZE (inner_mode));
5890 emit_move_insn (target, adjust_address_nv (mem, inner_mode, 0));
5893 /* Generates shifts and masks for a pair of rldicl or rldicr insns to
5894 implement ANDing by the mask IN. */
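/* On return OUT[0] and OUT[2] are the two rotate counts and OUT[1] and
   OUT[3] the masks to AND with after each rotate, paired up as two
   rotate-then-mask steps (a description of the intended consumption by
   the 64-bit AND splitters, inferred from the code below).  */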
5895 void
5896 build_mask64_2_operands (rtx in, rtx *out)
5898 unsigned HOST_WIDE_INT c, lsb, m1, m2;
5899 int shift;
5901 gcc_assert (GET_CODE (in) == CONST_INT);
5903 c = INTVAL (in);
5904 if (c & 1)
5906 /* Assume c initially something like 0x00fff000000fffff. The idea
5907 is to rotate the word so that the middle ^^^^^^ group of zeros
5908 is at the MS end and can be cleared with an rldicl mask. We then
5909 rotate back and clear off the MS ^^ group of zeros with a
5910 second rldicl. */
5911 c = ~c; /* c == 0xff000ffffff00000 */
5912 lsb = c & -c; /* lsb == 0x0000000000100000 */
5913 m1 = -lsb; /* m1 == 0xfffffffffff00000 */
5914 c = ~c; /* c == 0x00fff000000fffff */
5915 c &= -lsb; /* c == 0x00fff00000000000 */
5916 lsb = c & -c; /* lsb == 0x0000100000000000 */
5917 c = ~c; /* c == 0xff000fffffffffff */
5918 c &= -lsb; /* c == 0xff00000000000000 */
5919 shift = 0;
5920 while ((lsb >>= 1) != 0)
5921 shift++; /* shift == 44 on exit from loop */
5922 m1 <<= 64 - shift; /* m1 == 0xffffff0000000000 */
5923 m1 = ~m1; /* m1 == 0x000000ffffffffff */
5924 m2 = ~c; /* m2 == 0x00ffffffffffffff */
5926 else
5928 /* Assume c initially something like 0xff000f0000000000. The idea
5929 is to rotate the word so that the ^^^ middle group of zeros
5930 is at the LS end and can be cleared with an rldicr mask. We then
5931 rotate back and clear off the LS group of ^^^^^^^^^^ zeros with
5932 a second rldicr. */
5933 lsb = c & -c; /* lsb == 0x0000010000000000 */
5934 m2 = -lsb; /* m2 == 0xffffff0000000000 */
5935 c = ~c; /* c == 0x00fff0ffffffffff */
5936 c &= -lsb; /* c == 0x00fff00000000000 */
5937 lsb = c & -c; /* lsb == 0x0000100000000000 */
5938 c = ~c; /* c == 0xff000fffffffffff */
5939 c &= -lsb; /* c == 0xff00000000000000 */
5940 shift = 0;
5941 while ((lsb >>= 1) != 0)
5942 shift++; /* shift == 44 on exit from loop */
5943 m1 = ~c; /* m1 == 0x00ffffffffffffff */
5944 m1 >>= shift; /* m1 == 0x0000000000000fff */
5945 m1 = ~m1; /* m1 == 0xfffffffffffff000 */
5948 /* Note that when we only have two 0->1 and 1->0 transitions, one of the
5949 masks will be all 1's. We are guaranteed more than one transition. */
5950 out[0] = GEN_INT (64 - shift);
5951 out[1] = GEN_INT (m1);
5952 out[2] = GEN_INT (shift);
5953 out[3] = GEN_INT (m2);
5956 /* Return TRUE if OP is an invalid SUBREG operation on the e500. */
5958 bool
5959 invalid_e500_subreg (rtx op, machine_mode mode)
5961 if (TARGET_E500_DOUBLE)
5963 /* Reject (subreg:SI (reg:DF)); likewise with subreg:DI or
5964 subreg:TI and reg:TF. Decimal float modes are like integer
5965 modes (only low part of each register used) for this
5966 purpose. */
5967 if (GET_CODE (op) == SUBREG
5968 && (mode == SImode || mode == DImode || mode == TImode
5969 || mode == DDmode || mode == TDmode || mode == PTImode)
5970 && REG_P (SUBREG_REG (op))
5971 && (GET_MODE (SUBREG_REG (op)) == DFmode
5972 || GET_MODE (SUBREG_REG (op)) == TFmode))
5973 return true;
5975 /* Reject (subreg:DF (reg:DI)); likewise with subreg:TF and
5976 reg:TI. */
5977 if (GET_CODE (op) == SUBREG
5978 && (mode == DFmode || mode == TFmode)
5979 && REG_P (SUBREG_REG (op))
5980 && (GET_MODE (SUBREG_REG (op)) == DImode
5981 || GET_MODE (SUBREG_REG (op)) == TImode
5982 || GET_MODE (SUBREG_REG (op)) == PTImode
5983 || GET_MODE (SUBREG_REG (op)) == DDmode
5984 || GET_MODE (SUBREG_REG (op)) == TDmode))
5985 return true;
5988 if (TARGET_SPE
5989 && GET_CODE (op) == SUBREG
5990 && mode == SImode
5991 && REG_P (SUBREG_REG (op))
5992 && SPE_VECTOR_MODE (GET_MODE (SUBREG_REG (op))))
5993 return true;
5995 return false;
5998 /* Return alignment of TYPE. Existing alignment is ALIGN. HOW
5999 selects whether the alignment is abi mandated, optional, or
6000 both abi and optional alignment. */
6002 unsigned int
6003 rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
6005 if (how != align_opt)
6007 if (TREE_CODE (type) == VECTOR_TYPE)
6009 if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
6010 || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
6012 if (align < 64)
6013 align = 64;
6015 else if (align < 128)
6016 align = 128;
6018 else if (TARGET_E500_DOUBLE
6019 && TREE_CODE (type) == REAL_TYPE
6020 && TYPE_MODE (type) == DFmode)
6022 if (align < 64)
6023 align = 64;
6027 if (how != align_abi)
6029 if (TREE_CODE (type) == ARRAY_TYPE
6030 && TYPE_MODE (TREE_TYPE (type)) == QImode)
6032 if (align < BITS_PER_WORD)
6033 align = BITS_PER_WORD;
6037 return align;
6040 /* Previous GCC releases forced all vector types to have 16-byte alignment. */
6042 bool
6043 rs6000_special_adjust_field_align_p (tree field, unsigned int computed)
6045 if (TARGET_ALTIVEC && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
6047 if (computed != 128)
6049 static bool warned;
6050 if (!warned && warn_psabi)
6052 warned = true;
6053 inform (input_location,
6054 "the layout of aggregates containing vectors with"
6055 " %d-byte alignment has changed in GCC 5",
6056 computed / BITS_PER_UNIT);
6059 /* In current GCC there is no special case. */
6060 return false;
6063 return false;
6066 /* AIX increases natural record alignment to doubleword if the first
6067 field is an FP double while the FP fields remain word aligned. */
6069 unsigned int
6070 rs6000_special_round_type_align (tree type, unsigned int computed,
6071 unsigned int specified)
6073 unsigned int align = MAX (computed, specified);
6074 tree field = TYPE_FIELDS (type);
6076 /* Skip all non-field decls. */
6077 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6078 field = DECL_CHAIN (field);
6080 if (field != NULL && field != type)
6082 type = TREE_TYPE (field);
6083 while (TREE_CODE (type) == ARRAY_TYPE)
6084 type = TREE_TYPE (type);
6086 if (type != error_mark_node && TYPE_MODE (type) == DFmode)
6087 align = MAX (align, 64);
6090 return align;
6093 /* Darwin increases record alignment to the natural alignment of
6094 the first field. */
6096 unsigned int
6097 darwin_rs6000_special_round_type_align (tree type, unsigned int computed,
6098 unsigned int specified)
6100 unsigned int align = MAX (computed, specified);
6102 if (TYPE_PACKED (type))
6103 return align;
6105 /* Find the first field, looking down into aggregates. */
6106 do {
6107 tree field = TYPE_FIELDS (type);
6108 /* Skip all non-field decls. */
6109 while (field != NULL && TREE_CODE (field) != FIELD_DECL)
6110 field = DECL_CHAIN (field);
6111 if (! field)
6112 break;
6113 /* A packed field does not contribute any extra alignment. */
6114 if (DECL_PACKED (field))
6115 return align;
6116 type = TREE_TYPE (field);
6117 while (TREE_CODE (type) == ARRAY_TYPE)
6118 type = TREE_TYPE (type);
6119 } while (AGGREGATE_TYPE_P (type));
6121 if (! AGGREGATE_TYPE_P (type) && type != error_mark_node)
6122 align = MAX (align, TYPE_ALIGN (type));
6124 return align;
6127 /* Return 1 for an operand in small memory on V.4/eabi. */
6129 int
6130 small_data_operand (rtx op ATTRIBUTE_UNUSED,
6131 machine_mode mode ATTRIBUTE_UNUSED)
6133 #if TARGET_ELF
6134 rtx sym_ref;
6136 if (rs6000_sdata == SDATA_NONE || rs6000_sdata == SDATA_DATA)
6137 return 0;
6139 if (DEFAULT_ABI != ABI_V4)
6140 return 0;
6142 /* Vector and float memory instructions have a limited offset on the
6143 SPE, so using a vector or float variable directly as an operand is
6144 not useful. */
6145 if (TARGET_SPE
6146 && (SPE_VECTOR_MODE (mode) || FLOAT_MODE_P (mode)))
6147 return 0;
6149 if (GET_CODE (op) == SYMBOL_REF)
6150 sym_ref = op;
6152 else if (GET_CODE (op) != CONST
6153 || GET_CODE (XEXP (op, 0)) != PLUS
6154 || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF
6155 || GET_CODE (XEXP (XEXP (op, 0), 1)) != CONST_INT)
6156 return 0;
6158 else
6160 rtx sum = XEXP (op, 0);
6161 HOST_WIDE_INT summand;
6163 /* We have to be careful here, because it is the referenced address
6164 that must be 32k from _SDA_BASE_, not just the symbol. */
6165 summand = INTVAL (XEXP (sum, 1));
6166 if (summand < 0 || summand > g_switch_value)
6167 return 0;
6169 sym_ref = XEXP (sum, 0);
6172 return SYMBOL_REF_SMALL_P (sym_ref);
6173 #else
6174 return 0;
6175 #endif
6178 /* Return true if either operand is a general purpose register. */
6180 bool
6181 gpr_or_gpr_p (rtx op0, rtx op1)
6183 return ((REG_P (op0) && INT_REGNO_P (REGNO (op0)))
6184 || (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
6187 /* Return true if this is a move direct operation between GPR registers and
6188 floating point/VSX registers. */
6190 bool
6191 direct_move_p (rtx op0, rtx op1)
6193 int regno0, regno1;
6195 if (!REG_P (op0) || !REG_P (op1))
6196 return false;
6198 if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
6199 return false;
6201 regno0 = REGNO (op0);
6202 regno1 = REGNO (op1);
6203 if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
6204 return false;
6206 if (INT_REGNO_P (regno0))
6207 return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
6209 else if (INT_REGNO_P (regno1))
6211 if (TARGET_MFPGPR && FP_REGNO_P (regno0))
6212 return true;
6214 else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
6215 return true;
6218 return false;
6221 /* Return true if this is a load or store quad operation. This function does
6222 not handle the atomic quad memory instructions. */
6224 bool
6225 quad_load_store_p (rtx op0, rtx op1)
6227 bool ret;
6229 if (!TARGET_QUAD_MEMORY)
6230 ret = false;
6232 else if (REG_P (op0) && MEM_P (op1))
6233 ret = (quad_int_reg_operand (op0, GET_MODE (op0))
6234 && quad_memory_operand (op1, GET_MODE (op1))
6235 && !reg_overlap_mentioned_p (op0, op1));
6237 else if (MEM_P (op0) && REG_P (op1))
6238 ret = (quad_memory_operand (op0, GET_MODE (op0))
6239 && quad_int_reg_operand (op1, GET_MODE (op1)));
6241 else
6242 ret = false;
6244 if (TARGET_DEBUG_ADDR)
6246 fprintf (stderr, "\n========== quad_load_store, return %s\n",
6247 ret ? "true" : "false");
6248 debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
6251 return ret;
6254 /* Given an address, return a constant offset term if one exists. */
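/* E.g. (plus (reg) (const_int 8)) yields (const_int 8), and
   (lo_sum (reg) (const (plus (symbol_ref) (const_int 12)))) yields
   (const_int 12); a plain (reg) yields NULL_RTX.  */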
6256 static rtx
6257 address_offset (rtx op)
6259 if (GET_CODE (op) == PRE_INC
6260 || GET_CODE (op) == PRE_DEC)
6261 op = XEXP (op, 0);
6262 else if (GET_CODE (op) == PRE_MODIFY
6263 || GET_CODE (op) == LO_SUM)
6264 op = XEXP (op, 1);
6266 if (GET_CODE (op) == CONST)
6267 op = XEXP (op, 0);
6269 if (GET_CODE (op) == PLUS)
6270 op = XEXP (op, 1);
6272 if (CONST_INT_P (op))
6273 return op;
6275 return NULL_RTX;
6278 /* Return true if the MEM operand is a memory operand suitable for use
6279 with a (full width, possibly multiple) gpr load/store. On
6280 powerpc64 this means the offset must be divisible by 4.
6281 Implements 'Y' constraint.
6283 Accept direct, indexed, offset, lo_sum and tocref. Since this is
6284 a constraint function we know the operand has satisfied a suitable
6285 memory predicate. Also accept some odd rtl generated by reload
6286 (see rs6000_legitimize_reload_address for various forms). It is
6287 important that reload rtl be accepted by appropriate constraints
6288 but not by the operand predicate.
6290 Offsetting a lo_sum should not be allowed, except where we know by
6291 alignment that a 32k boundary is not crossed, but see the ???
6292 comment in rs6000_legitimize_reload_address. Note that by
6293 "offsetting" here we mean a further offset to access parts of the
6294 MEM. It's fine to have a lo_sum where the inner address is offset
6295 from a sym, since the same sym+offset will appear in the high part
6296 of the address calculation. */
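/* Worked example: a DImode access with 32-bit words (EXTRA == 4) at
   offset 0x7ffc is rejected, since 0x7ffc + 0x8000 == 0xfffc is not
   below 0x10000 - 4; the second word of the access would overflow the
   16-bit displacement field.  */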
6298 bool
6299 mem_operand_gpr (rtx op, machine_mode mode)
6301 unsigned HOST_WIDE_INT offset;
6302 int extra;
6303 rtx addr = XEXP (op, 0);
6305 op = address_offset (addr);
6306 if (op == NULL_RTX)
6307 return true;
6309 offset = INTVAL (op);
6310 if (TARGET_POWERPC64 && (offset & 3) != 0)
6311 return false;
6313 extra = GET_MODE_SIZE (mode) - UNITS_PER_WORD;
6314 if (extra < 0)
6315 extra = 0;
6317 if (GET_CODE (addr) == LO_SUM)
6318 /* For lo_sum addresses, we must allow any offset except one that
6319 causes a wrap, so test only the low 16 bits. */
6320 offset = ((offset & 0xffff) ^ 0x8000) - 0x8000;
6322 return offset + 0x8000 < 0x10000u - extra;
6325 /* Subroutines of rs6000_legitimize_address and rs6000_legitimate_address_p. */
6327 static bool
6328 reg_offset_addressing_ok_p (machine_mode mode)
6330 switch (mode)
6332 case V16QImode:
6333 case V8HImode:
6334 case V4SFmode:
6335 case V4SImode:
6336 case V2DFmode:
6337 case V2DImode:
6338 case V1TImode:
6339 case TImode:
6340 /* AltiVec/VSX vector modes. Only reg+reg addressing is valid. While
6341 TImode is not a vector mode, if we want to use the VSX registers to
6342 move it around, we need to restrict ourselves to reg+reg
6343 addressing. */
6344 if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
6345 return false;
6346 break;
6348 case V4HImode:
6349 case V2SImode:
6350 case V1DImode:
6351 case V2SFmode:
6352 /* Paired vector modes. Only reg+reg addressing is valid. */
6353 if (TARGET_PAIRED_FLOAT)
6354 return false;
6355 break;
6357 case SDmode:
6358 /* If we can do direct load/stores of SDmode, restrict it to reg+reg
6359 addressing for the LFIWZX and STFIWX instructions. */
6360 if (TARGET_NO_SDMODE_STACK)
6361 return false;
6362 break;
6364 default:
6365 break;
6368 return true;
6371 static bool
6372 virtual_stack_registers_memory_p (rtx op)
6374 int regnum;
6376 if (GET_CODE (op) == REG)
6377 regnum = REGNO (op);
6379 else if (GET_CODE (op) == PLUS
6380 && GET_CODE (XEXP (op, 0)) == REG
6381 && GET_CODE (XEXP (op, 1)) == CONST_INT)
6382 regnum = REGNO (XEXP (op, 0));
6384 else
6385 return false;
6387 return (regnum >= FIRST_VIRTUAL_REGISTER
6388 && regnum <= LAST_VIRTUAL_POINTER_REGISTER);
6391 /* Return true if a MODE sized memory accesses to OP plus OFFSET
6392 is known to not straddle a 32k boundary. */
6394 static bool
6395 offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
6396 machine_mode mode)
6398 tree decl, type;
6399 unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
6401 if (GET_CODE (op) != SYMBOL_REF)
6402 return false;
6404 dsize = GET_MODE_SIZE (mode);
6405 decl = SYMBOL_REF_DECL (op);
6406 if (!decl)
6408 if (dsize == 0)
6409 return false;
6411 /* -fsection-anchors loses the original SYMBOL_REF_DECL when
6412 replacing memory addresses with an anchor plus offset. We
6413 could find the decl by rummaging around in the block->objects
6414 VEC for the given offset but that seems like too much work. */
6415 dalign = BITS_PER_UNIT;
6416 if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
6417 && SYMBOL_REF_ANCHOR_P (op)
6418 && SYMBOL_REF_BLOCK (op) != NULL)
6420 struct object_block *block = SYMBOL_REF_BLOCK (op);
6422 dalign = block->alignment;
6423 offset += SYMBOL_REF_BLOCK_OFFSET (op);
6425 else if (CONSTANT_POOL_ADDRESS_P (op))
6427 /* It would be nice to have get_pool_align ()... */
6428 machine_mode cmode = get_pool_mode (op);
6430 dalign = GET_MODE_ALIGNMENT (cmode);
6433 else if (DECL_P (decl))
6435 dalign = DECL_ALIGN (decl);
6437 if (dsize == 0)
6439 /* Allow BLKmode when the entire object is known to not
6440 cross a 32k boundary. */
6441 if (!DECL_SIZE_UNIT (decl))
6442 return false;
6444 if (!tree_fits_uhwi_p (DECL_SIZE_UNIT (decl)))
6445 return false;
6447 dsize = tree_to_uhwi (DECL_SIZE_UNIT (decl));
6448 if (dsize > 32768)
6449 return false;
6451 return dalign / BITS_PER_UNIT >= dsize;
6454 else
6456 type = TREE_TYPE (decl);
6458 dalign = TYPE_ALIGN (type);
6459 if (CONSTANT_CLASS_P (decl))
6460 dalign = CONSTANT_ALIGNMENT (decl, dalign);
6461 else
6462 dalign = DATA_ALIGNMENT (decl, dalign);
6464 if (dsize == 0)
6466 /* BLKmode, check the entire object. */
6467 if (TREE_CODE (decl) == STRING_CST)
6468 dsize = TREE_STRING_LENGTH (decl);
6469 else if (TYPE_SIZE_UNIT (type)
6470 && tree_fits_uhwi_p (TYPE_SIZE_UNIT (type)))
6471 dsize = tree_to_uhwi (TYPE_SIZE_UNIT (type));
6472 else
6473 return false;
6474 if (dsize > 32768)
6475 return false;
6477 return dalign / BITS_PER_UNIT >= dsize;
6481 /* Find how many bits of the alignment we know for this access. */
6482 mask = dalign / BITS_PER_UNIT - 1;
6483 lsb = offset & -offset;
6484 mask &= lsb - 1;
6485 dalign = mask + 1;
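/* E.g. DALIGN == 128 bits (16 bytes) and OFFSET == 24: mask == 15,
   lsb == 8, and mask &= 7 leaves 7, so only 8-byte alignment is known
   for this access.  */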
6487 return dalign >= dsize;
6490 static bool
6491 constant_pool_expr_p (rtx op)
6493 rtx base, offset;
6495 split_const (op, &base, &offset);
6496 return (GET_CODE (base) == SYMBOL_REF
6497 && CONSTANT_POOL_ADDRESS_P (base)
6498 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (base), Pmode));
6501 static const_rtx tocrel_base, tocrel_offset;
6503 /* Return true if OP is a toc pointer relative address (the output
6504 of create_TOC_reference). If STRICT, do not match high part or
6505 non-split -mcmodel=large/medium toc pointer relative addresses. */
6507 bool
6508 toc_relative_expr_p (const_rtx op, bool strict)
6510 if (!TARGET_TOC)
6511 return false;
6513 if (TARGET_CMODEL != CMODEL_SMALL)
6515 /* Only match the low part. */
6516 if (GET_CODE (op) == LO_SUM
6517 && REG_P (XEXP (op, 0))
6518 && INT_REG_OK_FOR_BASE_P (XEXP (op, 0), strict))
6519 op = XEXP (op, 1);
6520 else if (strict)
6521 return false;
6524 tocrel_base = op;
6525 tocrel_offset = const0_rtx;
6526 if (GET_CODE (op) == PLUS && add_cint_operand (XEXP (op, 1), GET_MODE (op)))
6528 tocrel_base = XEXP (op, 0);
6529 tocrel_offset = XEXP (op, 1);
6532 return (GET_CODE (tocrel_base) == UNSPEC
6533 && XINT (tocrel_base, 1) == UNSPEC_TOCREL);
6536 /* Return true if X is a constant pool address, and also for cmodel=medium
6537 if X is a toc-relative address known to be offsettable within MODE. */
6539 bool
6540 legitimate_constant_pool_address_p (const_rtx x, machine_mode mode,
6541 bool strict)
6543 return (toc_relative_expr_p (x, strict)
6544 && (TARGET_CMODEL != CMODEL_MEDIUM
6545 || constant_pool_expr_p (XVECEXP (tocrel_base, 0, 0))
6546 || mode == QImode
6547 || offsettable_ok_by_alignment (XVECEXP (tocrel_base, 0, 0),
6548 INTVAL (tocrel_offset), mode)));
6551 static bool
6552 legitimate_small_data_p (machine_mode mode, rtx x)
6554 return (DEFAULT_ABI == ABI_V4
6555 && !flag_pic && !TARGET_TOC
6556 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST)
6557 && small_data_operand (x, mode));
6560 /* SPE offset addressing is limited to 5-bits worth of double words. */
6561 #define SPE_CONST_OFFSET_OK(x) (((x) & ~0xf8) == 0)
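/* I.e. the offset must be a multiple of 8 in [0, 0xf8]: a 5-bit
   doubleword index scaled by the 8-byte access size.  */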
6563 bool
6564 rs6000_legitimate_offset_address_p (machine_mode mode, rtx x,
6565 bool strict, bool worst_case)
6567 unsigned HOST_WIDE_INT offset;
6568 unsigned int extra;
6570 if (GET_CODE (x) != PLUS)
6571 return false;
6572 if (!REG_P (XEXP (x, 0)))
6573 return false;
6574 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6575 return false;
6576 if (!reg_offset_addressing_ok_p (mode))
6577 return virtual_stack_registers_memory_p (x);
6578 if (legitimate_constant_pool_address_p (x, mode, strict || lra_in_progress))
6579 return true;
6580 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
6581 return false;
6583 offset = INTVAL (XEXP (x, 1));
6584 extra = 0;
6585 switch (mode)
6587 case V4HImode:
6588 case V2SImode:
6589 case V1DImode:
6590 case V2SFmode:
6591 /* SPE vector modes. */
6592 return SPE_CONST_OFFSET_OK (offset);
6594 case DFmode:
6595 case DDmode:
6596 case DImode:
6597 /* On e500v2, we may have:
6599 (subreg:DF (mem:DI (plus (reg) (const_int))) 0).
6601 Which gets addressed with evldd instructions. */
6602 if (TARGET_E500_DOUBLE)
6603 return SPE_CONST_OFFSET_OK (offset);
6605 /* If we are using VSX scalar loads, restrict ourselves to reg+reg
6606 addressing. */
6607 if (VECTOR_MEM_VSX_P (mode))
6608 return false;
6610 if (!worst_case)
6611 break;
6612 if (!TARGET_POWERPC64)
6613 extra = 4;
6614 else if (offset & 3)
6615 return false;
6616 break;
6618 case TFmode:
6619 if (TARGET_E500_DOUBLE)
6620 return (SPE_CONST_OFFSET_OK (offset)
6621 && SPE_CONST_OFFSET_OK (offset + 8));
6622 /* fall through */
6624 case TDmode:
6625 case TImode:
6626 case PTImode:
6627 extra = 8;
6628 if (!worst_case)
6629 break;
6630 if (!TARGET_POWERPC64)
6631 extra = 12;
6632 else if (offset & 3)
6633 return false;
6634 break;
6636 default:
6637 break;
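/* OFFSET is unsigned, so the addition below maps the acceptable signed
   displacements [-0x8000, 0x8000 - EXTRA) onto [0, 0x10000 - EXTRA);
   a single unsigned comparison then rejects both too-small and
   too-large displacements.  */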
6640 offset += 0x8000;
6641 return offset < 0x10000 - extra;
6644 bool
6645 legitimate_indexed_address_p (rtx x, int strict)
6647 rtx op0, op1;
6649 if (GET_CODE (x) != PLUS)
6650 return false;
6652 op0 = XEXP (x, 0);
6653 op1 = XEXP (x, 1);
6655 /* Recognize the rtl generated by reload which we know will later be
6656 replaced with proper base and index regs. */
6657 if (!strict
6658 && reload_in_progress
6659 && (REG_P (op0) || GET_CODE (op0) == PLUS)
6660 && REG_P (op1))
6661 return true;
6663 return (REG_P (op0) && REG_P (op1)
6664 && ((INT_REG_OK_FOR_BASE_P (op0, strict)
6665 && INT_REG_OK_FOR_INDEX_P (op1, strict))
6666 || (INT_REG_OK_FOR_BASE_P (op1, strict)
6667 && INT_REG_OK_FOR_INDEX_P (op0, strict))));
6670 bool
6671 avoiding_indexed_address_p (machine_mode mode)
6673 /* Avoid indexed addressing for modes that have non-indexed
6674 load/store instruction forms. */
6675 return (TARGET_AVOID_XFORM && VECTOR_MEM_NONE_P (mode));
6678 bool
6679 legitimate_indirect_address_p (rtx x, int strict)
6681 return GET_CODE (x) == REG && INT_REG_OK_FOR_BASE_P (x, strict);
6684 bool
6685 macho_lo_sum_memory_operand (rtx x, machine_mode mode)
6687 if (!TARGET_MACHO || !flag_pic
6688 || mode != SImode || GET_CODE (x) != MEM)
6689 return false;
6690 x = XEXP (x, 0);
6692 if (GET_CODE (x) != LO_SUM)
6693 return false;
6694 if (GET_CODE (XEXP (x, 0)) != REG)
6695 return false;
6696 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 0))
6697 return false;
6698 x = XEXP (x, 1);
6700 return CONSTANT_P (x);
6703 static bool
6704 legitimate_lo_sum_address_p (machine_mode mode, rtx x, int strict)
6706 if (GET_CODE (x) != LO_SUM)
6707 return false;
6708 if (GET_CODE (XEXP (x, 0)) != REG)
6709 return false;
6710 if (!INT_REG_OK_FOR_BASE_P (XEXP (x, 0), strict))
6711 return false;
6712 /* Restrict addressing for DI because of our SUBREG hackery. */
6713 if (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
6714 return false;
6715 x = XEXP (x, 1);
6717 if (TARGET_ELF || TARGET_MACHO)
6719 bool large_toc_ok;
6721 if (DEFAULT_ABI == ABI_V4 && flag_pic)
6722 return false;
6723 /* LRA does not use LEGITIMIZE_RELOAD_ADDRESS, as that hook usually
6724 calls push_reload from reload pass code. LEGITIMIZE_RELOAD_ADDRESS
6725 recognizes some LO_SUM addresses as valid even though this
6726 function says otherwise. In most cases LRA can generate correct
6727 code for address reloads through its own transformations, but it
6728 cannot manage some LO_SUM cases. So we need code here, analogous
6729 to that in rs6000_legitimize_reload_address for LO_SUM, saying
6730 that those addresses are still valid. */
6731 large_toc_ok = (lra_in_progress && TARGET_CMODEL != CMODEL_SMALL
6732 && small_toc_ref (x, VOIDmode));
6733 if (TARGET_TOC && ! large_toc_ok)
6734 return false;
6735 if (GET_MODE_NUNITS (mode) != 1)
6736 return false;
6737 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
6738 && !(/* ??? Assume floating point reg based on mode? */
6739 TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
6740 && (mode == DFmode || mode == DDmode)))
6741 return false;
6743 return CONSTANT_P (x) || large_toc_ok;
6746 return false;
6750 /* Try machine-dependent ways of modifying an illegitimate address
6751 to be legitimate. If we find one, return the new, valid address.
6752 This is used from only one place: `memory_address' in explow.c.
6754 OLDX is the address as it was before break_out_memory_refs was
6755 called. In some cases it is useful to look at this to decide what
6756 needs to be done.
6758 It is always safe for this function to do nothing. It exists to
6759 recognize opportunities to optimize the output.
6761 On RS/6000, first check for the sum of a register with a constant
6762 integer that is out of range. If so, generate code to add the
6763 constant with the low-order 16 bits masked to the register and force
6764 this result into another register (this can be done with `cau').
6765 Then generate an address of REG+(CONST&0xffff), allowing for the
6766 possibility of bit 16 being a one.
6768 Then check for the sum of a register and something not constant, try to
6769 load the other things into a register and return the sum. */
6771 static rtx
6772 rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
6773 machine_mode mode)
6775 unsigned int extra;
6777 if (!reg_offset_addressing_ok_p (mode))
6779 if (virtual_stack_registers_memory_p (x))
6780 return x;
6782 /* In theory we should not be seeing addresses of the form reg+0,
6783 but just in case it is generated, optimize it away. */
6784 if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
6785 return force_reg (Pmode, XEXP (x, 0));
6787 /* For TImode with load/store quad, restrict addresses to just a single
6788 pointer, so it works with both GPRs and VSX registers. */
6789 /* Make sure both operands are registers. */
6790 else if (GET_CODE (x) == PLUS
6791 && (mode != TImode || !TARGET_QUAD_MEMORY))
6792 return gen_rtx_PLUS (Pmode,
6793 force_reg (Pmode, XEXP (x, 0)),
6794 force_reg (Pmode, XEXP (x, 1)));
6795 else
6796 return force_reg (Pmode, x);
6798 if (GET_CODE (x) == SYMBOL_REF)
6800 enum tls_model model = SYMBOL_REF_TLS_MODEL (x);
6801 if (model != 0)
6802 return rs6000_legitimize_tls_address (x, model);
6805 extra = 0;
6806 switch (mode)
6808 case TFmode:
6809 case TDmode:
6810 case TImode:
6811 case PTImode:
6812 /* As in legitimate_offset_address_p we do not assume
6813 worst-case. The mode here is just a hint as to the registers
6814 used. A TImode is usually in gprs, but may actually be in
6815 fprs. Leave worst-case scenario for reload to handle via
6816 insn constraints. PTImode is only GPRs. */
6817 extra = 8;
6818 break;
6819 default:
6820 break;
6823 if (GET_CODE (x) == PLUS
6824 && GET_CODE (XEXP (x, 0)) == REG
6825 && GET_CODE (XEXP (x, 1)) == CONST_INT
6826 && ((unsigned HOST_WIDE_INT) (INTVAL (XEXP (x, 1)) + 0x8000)
6827 >= 0x10000 - extra)
6828 && !(SPE_VECTOR_MODE (mode)
6829 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)))
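/* E.g. reg + 0x12345 is split into high_int == 0x10000 and
   low_int == 0x2345; the high part is added up front (typically with
   an addis) and the low part remains a legal 16-bit displacement.  */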
6831 HOST_WIDE_INT high_int, low_int;
6832 rtx sum;
6833 low_int = ((INTVAL (XEXP (x, 1)) & 0xffff) ^ 0x8000) - 0x8000;
6834 if (low_int >= 0x8000 - extra)
6835 low_int = 0;
6836 high_int = INTVAL (XEXP (x, 1)) - low_int;
6837 sum = force_operand (gen_rtx_PLUS (Pmode, XEXP (x, 0),
6838 GEN_INT (high_int)), 0);
6839 return plus_constant (Pmode, sum, low_int);
6841 else if (GET_CODE (x) == PLUS
6842 && GET_CODE (XEXP (x, 0)) == REG
6843 && GET_CODE (XEXP (x, 1)) != CONST_INT
6844 && GET_MODE_NUNITS (mode) == 1
6845 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6846 || (/* ??? Assume floating point reg based on mode? */
6847 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6848 && (mode == DFmode || mode == DDmode)))
6849 && !avoiding_indexed_address_p (mode))
6851 return gen_rtx_PLUS (Pmode, XEXP (x, 0),
6852 force_reg (Pmode, force_operand (XEXP (x, 1), 0)));
6854 else if (SPE_VECTOR_MODE (mode)
6855 || (TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD))
6857 if (mode == DImode)
6858 return x;
6859 /* We accept [reg + reg] and [reg + OFFSET]. */
6861 if (GET_CODE (x) == PLUS)
6863 rtx op1 = XEXP (x, 0);
6864 rtx op2 = XEXP (x, 1);
6865 rtx y;
6867 op1 = force_reg (Pmode, op1);
6869 if (GET_CODE (op2) != REG
6870 && (GET_CODE (op2) != CONST_INT
6871 || !SPE_CONST_OFFSET_OK (INTVAL (op2))
6872 || (GET_MODE_SIZE (mode) > 8
6873 && !SPE_CONST_OFFSET_OK (INTVAL (op2) + 8))))
6874 op2 = force_reg (Pmode, op2);
6876 /* We can't always do [reg + reg] for these, because [reg +
6877 reg + offset] is not a legitimate addressing mode. */
6878 y = gen_rtx_PLUS (Pmode, op1, op2);
6880 if ((GET_MODE_SIZE (mode) > 8 || mode == DDmode) && REG_P (op2))
6881 return force_reg (Pmode, y);
6882 else
6883 return y;
6886 return force_reg (Pmode, x);
6888 else if ((TARGET_ELF
6889 #if TARGET_MACHO
6890 || !MACHO_DYNAMIC_NO_PIC_P
6891 #endif
6893 && TARGET_32BIT
6894 && TARGET_NO_TOC
6895 && ! flag_pic
6896 && GET_CODE (x) != CONST_INT
6897 && GET_CODE (x) != CONST_WIDE_INT
6898 && GET_CODE (x) != CONST_DOUBLE
6899 && CONSTANT_P (x)
6900 && GET_MODE_NUNITS (mode) == 1
6901 && (GET_MODE_SIZE (mode) <= UNITS_PER_WORD
6902 || (/* ??? Assume floating point reg based on mode? */
6903 (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
6904 && (mode == DFmode || mode == DDmode))))
6906 rtx reg = gen_reg_rtx (Pmode);
6907 if (TARGET_ELF)
6908 emit_insn (gen_elf_high (reg, x));
6909 else
6910 emit_insn (gen_macho_high (reg, x));
6911 return gen_rtx_LO_SUM (Pmode, reg, x);
6913 else if (TARGET_TOC
6914 && GET_CODE (x) == SYMBOL_REF
6915 && constant_pool_expr_p (x)
6916 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (x), Pmode))
6917 return create_TOC_reference (x, NULL_RTX);
6918 else
6919 return x;
6922 /* Debug version of rs6000_legitimize_address. */
6923 static rtx
6924 rs6000_debug_legitimize_address (rtx x, rtx oldx, machine_mode mode)
6926 rtx ret;
6927 rtx_insn *insns;
6929 start_sequence ();
6930 ret = rs6000_legitimize_address (x, oldx, mode);
6931 insns = get_insns ();
6932 end_sequence ();
6934 if (ret != x)
6936 fprintf (stderr,
6937 "\nrs6000_legitimize_address: mode %s, old code %s, "
6938 "new code %s, modified\n",
6939 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)),
6940 GET_RTX_NAME (GET_CODE (ret)));
6942 fprintf (stderr, "Original address:\n");
6943 debug_rtx (x);
6945 fprintf (stderr, "oldx:\n");
6946 debug_rtx (oldx);
6948 fprintf (stderr, "New address:\n");
6949 debug_rtx (ret);
6951 if (insns)
6953 fprintf (stderr, "Insns added:\n");
6954 debug_rtx_list (insns, 20);
6957 else
6959 fprintf (stderr,
6960 "\nrs6000_legitimize_address: mode %s, code %s, no change:\n",
6961 GET_MODE_NAME (mode), GET_RTX_NAME (GET_CODE (x)));
6963 debug_rtx (x);
6966 if (insns)
6967 emit_insn (insns);
6969 return ret;
6972 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
6973 We need to emit DTP-relative relocations. */
6975 static void rs6000_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
6976 static void
6977 rs6000_output_dwarf_dtprel (FILE *file, int size, rtx x)
6979 switch (size)
6981 case 4:
6982 fputs ("\t.long\t", file);
6983 break;
6984 case 8:
6985 fputs (DOUBLE_INT_ASM_OP, file);
6986 break;
6987 default:
6988 gcc_unreachable ();
6990 output_addr_const (file, x);
6991 fputs ("@dtprel+0x8000", file);
6994 /* Return true if X is a symbol that refers to real (rather than emulated)
6995 TLS. */
6997 static bool
6998 rs6000_real_tls_symbol_ref_p (rtx x)
7000 return (GET_CODE (x) == SYMBOL_REF
7001 && SYMBOL_REF_TLS_MODEL (x) >= TLS_MODEL_REAL);
7004 /* In the name of slightly smaller debug output, and to cater to
7005 general assembler lossage, recognize various UNSPEC sequences
7006 and turn them back into a direct symbol reference. */
7008 static rtx
7009 rs6000_delegitimize_address (rtx orig_x)
7011 rtx x, y, offset;
7013 orig_x = delegitimize_mem_from_attrs (orig_x);
7014 x = orig_x;
7015 if (MEM_P (x))
7016 x = XEXP (x, 0);
7018 y = x;
7019 if (TARGET_CMODEL != CMODEL_SMALL
7020 && GET_CODE (y) == LO_SUM)
7021 y = XEXP (y, 1);
7023 offset = NULL_RTX;
7024 if (GET_CODE (y) == PLUS
7025 && GET_MODE (y) == Pmode
7026 && CONST_INT_P (XEXP (y, 1)))
7028 offset = XEXP (y, 1);
7029 y = XEXP (y, 0);
7032 if (GET_CODE (y) == UNSPEC
7033 && XINT (y, 1) == UNSPEC_TOCREL)
7035 #ifdef ENABLE_CHECKING
7036 if (REG_P (XVECEXP (y, 0, 1))
7037 && REGNO (XVECEXP (y, 0, 1)) == TOC_REGISTER)
7039 /* All good. */
7041 else if (GET_CODE (XVECEXP (y, 0, 1)) == DEBUG_EXPR)
7043 /* Weirdness alert. df_note_compute can replace r2 with a
7044 debug_expr when this unspec is in a debug_insn.
7045 Seen in gcc.dg/pr51957-1.c */
7047 else
7049 debug_rtx (orig_x);
7050 abort ();
7052 #endif
7053 y = XVECEXP (y, 0, 0);
7055 #ifdef HAVE_AS_TLS
7056 /* Do not associate thread-local symbols with the original
7057 constant pool symbol. */
7058 if (TARGET_XCOFF
7059 && GET_CODE (y) == SYMBOL_REF
7060 && CONSTANT_POOL_ADDRESS_P (y)
7061 && rs6000_real_tls_symbol_ref_p (get_pool_constant (y)))
7062 return orig_x;
7063 #endif
7065 if (offset != NULL_RTX)
7066 y = gen_rtx_PLUS (Pmode, y, offset);
7067 if (!MEM_P (orig_x))
7068 return y;
7069 else
7070 return replace_equiv_address_nv (orig_x, y);
7073 if (TARGET_MACHO
7074 && GET_CODE (orig_x) == LO_SUM
7075 && GET_CODE (XEXP (orig_x, 1)) == CONST)
7077 y = XEXP (XEXP (orig_x, 1), 0);
7078 if (GET_CODE (y) == UNSPEC
7079 && XINT (y, 1) == UNSPEC_MACHOPIC_OFFSET)
7080 return XVECEXP (y, 0, 0);
7083 return orig_x;
7086 /* Return true if X shouldn't be emitted into the debug info.
7087 The linker doesn't like .toc section references from
7088 .debug_* sections, so reject .toc section symbols. */
7090 static bool
7091 rs6000_const_not_ok_for_debug_p (rtx x)
7093 if (GET_CODE (x) == SYMBOL_REF
7094 && CONSTANT_POOL_ADDRESS_P (x))
7096 rtx c = get_pool_constant (x);
7097 machine_mode cmode = get_pool_mode (x);
7098 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (c, cmode))
7099 return true;
7102 return false;
7105 /* Construct the SYMBOL_REF for the tls_get_addr function. */
7107 static GTY(()) rtx rs6000_tls_symbol;
7108 static rtx
7109 rs6000_tls_get_addr (void)
7111 if (!rs6000_tls_symbol)
7112 rs6000_tls_symbol = init_one_libfunc ("__tls_get_addr");
7114 return rs6000_tls_symbol;
7117 /* Construct the SYMBOL_REF for TLS GOT references. */
7119 static GTY(()) rtx rs6000_got_symbol;
7120 static rtx
7121 rs6000_got_sym (void)
7123 if (!rs6000_got_symbol)
7125 rs6000_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
7126 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_LOCAL;
7127 SYMBOL_REF_FLAGS (rs6000_got_symbol) |= SYMBOL_FLAG_EXTERNAL;
7130 return rs6000_got_symbol;
7133 /* AIX Thread-Local Address support. */
7135 static rtx
7136 rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
7138 rtx sym, mem, tocref, tlsreg, tmpreg, dest, tlsaddr;
7139 const char *name;
7140 char *tlsname;
7142 name = XSTR (addr, 0);
7143 /* Append TLS CSECT qualifier, unless the symbol is already qualified
7144 or will be placed in the TLS private data section. */
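/* E.g. "foo" becomes "foo[UL]" for a bss-style TLS initializer and
   "foo[TL]" otherwise.  */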
7145 if (name[strlen (name) - 1] != ']'
7146 && (TREE_PUBLIC (SYMBOL_REF_DECL (addr))
7147 || bss_initializer_p (SYMBOL_REF_DECL (addr))))
7149 tlsname = XALLOCAVEC (char, strlen (name) + 5); /* name + "[TL]" + nul. */
7150 strcpy (tlsname, name);
7151 strcat (tlsname,
7152 bss_initializer_p (SYMBOL_REF_DECL (addr)) ? "[UL]" : "[TL]");
7153 tlsaddr = copy_rtx (addr);
7154 XSTR (tlsaddr, 0) = ggc_strdup (tlsname);
7156 else
7157 tlsaddr = addr;
7159 /* Place addr into TOC constant pool. */
7160 sym = force_const_mem (GET_MODE (tlsaddr), tlsaddr);
7162 /* Output the TOC entry and create the MEM referencing the value. */
7163 if (constant_pool_expr_p (XEXP (sym, 0))
7164 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (XEXP (sym, 0)), Pmode))
7166 tocref = create_TOC_reference (XEXP (sym, 0), NULL_RTX);
7167 mem = gen_const_mem (Pmode, tocref);
7168 set_mem_alias_set (mem, get_TOC_alias_set ());
7170 else
7171 return sym;
7173 /* Use global-dynamic for local-dynamic. */
7174 if (model == TLS_MODEL_GLOBAL_DYNAMIC
7175 || model == TLS_MODEL_LOCAL_DYNAMIC)
7177 /* Create new TOC reference for @m symbol. */
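/* The module symbol renames the TOC entry's "*LC" prefix to "*LCM";
   illustratively, a label like "*LC..1" would become "*LCM..1".  */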
7178 name = XSTR (XVECEXP (XEXP (mem, 0), 0, 0), 0);
7179 tlsname = XALLOCAVEC (char, strlen (name) + 2); /* "*LCM" is one char longer than "*LC". */
7180 strcpy (tlsname, "*LCM");
7181 strcat (tlsname, name + 3);
7182 rtx modaddr = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (tlsname));
7183 SYMBOL_REF_FLAGS (modaddr) |= SYMBOL_FLAG_LOCAL;
7184 tocref = create_TOC_reference (modaddr, NULL_RTX);
7185 rtx modmem = gen_const_mem (Pmode, tocref);
7186 set_mem_alias_set (modmem, get_TOC_alias_set ());
7188 rtx modreg = gen_reg_rtx (Pmode);
7189 emit_insn (gen_rtx_SET (VOIDmode, modreg, modmem));
7191 tmpreg = gen_reg_rtx (Pmode);
7192 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7194 dest = gen_reg_rtx (Pmode);
7195 if (TARGET_32BIT)
7196 emit_insn (gen_tls_get_addrsi (dest, modreg, tmpreg));
7197 else
7198 emit_insn (gen_tls_get_addrdi (dest, modreg, tmpreg));
7199 return dest;
7201 /* Obtain the TLS pointer: via a call on 32-bit, from GPR 13 on 64-bit. */
7202 else if (TARGET_32BIT)
7204 tlsreg = gen_reg_rtx (SImode);
7205 emit_insn (gen_tls_get_tpointer (tlsreg));
7207 else
7208 tlsreg = gen_rtx_REG (DImode, 13);
7210 /* Load the TOC value into temporary register. */
7211 tmpreg = gen_reg_rtx (Pmode);
7212 emit_insn (gen_rtx_SET (VOIDmode, tmpreg, mem));
7213 set_unique_reg_note (get_last_insn (), REG_EQUAL,
7214 gen_rtx_MINUS (Pmode, addr, tlsreg));
7216 /* Add TOC symbol value to TLS pointer. */
7217 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tmpreg, tlsreg));
7219 return dest;
7222 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
7223 this (thread-local) address. */
7225 static rtx
7226 rs6000_legitimize_tls_address (rtx addr, enum tls_model model)
7228 rtx dest, insn;
7230 if (TARGET_XCOFF)
7231 return rs6000_legitimize_tls_address_aix (addr, model);
7233 dest = gen_reg_rtx (Pmode);
7234 if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 16)
7236 rtx tlsreg;
7238 if (TARGET_64BIT)
7240 tlsreg = gen_rtx_REG (Pmode, 13);
7241 insn = gen_tls_tprel_64 (dest, tlsreg, addr);
7243 else
7245 tlsreg = gen_rtx_REG (Pmode, 2);
7246 insn = gen_tls_tprel_32 (dest, tlsreg, addr);
7248 emit_insn (insn);
7250 else if (model == TLS_MODEL_LOCAL_EXEC && rs6000_tls_size == 32)
7252 rtx tlsreg, tmp;
7254 tmp = gen_reg_rtx (Pmode);
7255 if (TARGET_64BIT)
7257 tlsreg = gen_rtx_REG (Pmode, 13);
7258 insn = gen_tls_tprel_ha_64 (tmp, tlsreg, addr);
7260 else
7262 tlsreg = gen_rtx_REG (Pmode, 2);
7263 insn = gen_tls_tprel_ha_32 (tmp, tlsreg, addr);
7265 emit_insn (insn);
7266 if (TARGET_64BIT)
7267 insn = gen_tls_tprel_lo_64 (dest, tmp, addr);
7268 else
7269 insn = gen_tls_tprel_lo_32 (dest, tmp, addr);
7270 emit_insn (insn);
7272 else
7274 rtx r3, got, tga, tmp1, tmp2, call_insn;
7276 /* We currently use relocations like @got@tlsgd for tls, which
7277 means the linker will handle allocation of tls entries, placing
7278 them in the .got section. So use a pointer to the .got section,
7279 not one to secondary TOC sections used by 64-bit -mminimal-toc,
7280 or to secondary GOT sections used by 32-bit -fPIC. */
7281 if (TARGET_64BIT)
7282 got = gen_rtx_REG (Pmode, 2);
7283 else
7285 if (flag_pic == 1)
7286 got = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
7287 else
7289 rtx gsym = rs6000_got_sym ();
7290 got = gen_reg_rtx (Pmode);
7291 if (flag_pic == 0)
7292 rs6000_emit_move (got, gsym, Pmode);
7293 else
7295 rtx mem, lab, last;
7297 tmp1 = gen_reg_rtx (Pmode);
7298 tmp2 = gen_reg_rtx (Pmode);
7299 mem = gen_const_mem (Pmode, tmp1);
7300 lab = gen_label_rtx ();
7301 emit_insn (gen_load_toc_v4_PIC_1b (gsym, lab));
7302 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
7303 if (TARGET_LINK_STACK)
7304 emit_insn (gen_addsi3 (tmp1, tmp1, GEN_INT (4)));
7305 emit_move_insn (tmp2, mem);
7306 last = emit_insn (gen_addsi3 (got, tmp1, tmp2));
7307 set_unique_reg_note (last, REG_EQUAL, gsym);
7312 if (model == TLS_MODEL_GLOBAL_DYNAMIC)
7314 tga = rs6000_tls_get_addr ();
7315 emit_library_call_value (tga, dest, LCT_CONST, Pmode,
7316 1, const0_rtx, Pmode);
7318 r3 = gen_rtx_REG (Pmode, 3);
7319 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7321 if (TARGET_64BIT)
7322 insn = gen_tls_gd_aix64 (r3, got, addr, tga, const0_rtx);
7323 else
7324 insn = gen_tls_gd_aix32 (r3, got, addr, tga, const0_rtx);
7326 else if (DEFAULT_ABI == ABI_V4)
7327 insn = gen_tls_gd_sysvsi (r3, got, addr, tga, const0_rtx);
7328 else
7329 gcc_unreachable ();
7330 call_insn = last_call_insn ();
7331 PATTERN (call_insn) = insn;
7332 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7333 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7334 pic_offset_table_rtx);
7336 else if (model == TLS_MODEL_LOCAL_DYNAMIC)
7338 tga = rs6000_tls_get_addr ();
7339 tmp1 = gen_reg_rtx (Pmode);
7340 emit_library_call_value (tga, tmp1, LCT_CONST, Pmode,
7341 1, const0_rtx, Pmode);
7343 r3 = gen_rtx_REG (Pmode, 3);
7344 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7346 if (TARGET_64BIT)
7347 insn = gen_tls_ld_aix64 (r3, got, tga, const0_rtx);
7348 else
7349 insn = gen_tls_ld_aix32 (r3, got, tga, const0_rtx);
7351 else if (DEFAULT_ABI == ABI_V4)
7352 insn = gen_tls_ld_sysvsi (r3, got, tga, const0_rtx);
7353 else
7354 gcc_unreachable ();
7355 call_insn = last_call_insn ();
7356 PATTERN (call_insn) = insn;
7357 if (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
7358 use_reg (&CALL_INSN_FUNCTION_USAGE (call_insn),
7359 pic_offset_table_rtx);
7361 if (rs6000_tls_size == 16)
7363 if (TARGET_64BIT)
7364 insn = gen_tls_dtprel_64 (dest, tmp1, addr);
7365 else
7366 insn = gen_tls_dtprel_32 (dest, tmp1, addr);
7368 else if (rs6000_tls_size == 32)
7370 tmp2 = gen_reg_rtx (Pmode);
7371 if (TARGET_64BIT)
7372 insn = gen_tls_dtprel_ha_64 (tmp2, tmp1, addr);
7373 else
7374 insn = gen_tls_dtprel_ha_32 (tmp2, tmp1, addr);
7375 emit_insn (insn);
7376 if (TARGET_64BIT)
7377 insn = gen_tls_dtprel_lo_64 (dest, tmp2, addr);
7378 else
7379 insn = gen_tls_dtprel_lo_32 (dest, tmp2, addr);
7381 else
7383 tmp2 = gen_reg_rtx (Pmode);
7384 if (TARGET_64BIT)
7385 insn = gen_tls_got_dtprel_64 (tmp2, got, addr);
7386 else
7387 insn = gen_tls_got_dtprel_32 (tmp2, got, addr);
7388 emit_insn (insn);
7389 insn = gen_rtx_SET (Pmode, dest,
7390 gen_rtx_PLUS (Pmode, tmp2, tmp1));
7392 emit_insn (insn);
7394 else
7396 /* Initial-exec, or local-exec with a 64-bit offset. */
7397 tmp2 = gen_reg_rtx (Pmode);
7398 if (TARGET_64BIT)
7399 insn = gen_tls_got_tprel_64 (tmp2, got, addr);
7400 else
7401 insn = gen_tls_got_tprel_32 (tmp2, got, addr);
7402 emit_insn (insn);
7403 if (TARGET_64BIT)
7404 insn = gen_tls_tls_64 (dest, tmp2, addr);
7405 else
7406 insn = gen_tls_tls_32 (dest, tmp2, addr);
7407 emit_insn (insn);
7411 return dest;
7414 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
7416 static bool
7417 rs6000_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
7419 if (GET_CODE (x) == HIGH
7420 && GET_CODE (XEXP (x, 0)) == UNSPEC)
7421 return true;
7423 /* A TLS symbol in the TOC cannot contain a sum. */
7424 if (GET_CODE (x) == CONST
7425 && GET_CODE (XEXP (x, 0)) == PLUS
7426 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7427 && SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0)) != 0)
7428 return true;
7430 /* Do not place an ELF TLS symbol in the constant pool. */
7431 return TARGET_ELF && tls_referenced_p (x);
7434 /* Return true iff the given SYMBOL_REF refers to a constant pool entry
7435 that we have put in the TOC, or for cmodel=medium, if the SYMBOL_REF
7436 can be addressed relative to the TOC pointer. */
7438 static bool
7439 use_toc_relative_ref (rtx sym)
7441 return ((constant_pool_expr_p (sym)
7442 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
7443 get_pool_mode (sym)))
7444 || (TARGET_CMODEL == CMODEL_MEDIUM
7445 && SYMBOL_REF_LOCAL_P (sym)));
7448 /* Our implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
7449 replace the input X, or the original X if no replacement is called for.
7450 The output parameter *WIN is 1 if the calling macro should goto WIN,
7451 0 if it should not.
7453 For RS/6000, we wish to handle large displacements off a base
7454 register by splitting the addend across an addi/addis pair and the mem insn.
7455 This cuts the number of extra insns needed from 3 to 1.
7457 On Darwin, we use this to generate code for floating point constants.
7458 A movsf_low is generated so we wind up with 2 instructions rather than 3.
7459 The Darwin code is inside #if TARGET_MACHO because only then are the
7460 machopic_* functions defined. */
7461 static rtx
7462 rs6000_legitimize_reload_address (rtx x, machine_mode mode,
7463 int opnum, int type,
7464 int ind_levels ATTRIBUTE_UNUSED, int *win)
7466 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7468 /* Nasty hack for vsx_splat_V2DF/V2DI load from mem, which takes a
7469 DFmode/DImode MEM. */
7470 if (reg_offset_p
7471 && opnum == 1
7472 && ((mode == DFmode && recog_data.operand_mode[0] == V2DFmode)
7473 || (mode == DImode && recog_data.operand_mode[0] == V2DImode)))
7474 reg_offset_p = false;
7476 /* We must recognize output that we have already generated ourselves. */
7477 if (GET_CODE (x) == PLUS
7478 && GET_CODE (XEXP (x, 0)) == PLUS
7479 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7480 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7481 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7483 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7484 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7485 opnum, (enum reload_type) type);
7486 *win = 1;
7487 return x;
7490 /* Likewise for (lo_sum (high ...) ...) output we have generated. */
7491 if (GET_CODE (x) == LO_SUM
7492 && GET_CODE (XEXP (x, 0)) == HIGH)
7494 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7495 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7496 opnum, (enum reload_type) type);
7497 *win = 1;
7498 return x;
7501 #if TARGET_MACHO
7502 if (DEFAULT_ABI == ABI_DARWIN && flag_pic
7503 && GET_CODE (x) == LO_SUM
7504 && GET_CODE (XEXP (x, 0)) == PLUS
7505 && XEXP (XEXP (x, 0), 0) == pic_offset_table_rtx
7506 && GET_CODE (XEXP (XEXP (x, 0), 1)) == HIGH
7507 && XEXP (XEXP (XEXP (x, 0), 1), 0) == XEXP (x, 1)
7508 && machopic_operand_p (XEXP (x, 1)))
7510 /* Result of previous invocation of this function on Darwin
7511 floating point constant. */
7512 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7513 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7514 opnum, (enum reload_type) type);
7515 *win = 1;
7516 return x;
7518 #endif
7520 if (TARGET_CMODEL != CMODEL_SMALL
7521 && reg_offset_p
7522 && small_toc_ref (x, VOIDmode))
7524 rtx hi = gen_rtx_HIGH (Pmode, copy_rtx (x));
7525 x = gen_rtx_LO_SUM (Pmode, hi, x);
7526 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7527 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7528 opnum, (enum reload_type) type);
7529 *win = 1;
7530 return x;
7533 if (GET_CODE (x) == PLUS
7534 && GET_CODE (XEXP (x, 0)) == REG
7535 && REGNO (XEXP (x, 0)) < FIRST_PSEUDO_REGISTER
7536 && INT_REG_OK_FOR_BASE_P (XEXP (x, 0), 1)
7537 && GET_CODE (XEXP (x, 1)) == CONST_INT
7538 && reg_offset_p
7539 && !SPE_VECTOR_MODE (mode)
7540 && !(TARGET_E500_DOUBLE && GET_MODE_SIZE (mode) > UNITS_PER_WORD)
7541 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode)))
7543 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
7544 HOST_WIDE_INT low = ((val & 0xffff) ^ 0x8000) - 0x8000;
7545 HOST_WIDE_INT high
7546 = (((val - low) & 0xffffffff) ^ 0x80000000) - 0x80000000;
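/* Worked example (illustrative): val = 0x12345 gives low = 0x2345
   and high = 0x10000, so high + low == val; the high part is
   reloaded into a base register (addis) and the low part stays in
   the mem. */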
7548 /* Check for 32-bit overflow. */
7549 if (high + low != val)
7551 *win = 0;
7552 return x;
7555 /* Reload the high part into a base reg; leave the low part
7556 in the mem directly. */
7558 x = gen_rtx_PLUS (GET_MODE (x),
7559 gen_rtx_PLUS (GET_MODE (x), XEXP (x, 0),
7560 GEN_INT (high)),
7561 GEN_INT (low));
7563 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7564 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
7565 opnum, (enum reload_type) type);
7566 *win = 1;
7567 return x;
7570 if (GET_CODE (x) == SYMBOL_REF
7571 && reg_offset_p
7572 && (!VECTOR_MODE_P (mode) || VECTOR_MEM_NONE_P (mode))
7573 && !SPE_VECTOR_MODE (mode)
7574 #if TARGET_MACHO
7575 && DEFAULT_ABI == ABI_DARWIN
7576 && (flag_pic || MACHO_DYNAMIC_NO_PIC_P)
7577 && machopic_symbol_defined_p (x)
7578 #else
7579 && DEFAULT_ABI == ABI_V4
7580 && !flag_pic
7581 #endif
7582 /* Don't do this for TFmode or TDmode, since the result isn't offsettable.
7583 The same goes for DImode without 64-bit gprs and DFmode and DDmode
7584 without fprs.
7585 ??? Assume floating point reg based on mode? This assumption is
7586 violated by e.g. a powerpc-linux -m32 compile of gcc.dg/pr28796-2.c
7587 where reload ends up doing a DFmode load of a constant from
7588 mem using two gprs. Unfortunately, at this point reload
7589 hasn't yet selected regs so poking around in reload data
7590 won't help and even if we could figure out the regs reliably,
7591 we'd still want to allow this transformation when the mem is
7592 naturally aligned. Since we say the address is good here, we
7593 can't disable offsets from LO_SUMs in mem_operand_gpr.
7594 FIXME: Allow offset from lo_sum for other modes too, when
7595 mem is sufficiently aligned. */
7596 && mode != TFmode
7597 && mode != TDmode
7598 && (mode != TImode || !TARGET_VSX_TIMODE)
7599 && mode != PTImode
7600 && (mode != DImode || TARGET_POWERPC64)
7601 && ((mode != DFmode && mode != DDmode) || TARGET_POWERPC64
7602 || (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)))
7604 #if TARGET_MACHO
7605 if (flag_pic)
7607 rtx offset = machopic_gen_offset (x);
7608 x = gen_rtx_LO_SUM (GET_MODE (x),
7609 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
7610 gen_rtx_HIGH (Pmode, offset)), offset);
7612 else
7613 #endif
7614 x = gen_rtx_LO_SUM (GET_MODE (x),
7615 gen_rtx_HIGH (Pmode, x), x);
7617 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7618 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7619 opnum, (enum reload_type) type);
7620 *win = 1;
7621 return x;
7624 /* Reload an offset address wrapped by an AND that represents the
7625 masking of the lower bits. Strip the outer AND and let reload
7626 convert the offset address into an indirect address. For VSX,
7627 force reload to create the address with an AND in a separate
7628 register, because we can't guarantee an altivec register will
7629 be used. */
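/* Illustrative RTL: (and:DI (plus:DI (reg) (const_int 16))
   (const_int -16)) is stripped to the inner (plus ...), which
   reload then converts into an indirect address. */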
7630 if (VECTOR_MEM_ALTIVEC_P (mode)
7631 && GET_CODE (x) == AND
7632 && GET_CODE (XEXP (x, 0)) == PLUS
7633 && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
7634 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
7635 && GET_CODE (XEXP (x, 1)) == CONST_INT
7636 && INTVAL (XEXP (x, 1)) == -16)
7638 x = XEXP (x, 0);
7639 *win = 1;
7640 return x;
7643 if (TARGET_TOC
7644 && reg_offset_p
7645 && GET_CODE (x) == SYMBOL_REF
7646 && use_toc_relative_ref (x))
7648 x = create_TOC_reference (x, NULL_RTX);
7649 if (TARGET_CMODEL != CMODEL_SMALL)
7650 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
7651 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
7652 opnum, (enum reload_type) type);
7653 *win = 1;
7654 return x;
7656 *win = 0;
7657 return x;
7660 /* Debug version of rs6000_legitimize_reload_address. */
7661 static rtx
7662 rs6000_debug_legitimize_reload_address (rtx x, machine_mode mode,
7663 int opnum, int type,
7664 int ind_levels, int *win)
7666 rtx ret = rs6000_legitimize_reload_address (x, mode, opnum, type,
7667 ind_levels, win);
7668 fprintf (stderr,
7669 "\nrs6000_legitimize_reload_address: mode = %s, opnum = %d, "
7670 "type = %d, ind_levels = %d, win = %d, original addr:\n",
7671 GET_MODE_NAME (mode), opnum, type, ind_levels, *win);
7672 debug_rtx (x);
7674 if (x == ret)
7675 fprintf (stderr, "Same address returned\n");
7676 else if (!ret)
7677 fprintf (stderr, "NULL returned\n");
7678 else
7680 fprintf (stderr, "New address:\n");
7681 debug_rtx (ret);
7684 return ret;
7687 /* TARGET_LEGITIMATE_ADDRESS_P recognizes an RTL expression
7688 that is a valid memory address for an instruction.
7689 The MODE argument is the machine mode for the MEM expression
7690 that wants to use this address.
7692 On the RS/6000, there are four valid address forms: a SYMBOL_REF that
7693 refers to a constant pool entry of an address (or the sum of it
7694 plus a constant), a short (16-bit signed) constant plus a register,
7695 the sum of two registers, or a register indirect, possibly with an
7696 auto-increment. For DFmode, DDmode and DImode with a constant plus
7697 register, we must ensure that both words are addressable, or that we
7698 are using PowerPC64 with a word-aligned offset.
7700 For modes spanning multiple registers (DFmode and DDmode in 32-bit GPRs,
7701 32-bit DImode, TImode, TFmode, TDmode), indexed addressing cannot be used
7702 because adjacent memory cells are accessed by adding word-sized offsets
7703 during assembly output. */
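/* Illustrative RTL for the last three forms (hypothetical regs):
   reg + 16-bit offset is (plus (reg) (const_int 8)), indexed is
   (plus (reg) (reg)), and register indirect is plain (reg),
   possibly wrapped in (pre_inc ...) or (pre_dec ...). */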
7704 static bool
7705 rs6000_legitimate_address_p (machine_mode mode, rtx x, bool reg_ok_strict)
7707 bool reg_offset_p = reg_offset_addressing_ok_p (mode);
7709 /* If this is an unaligned stvx/lvx type address, discard the outer AND. */
7710 if (VECTOR_MEM_ALTIVEC_P (mode)
7711 && GET_CODE (x) == AND
7712 && GET_CODE (XEXP (x, 1)) == CONST_INT
7713 && INTVAL (XEXP (x, 1)) == -16)
7714 x = XEXP (x, 0);
7716 if (TARGET_ELF && RS6000_SYMBOL_REF_TLS_P (x))
7717 return 0;
7718 if (legitimate_indirect_address_p (x, reg_ok_strict))
7719 return 1;
7720 if (TARGET_UPDATE
7721 && (GET_CODE (x) == PRE_INC || GET_CODE (x) == PRE_DEC)
7722 && mode_supports_pre_incdec_p (mode)
7723 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict))
7724 return 1;
7725 if (virtual_stack_registers_memory_p (x))
7726 return 1;
7727 if (reg_offset_p && legitimate_small_data_p (mode, x))
7728 return 1;
7729 if (reg_offset_p
7730 && legitimate_constant_pool_address_p (x, mode,
7731 reg_ok_strict || lra_in_progress))
7732 return 1;
7733 /* If not REG_OK_STRICT (before reload), accept any stack offset. */
7734 if (! reg_ok_strict
7735 && reg_offset_p
7736 && GET_CODE (x) == PLUS
7737 && GET_CODE (XEXP (x, 0)) == REG
7738 && (XEXP (x, 0) == virtual_stack_vars_rtx
7739 || XEXP (x, 0) == arg_pointer_rtx)
7740 && GET_CODE (XEXP (x, 1)) == CONST_INT)
7741 return 1;
7742 if (rs6000_legitimate_offset_address_p (mode, x, reg_ok_strict, false))
7743 return 1;
7744 if (mode != TFmode
7745 && mode != TDmode
7746 && ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT)
7747 || TARGET_POWERPC64
7748 || (mode != DFmode && mode != DDmode)
7749 || (TARGET_E500_DOUBLE && mode != DDmode))
7750 && (TARGET_POWERPC64 || mode != DImode)
7751 && (mode != TImode || VECTOR_MEM_VSX_P (TImode))
7752 && mode != PTImode
7753 && !avoiding_indexed_address_p (mode)
7754 && legitimate_indexed_address_p (x, reg_ok_strict))
7755 return 1;
7756 if (TARGET_UPDATE && GET_CODE (x) == PRE_MODIFY
7757 && mode_supports_pre_modify_p (mode)
7758 && legitimate_indirect_address_p (XEXP (x, 0), reg_ok_strict)
7759 && (rs6000_legitimate_offset_address_p (mode, XEXP (x, 1),
7760 reg_ok_strict, false)
7761 || (!avoiding_indexed_address_p (mode)
7762 && legitimate_indexed_address_p (XEXP (x, 1), reg_ok_strict)))
7763 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7764 return 1;
7765 if (reg_offset_p && legitimate_lo_sum_address_p (mode, x, reg_ok_strict))
7766 return 1;
7767 return 0;
7770 /* Debug version of rs6000_legitimate_address_p. */
7771 static bool
7772 rs6000_debug_legitimate_address_p (machine_mode mode, rtx x,
7773 bool reg_ok_strict)
7775 bool ret = rs6000_legitimate_address_p (mode, x, reg_ok_strict);
7776 fprintf (stderr,
7777 "\nrs6000_legitimate_address_p: return = %s, mode = %s, "
7778 "strict = %d, reload = %s, code = %s\n",
7779 ret ? "true" : "false",
7780 GET_MODE_NAME (mode),
7781 reg_ok_strict,
7782 (reload_completed
7783 ? "after"
7784 : (reload_in_progress ? "progress" : "before")),
7785 GET_RTX_NAME (GET_CODE (x)));
7786 debug_rtx (x);
7788 return ret;
7791 /* Implement TARGET_MODE_DEPENDENT_ADDRESS_P. */
7793 static bool
7794 rs6000_mode_dependent_address_p (const_rtx addr,
7795 addr_space_t as ATTRIBUTE_UNUSED)
7797 return rs6000_mode_dependent_address_ptr (addr);
7800 /* Go to LABEL if ADDR (a legitimate address expression)
7801 has an effect that depends on the machine mode it is used for.
7803 On the RS/6000 this is true of all integral offsets (since AltiVec
7804 and VSX modes don't allow them) and of any pre-increment or decrement.
7806 ??? Except that due to conceptual problems in offsettable_address_p
7807 we can't really report the problems of integral offsets. So leave
7808 this assuming that the adjustable offset must be valid for the
7809 sub-words of a TFmode operand, which is what we had before. */
7811 static bool
7812 rs6000_mode_dependent_address (const_rtx addr)
7814 switch (GET_CODE (addr))
7816 case PLUS:
7817 /* Any offset from virtual_stack_vars_rtx and arg_pointer_rtx
7818 is considered a legitimate address before reload, so there
7819 are no offset restrictions in that case. Note that this
7820 condition is safe in strict mode because any address involving
7821 virtual_stack_vars_rtx or arg_pointer_rtx would already have
7822 been rejected as illegitimate. */
7823 if (XEXP (addr, 0) != virtual_stack_vars_rtx
7824 && XEXP (addr, 0) != arg_pointer_rtx
7825 && GET_CODE (XEXP (addr, 1)) == CONST_INT)
7827 unsigned HOST_WIDE_INT val = INTVAL (XEXP (addr, 1));
7828 return val + 0x8000 >= 0x10000 - (TARGET_POWERPC64 ? 8 : 12);
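/* (Illustration: on 64-bit targets this flags any offset of
   0x7ff8 or more, since the second doubleword of a worst-case
   16-byte access at val + 8 would overflow the signed 16-bit
   displacement.) */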
7830 break;
7832 case LO_SUM:
7833 /* Anything in the constant pool is sufficiently aligned that
7834 all bytes have the same high part address. */
7835 return !legitimate_constant_pool_address_p (addr, QImode, false);
7837 /* Auto-increment cases are now treated generically in recog.c. */
7838 case PRE_MODIFY:
7839 return TARGET_UPDATE;
7841 /* AND is only allowed in AltiVec loads. */
7842 case AND:
7843 return true;
7845 default:
7846 break;
7849 return false;
7852 /* Debug version of rs6000_mode_dependent_address. */
7853 static bool
7854 rs6000_debug_mode_dependent_address (const_rtx addr)
7856 bool ret = rs6000_mode_dependent_address (addr);
7858 fprintf (stderr, "\nrs6000_mode_dependent_address: ret = %s\n",
7859 ret ? "true" : "false");
7860 debug_rtx (addr);
7862 return ret;
7865 /* Implement FIND_BASE_TERM. */
7867 rtx
7868 rs6000_find_base_term (rtx op)
7870 rtx base;
7872 base = op;
7873 if (GET_CODE (base) == CONST)
7874 base = XEXP (base, 0);
7875 if (GET_CODE (base) == PLUS)
7876 base = XEXP (base, 0);
7877 if (GET_CODE (base) == UNSPEC)
7878 switch (XINT (base, 1))
7880 case UNSPEC_TOCREL:
7881 case UNSPEC_MACHOPIC_OFFSET:
7882 /* OP represents SYM [+ OFFSET] - ANCHOR. SYM is the base term
7883 for aliasing purposes. */
7884 return XVECEXP (base, 0, 0);
7887 return op;
7890 /* More elaborate version of recog's offsettable_memref_p predicate
7891 that works around the ??? note of rs6000_mode_dependent_address.
7892 In particular it accepts
7894 (mem:DI (plus:SI (reg/f:SI 31 31) (const_int 32760 [0x7ff8])))
7896 in 32-bit mode, which the recog predicate rejects.
7898 static bool
7899 rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode)
7901 bool worst_case;
7903 if (!MEM_P (op))
7904 return false;
7906 /* First mimic offsettable_memref_p. */
7907 if (offsettable_address_p (true, GET_MODE (op), XEXP (op, 0)))
7908 return true;
7910 /* offsettable_address_p invokes rs6000_mode_dependent_address, but
7911 the latter predicate knows nothing about the mode of the memory
7912 reference and, therefore, assumes that it is the largest supported
7913 mode (TFmode). As a consequence, legitimate offsettable memory
7914 references are rejected. rs6000_legitimate_offset_address_p contains
7915 the correct logic for the PLUS case of rs6000_mode_dependent_address,
7916 at least with a little bit of help here given that we know the
7917 actual registers used. */
7918 worst_case = ((TARGET_POWERPC64 && GET_MODE_CLASS (reg_mode) == MODE_INT)
7919 || GET_MODE_SIZE (reg_mode) == 4);
7920 return rs6000_legitimate_offset_address_p (GET_MODE (op), XEXP (op, 0),
7921 true, worst_case);
7924 /* Change register usage conditional on target flags. */
7925 static void
7926 rs6000_conditional_register_usage (void)
7928 int i;
7930 if (TARGET_DEBUG_TARGET)
7931 fprintf (stderr, "rs6000_conditional_register_usage called\n");
7933 /* Set MQ register fixed (already call_used) so that it will not be
7934 allocated. */
7935 fixed_regs[64] = 1;
7937 /* 64-bit AIX and Linux reserve GPR13 for thread-private data. */
7938 if (TARGET_64BIT)
7939 fixed_regs[13] = call_used_regs[13]
7940 = call_really_used_regs[13] = 1;
7942 /* Conditionally disable FPRs. */
7943 if (TARGET_SOFT_FLOAT || !TARGET_FPRS)
7944 for (i = 32; i < 64; i++)
7945 fixed_regs[i] = call_used_regs[i]
7946 = call_really_used_regs[i] = 1;
7948 /* The TOC register is not killed across calls in a way that is
7949 visible to the compiler. */
7950 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
7951 call_really_used_regs[2] = 0;
7953 if (DEFAULT_ABI == ABI_V4
7954 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7955 && flag_pic == 2)
7956 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7958 if (DEFAULT_ABI == ABI_V4
7959 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM
7960 && flag_pic == 1)
7961 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7962 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7963 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7965 if (DEFAULT_ABI == ABI_DARWIN
7966 && PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
7967 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7968 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7969 = call_really_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7971 if (TARGET_TOC && TARGET_MINIMAL_TOC)
7972 fixed_regs[RS6000_PIC_OFFSET_TABLE_REGNUM]
7973 = call_used_regs[RS6000_PIC_OFFSET_TABLE_REGNUM] = 1;
7975 if (TARGET_SPE)
7977 global_regs[SPEFSCR_REGNO] = 1;
7978 /* We used to use r14 as FIXED_SCRATCH to address SPE 64-bit
7979 registers in prologues and epilogues. We no longer use r14
7980 for FIXED_SCRATCH, but we're keeping r14 out of the allocation
7981 pool for link-compatibility with older versions of GCC. Once
7982 "old" code has died out, we can return r14 to the allocation
7983 pool. */
7984 fixed_regs[14]
7985 = call_used_regs[14]
7986 = call_really_used_regs[14] = 1;
7989 if (!TARGET_ALTIVEC && !TARGET_VSX)
7991 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
7992 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
7993 call_really_used_regs[VRSAVE_REGNO] = 1;
7996 if (TARGET_ALTIVEC || TARGET_VSX)
7997 global_regs[VSCR_REGNO] = 1;
7999 if (TARGET_ALTIVEC_ABI)
8001 for (i = FIRST_ALTIVEC_REGNO; i < FIRST_ALTIVEC_REGNO + 20; ++i)
8002 call_used_regs[i] = call_really_used_regs[i] = 1;
8004 /* AIX reserves VR20:31 in non-extended ABI mode. */
8005 if (TARGET_XCOFF)
8006 for (i = FIRST_ALTIVEC_REGNO + 20; i < FIRST_ALTIVEC_REGNO + 32; ++i)
8007 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
8012 /* Output insns to set DEST equal to the constant SOURCE as a series of
8013 lis, ori and shl instructions and return TRUE. */
8015 bool
8016 rs6000_emit_set_const (rtx dest, rtx source)
8018 machine_mode mode = GET_MODE (dest);
8019 rtx temp, set;
8020 rtx_insn *insn;
8021 HOST_WIDE_INT c;
8023 gcc_checking_assert (CONST_INT_P (source));
8024 c = INTVAL (source);
8025 switch (mode)
8027 case QImode:
8028 case HImode:
8029 emit_insn (gen_rtx_SET (VOIDmode, dest, source));
8030 return true;
8032 case SImode:
8033 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (SImode);
8035 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (temp),
8036 GEN_INT (c & ~(HOST_WIDE_INT) 0xffff)));
8037 emit_insn (gen_rtx_SET (VOIDmode, dest,
8038 gen_rtx_IOR (SImode, copy_rtx (temp),
8039 GEN_INT (c & 0xffff))));
8040 break;
8042 case DImode:
8043 if (!TARGET_POWERPC64)
8045 rtx hi, lo;
8047 hi = operand_subword_force (copy_rtx (dest), WORDS_BIG_ENDIAN == 0,
8048 DImode);
8049 lo = operand_subword_force (dest, WORDS_BIG_ENDIAN != 0,
8050 DImode);
8051 emit_move_insn (hi, GEN_INT (c >> 32));
8052 c = ((c & 0xffffffff) ^ 0x80000000) - 0x80000000;
8053 emit_move_insn (lo, GEN_INT (c));
8055 else
8056 rs6000_emit_set_long_const (dest, c);
8057 break;
8059 default:
8060 gcc_unreachable ();
8063 insn = get_last_insn ();
8064 set = single_set (insn);
8065 if (! CONSTANT_P (SET_SRC (set)))
8066 set_unique_reg_note (insn, REG_EQUAL, GEN_INT (c));
8068 return true;
8071 /* Subroutine of rs6000_emit_set_const, handling PowerPC64 DImode.
8072 Output insns to set DEST equal to the constant C as a series of
8073 lis, ori and shl instructions. */
8075 static void
8076 rs6000_emit_set_long_const (rtx dest, HOST_WIDE_INT c)
8078 rtx temp;
8079 HOST_WIDE_INT ud1, ud2, ud3, ud4;
8081 ud1 = c & 0xffff;
8082 c = c >> 16;
8083 ud2 = c & 0xffff;
8084 c = c >> 16;
8085 ud3 = c & 0xffff;
8086 c = c >> 16;
8087 ud4 = c & 0xffff;
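/* Worked example (illustrative): c = 0x123456789abcdef0 splits into
   ud4 = 0x1234, ud3 = 0x5678, ud2 = 0x9abc and ud1 = 0xdef0; the
   general case below then builds it with lis, ori, sldi 32, oris,
   ori. */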
8089 if ((ud4 == 0xffff && ud3 == 0xffff && ud2 == 0xffff && (ud1 & 0x8000))
8090 || (ud4 == 0 && ud3 == 0 && ud2 == 0 && ! (ud1 & 0x8000)))
8091 emit_move_insn (dest, GEN_INT ((ud1 ^ 0x8000) - 0x8000));
8093 else if ((ud4 == 0xffff && ud3 == 0xffff && (ud2 & 0x8000))
8094 || (ud4 == 0 && ud3 == 0 && ! (ud2 & 0x8000)))
8096 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8098 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8099 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8100 if (ud1 != 0)
8101 emit_move_insn (dest,
8102 gen_rtx_IOR (DImode, copy_rtx (temp),
8103 GEN_INT (ud1)));
8105 else if (ud3 == 0 && ud4 == 0)
8107 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8109 gcc_assert (ud2 & 0x8000);
8110 emit_move_insn (copy_rtx (temp),
8111 GEN_INT (((ud2 << 16) ^ 0x80000000) - 0x80000000));
8112 if (ud1 != 0)
8113 emit_move_insn (copy_rtx (temp),
8114 gen_rtx_IOR (DImode, copy_rtx (temp),
8115 GEN_INT (ud1)));
8116 emit_move_insn (dest,
8117 gen_rtx_ZERO_EXTEND (DImode,
8118 gen_lowpart (SImode,
8119 copy_rtx (temp))));
8121 else if ((ud4 == 0xffff && (ud3 & 0x8000))
8122 || (ud4 == 0 && ! (ud3 & 0x8000)))
8124 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8126 emit_move_insn (copy_rtx (temp),
8127 GEN_INT (((ud3 << 16) ^ 0x80000000) - 0x80000000));
8128 if (ud2 != 0)
8129 emit_move_insn (copy_rtx (temp),
8130 gen_rtx_IOR (DImode, copy_rtx (temp),
8131 GEN_INT (ud2)));
8132 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8133 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8134 GEN_INT (16)));
8135 if (ud1 != 0)
8136 emit_move_insn (dest,
8137 gen_rtx_IOR (DImode, copy_rtx (temp),
8138 GEN_INT (ud1)));
8140 else
8142 temp = !can_create_pseudo_p () ? dest : gen_reg_rtx (DImode);
8144 emit_move_insn (copy_rtx (temp),
8145 GEN_INT (((ud4 << 16) ^ 0x80000000) - 0x80000000));
8146 if (ud3 != 0)
8147 emit_move_insn (copy_rtx (temp),
8148 gen_rtx_IOR (DImode, copy_rtx (temp),
8149 GEN_INT (ud3)));
8151 emit_move_insn (ud2 != 0 || ud1 != 0 ? copy_rtx (temp) : dest,
8152 gen_rtx_ASHIFT (DImode, copy_rtx (temp),
8153 GEN_INT (32)));
8154 if (ud2 != 0)
8155 emit_move_insn (ud1 != 0 ? copy_rtx (temp) : dest,
8156 gen_rtx_IOR (DImode, copy_rtx (temp),
8157 GEN_INT (ud2 << 16)));
8158 if (ud1 != 0)
8159 emit_move_insn (dest,
8160 gen_rtx_IOR (DImode, copy_rtx (temp),
8161 GEN_INT (ud1)));
8165 /* Helper for the following function. Get rid of [r+r] memory refs
8166 in cases where they won't work (TImode, TFmode, TDmode, PTImode). */
8168 static void
8169 rs6000_eliminate_indexed_memrefs (rtx operands[2])
8171 if (reload_in_progress)
8172 return;
8174 if (GET_CODE (operands[0]) == MEM
8175 && GET_CODE (XEXP (operands[0], 0)) != REG
8176 && ! legitimate_constant_pool_address_p (XEXP (operands[0], 0),
8177 GET_MODE (operands[0]), false))
8178 operands[0]
8179 = replace_equiv_address (operands[0],
8180 copy_addr_to_reg (XEXP (operands[0], 0)));
8182 if (GET_CODE (operands[1]) == MEM
8183 && GET_CODE (XEXP (operands[1], 0)) != REG
8184 && ! legitimate_constant_pool_address_p (XEXP (operands[1], 0),
8185 GET_MODE (operands[1]), false))
8186 operands[1]
8187 = replace_equiv_address (operands[1],
8188 copy_addr_to_reg (XEXP (operands[1], 0)));
8191 /* Generate a vector of constants to permute MODE for a little-endian
8192 storage operation by swapping the two halves of a vector. */
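/* For example, V4SImode yields the selector { 2, 3, 0, 1 }: the two
   doubleword halves swap while elements within each half keep their
   order. */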
8193 static rtvec
8194 rs6000_const_vec (machine_mode mode)
8196 int i, subparts;
8197 rtvec v;
8199 switch (mode)
8201 case V1TImode:
8202 subparts = 1;
8203 break;
8204 case V2DFmode:
8205 case V2DImode:
8206 subparts = 2;
8207 break;
8208 case V4SFmode:
8209 case V4SImode:
8210 subparts = 4;
8211 break;
8212 case V8HImode:
8213 subparts = 8;
8214 break;
8215 case V16QImode:
8216 subparts = 16;
8217 break;
8218 default:
8219 gcc_unreachable();
8222 v = rtvec_alloc (subparts);
8224 for (i = 0; i < subparts / 2; ++i)
8225 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i + subparts / 2);
8226 for (i = subparts / 2; i < subparts; ++i)
8227 RTVEC_ELT (v, i) = gen_rtx_CONST_INT (DImode, i - subparts / 2);
8229 return v;
8232 /* Generate a permute rtx that represents an lxvd2x, stxvd2x, or xxpermdi
8233 for a VSX load or store operation. */
8234 rtx
8235 rs6000_gen_le_vsx_permute (rtx source, machine_mode mode)
8237 rtx par = gen_rtx_PARALLEL (VOIDmode, rs6000_const_vec (mode));
8238 return gen_rtx_VEC_SELECT (mode, source, par);
8241 /* Emit a little-endian load from vector memory location SOURCE to VSX
8242 register DEST in mode MODE. The load is done with two permuting
8243 insns that represent an lxvd2x and an xxpermdi.
8244 void
8245 rs6000_emit_le_vsx_load (rtx dest, rtx source, machine_mode mode)
8247 rtx tmp, permute_mem, permute_reg;
8249 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8250 V1TImode). */
8251 if (mode == TImode || mode == V1TImode)
8253 mode = V2DImode;
8254 dest = gen_lowpart (V2DImode, dest);
8255 source = adjust_address (source, V2DImode, 0);
8258 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (dest) : dest;
8259 permute_mem = rs6000_gen_le_vsx_permute (source, mode);
8260 permute_reg = rs6000_gen_le_vsx_permute (tmp, mode);
8261 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_mem));
8262 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_reg));
8265 /* Emit a little-endian store to vector memory location DEST from VSX
8266 register SOURCE in mode MODE. The store is done with two permuting
8267 insns that represent an xxpermdi and an stxvd2x.
8268 void
8269 rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
8271 rtx tmp, permute_src, permute_tmp;
8273 /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
8274 V1TImode). */
8275 if (mode == TImode || mode == V1TImode)
8277 mode = V2DImode;
8278 dest = adjust_address (dest, V2DImode, 0);
8279 source = gen_lowpart (V2DImode, source);
8282 tmp = can_create_pseudo_p () ? gen_reg_rtx_and_attrs (source) : source;
8283 permute_src = rs6000_gen_le_vsx_permute (source, mode);
8284 permute_tmp = rs6000_gen_le_vsx_permute (tmp, mode);
8285 emit_insn (gen_rtx_SET (VOIDmode, tmp, permute_src));
8286 emit_insn (gen_rtx_SET (VOIDmode, dest, permute_tmp));
8289 /* Emit a sequence representing a little-endian VSX load or store,
8290 moving data from SOURCE to DEST in mode MODE. This is done
8291 separately from rs6000_emit_move to ensure it is called only
8292 during expand. LE VSX loads and stores introduced later are
8293 handled with a split. The expand-time RTL generation allows
8294 us to optimize away redundant pairs of register-permutes. */
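/* (Rationale sketch: a permuting store followed by a permuting load
   of the same value produces two back-to-back permutes that cancel,
   and generating them at expand time lets later passes delete the
   pair.) */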
8295 void
8296 rs6000_emit_le_vsx_move (rtx dest, rtx source, machine_mode mode)
8298 gcc_assert (!BYTES_BIG_ENDIAN
8299 && VECTOR_MEM_VSX_P (mode)
8300 && !gpr_or_gpr_p (dest, source)
8301 && (MEM_P (source) ^ MEM_P (dest)));
8303 if (MEM_P (source))
8305 gcc_assert (REG_P (dest) || GET_CODE (dest) == SUBREG);
8306 rs6000_emit_le_vsx_load (dest, source, mode);
8308 else
8310 if (!REG_P (source))
8311 source = force_reg (mode, source);
8312 rs6000_emit_le_vsx_store (dest, source, mode);
8316 /* Emit a move from SOURCE to DEST in mode MODE. */
8317 void
8318 rs6000_emit_move (rtx dest, rtx source, machine_mode mode)
8320 rtx operands[2];
8321 operands[0] = dest;
8322 operands[1] = source;
8324 if (TARGET_DEBUG_ADDR)
8326 fprintf (stderr,
8327 "\nrs6000_emit_move: mode = %s, reload_in_progress = %d, "
8328 "reload_completed = %d, can_create_pseudos = %d.\ndest:\n",
8329 GET_MODE_NAME (mode),
8330 reload_in_progress,
8331 reload_completed,
8332 can_create_pseudo_p ());
8333 debug_rtx (dest);
8334 fprintf (stderr, "source:\n");
8335 debug_rtx (source);
8338 /* Sanity checks. Check that we get CONST_WIDE_INT only when we should. */
8339 if (CONST_WIDE_INT_P (operands[1])
8340 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
8342 /* This should be fixed with the introduction of CONST_WIDE_INT. */
8343 gcc_unreachable ();
8346 /* Check if GCC is setting up a block move that will end up using FP
8347 registers as temporaries. We must make sure this is acceptable. */
8348 if (GET_CODE (operands[0]) == MEM
8349 && GET_CODE (operands[1]) == MEM
8350 && mode == DImode
8351 && (SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[0]))
8352 || SLOW_UNALIGNED_ACCESS (DImode, MEM_ALIGN (operands[1])))
8353 && ! (SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[0]) > 32
8354 ? 32 : MEM_ALIGN (operands[0])))
8355 || SLOW_UNALIGNED_ACCESS (SImode, (MEM_ALIGN (operands[1]) > 32
8356 ? 32
8357 : MEM_ALIGN (operands[1]))))
8358 && ! MEM_VOLATILE_P (operands [0])
8359 && ! MEM_VOLATILE_P (operands [1]))
8361 emit_move_insn (adjust_address (operands[0], SImode, 0),
8362 adjust_address (operands[1], SImode, 0));
8363 emit_move_insn (adjust_address (copy_rtx (operands[0]), SImode, 4),
8364 adjust_address (copy_rtx (operands[1]), SImode, 4));
8365 return;
8368 if (can_create_pseudo_p () && GET_CODE (operands[0]) == MEM
8369 && !gpc_reg_operand (operands[1], mode))
8370 operands[1] = force_reg (mode, operands[1]);
8372 /* Recognize the case where operand[1] is a reference to thread-local
8373 data and load its address to a register. */
8374 if (tls_referenced_p (operands[1]))
8376 enum tls_model model;
8377 rtx tmp = operands[1];
8378 rtx addend = NULL;
8380 if (GET_CODE (tmp) == CONST && GET_CODE (XEXP (tmp, 0)) == PLUS)
8382 addend = XEXP (XEXP (tmp, 0), 1);
8383 tmp = XEXP (XEXP (tmp, 0), 0);
8386 gcc_assert (GET_CODE (tmp) == SYMBOL_REF);
8387 model = SYMBOL_REF_TLS_MODEL (tmp);
8388 gcc_assert (model != 0);
8390 tmp = rs6000_legitimize_tls_address (tmp, model);
8391 if (addend)
8393 tmp = gen_rtx_PLUS (mode, tmp, addend);
8394 tmp = force_operand (tmp, operands[0]);
8396 operands[1] = tmp;
8399 /* Handle the case where reload calls us with an invalid address. */
8400 if (reload_in_progress && mode == Pmode
8401 && (! general_operand (operands[1], mode)
8402 || ! nonimmediate_operand (operands[0], mode)))
8403 goto emit_set;
8405 /* 128-bit constant floating-point values on Darwin should really be
8406 loaded as two parts. */
8407 if (!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128
8408 && mode == TFmode && GET_CODE (operands[1]) == CONST_DOUBLE)
8410 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode, 0),
8411 simplify_gen_subreg (DFmode, operands[1], mode, 0),
8412 DFmode);
8413 rs6000_emit_move (simplify_gen_subreg (DFmode, operands[0], mode,
8414 GET_MODE_SIZE (DFmode)),
8415 simplify_gen_subreg (DFmode, operands[1], mode,
8416 GET_MODE_SIZE (DFmode)),
8417 DFmode);
8418 return;
8421 if (reload_in_progress && cfun->machine->sdmode_stack_slot != NULL_RTX)
8422 cfun->machine->sdmode_stack_slot =
8423 eliminate_regs (cfun->machine->sdmode_stack_slot, VOIDmode, NULL_RTX);
8426 /* Transform (p0:DD, (SUBREG:DD p1:SD)) to ((SUBREG:SD p0:DD),
8427 p1:SD) if p1 is not of floating point class and p0 is spilled,
8428 since we have no analogous movsd_store for this. */
8429 if (lra_in_progress && mode == DDmode
8430 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8431 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8432 && GET_CODE (operands[1]) == SUBREG && REG_P (SUBREG_REG (operands[1]))
8433 && GET_MODE (SUBREG_REG (operands[1])) == SDmode)
8435 enum reg_class cl;
8436 int regno = REGNO (SUBREG_REG (operands[1]));
8438 if (regno >= FIRST_PSEUDO_REGISTER)
8440 cl = reg_preferred_class (regno);
8441 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][1];
8443 if (regno >= 0 && ! FP_REGNO_P (regno))
8445 mode = SDmode;
8446 operands[0] = gen_lowpart_SUBREG (SDmode, operands[0]);
8447 operands[1] = SUBREG_REG (operands[1]);
8450 if (lra_in_progress
8451 && mode == SDmode
8452 && REG_P (operands[0]) && REGNO (operands[0]) >= FIRST_PSEUDO_REGISTER
8453 && reg_preferred_class (REGNO (operands[0])) == NO_REGS
8454 && (REG_P (operands[1])
8455 || (GET_CODE (operands[1]) == SUBREG
8456 && REG_P (SUBREG_REG (operands[1])))))
8458 int regno = REGNO (GET_CODE (operands[1]) == SUBREG
8459 ? SUBREG_REG (operands[1]) : operands[1]);
8460 enum reg_class cl;
8462 if (regno >= FIRST_PSEUDO_REGISTER)
8464 cl = reg_preferred_class (regno);
8465 gcc_assert (cl != NO_REGS);
8466 regno = ira_class_hard_regs[cl][0];
8468 if (FP_REGNO_P (regno))
8470 if (GET_MODE (operands[0]) != DDmode)
8471 operands[0] = gen_rtx_SUBREG (DDmode, operands[0], 0);
8472 emit_insn (gen_movsd_store (operands[0], operands[1]));
8474 else if (INT_REGNO_P (regno))
8475 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8476 else
8477 gcc_unreachable();
8478 return;
8480 /* Transform ((SUBREG:DD p0:SD), p1:DD) to (p0:SD, (SUBREG:SD
8481 p1:DD)) if p0 is not of floating point class and p1 is spilled,
8482 since we have no analogous movsd_load for this. */
8483 if (lra_in_progress && mode == DDmode
8484 && GET_CODE (operands[0]) == SUBREG && REG_P (SUBREG_REG (operands[0]))
8485 && GET_MODE (SUBREG_REG (operands[0])) == SDmode
8486 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8487 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8489 enum reg_class cl;
8490 int regno = REGNO (SUBREG_REG (operands[0]));
8492 if (regno >= FIRST_PSEUDO_REGISTER)
8494 cl = reg_preferred_class (regno);
8495 regno = cl == NO_REGS ? -1 : ira_class_hard_regs[cl][0];
8497 if (regno >= 0 && ! FP_REGNO_P (regno))
8499 mode = SDmode;
8500 operands[0] = SUBREG_REG (operands[0]);
8501 operands[1] = gen_lowpart_SUBREG (SDmode, operands[1]);
8504 if (lra_in_progress
8505 && mode == SDmode
8506 && (REG_P (operands[0])
8507 || (GET_CODE (operands[0]) == SUBREG
8508 && REG_P (SUBREG_REG (operands[0]))))
8509 && REG_P (operands[1]) && REGNO (operands[1]) >= FIRST_PSEUDO_REGISTER
8510 && reg_preferred_class (REGNO (operands[1])) == NO_REGS)
8512 int regno = REGNO (GET_CODE (operands[0]) == SUBREG
8513 ? SUBREG_REG (operands[0]) : operands[0]);
8514 enum reg_class cl;
8516 if (regno >= FIRST_PSEUDO_REGISTER)
8518 cl = reg_preferred_class (regno);
8519 gcc_assert (cl != NO_REGS);
8520 regno = ira_class_hard_regs[cl][0];
8522 if (FP_REGNO_P (regno))
8524 if (GET_MODE (operands[1]) != DDmode)
8525 operands[1] = gen_rtx_SUBREG (DDmode, operands[1], 0);
8526 emit_insn (gen_movsd_load (operands[0], operands[1]));
8528 else if (INT_REGNO_P (regno))
8529 emit_insn (gen_movsd_hardfloat (operands[0], operands[1]));
8530 else
8531 gcc_unreachable();
8532 return;
8535 if (reload_in_progress
8536 && mode == SDmode
8537 && cfun->machine->sdmode_stack_slot != NULL_RTX
8538 && MEM_P (operands[0])
8539 && rtx_equal_p (operands[0], cfun->machine->sdmode_stack_slot)
8540 && REG_P (operands[1]))
8542 if (FP_REGNO_P (REGNO (operands[1])))
8544 rtx mem = adjust_address_nv (operands[0], DDmode, 0);
8545 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8546 emit_insn (gen_movsd_store (mem, operands[1]));
8548 else if (INT_REGNO_P (REGNO (operands[1])))
8550 rtx mem = operands[0];
8551 if (BYTES_BIG_ENDIAN)
8552 mem = adjust_address_nv (mem, mode, 4);
8553 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8554 emit_insn (gen_movsd_hardfloat (mem, operands[1]));
8556 else
8557 gcc_unreachable();
8558 return;
8560 if (reload_in_progress
8561 && mode == SDmode
8562 && REG_P (operands[0])
8563 && MEM_P (operands[1])
8564 && cfun->machine->sdmode_stack_slot != NULL_RTX
8565 && rtx_equal_p (operands[1], cfun->machine->sdmode_stack_slot))
8567 if (FP_REGNO_P (REGNO (operands[0])))
8569 rtx mem = adjust_address_nv (operands[1], DDmode, 0);
8570 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8571 emit_insn (gen_movsd_load (operands[0], mem));
8573 else if (INT_REGNO_P (REGNO (operands[0])))
8575 rtx mem = operands[1];
8576 if (BYTES_BIG_ENDIAN)
8577 mem = adjust_address_nv (mem, mode, 4);
8578 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
8579 emit_insn (gen_movsd_hardfloat (operands[0], mem));
8581 else
8582 gcc_unreachable();
8583 return;
8586 /* FIXME: In the long term, this switch statement should go away
8587 and be replaced by a sequence of tests based on things like
8588 mode == Pmode. */
8589 switch (mode)
8591 case HImode:
8592 case QImode:
8593 if (CONSTANT_P (operands[1])
8594 && GET_CODE (operands[1]) != CONST_INT)
8595 operands[1] = force_const_mem (mode, operands[1]);
8596 break;
8598 case TFmode:
8599 case TDmode:
8600 rs6000_eliminate_indexed_memrefs (operands);
8601 /* fall through */
8603 case DFmode:
8604 case DDmode:
8605 case SFmode:
8606 case SDmode:
8607 if (CONSTANT_P (operands[1])
8608 && ! easy_fp_constant (operands[1], mode))
8609 operands[1] = force_const_mem (mode, operands[1]);
8610 break;
8612 case V16QImode:
8613 case V8HImode:
8614 case V4SFmode:
8615 case V4SImode:
8616 case V4HImode:
8617 case V2SFmode:
8618 case V2SImode:
8619 case V1DImode:
8620 case V2DFmode:
8621 case V2DImode:
8622 case V1TImode:
8623 if (CONSTANT_P (operands[1])
8624 && !easy_vector_constant (operands[1], mode))
8625 operands[1] = force_const_mem (mode, operands[1]);
8626 break;
8628 case SImode:
8629 case DImode:
8630 /* Use the default pattern for the address of ELF small data. */
8631 if (TARGET_ELF
8632 && mode == Pmode
8633 && DEFAULT_ABI == ABI_V4
8634 && (GET_CODE (operands[1]) == SYMBOL_REF
8635 || GET_CODE (operands[1]) == CONST)
8636 && small_data_operand (operands[1], mode))
8638 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8639 return;
8642 if (DEFAULT_ABI == ABI_V4
8643 && mode == Pmode && mode == SImode
8644 && flag_pic == 1 && got_operand (operands[1], mode))
8646 emit_insn (gen_movsi_got (operands[0], operands[1]));
8647 return;
8650 if ((TARGET_ELF || DEFAULT_ABI == ABI_DARWIN)
8651 && TARGET_NO_TOC
8652 && ! flag_pic
8653 && mode == Pmode
8654 && CONSTANT_P (operands[1])
8655 && GET_CODE (operands[1]) != HIGH
8656 && GET_CODE (operands[1]) != CONST_INT)
8658 rtx target = (!can_create_pseudo_p ()
8659 ? operands[0]
8660 : gen_reg_rtx (mode));
8662 /* If this is a function address on -mcall-aixdesc,
8663 convert it to the address of the descriptor. */
8664 if (DEFAULT_ABI == ABI_AIX
8665 && GET_CODE (operands[1]) == SYMBOL_REF
8666 && XSTR (operands[1], 0)[0] == '.')
8668 const char *name = XSTR (operands[1], 0);
8669 rtx new_ref;
8670 while (*name == '.')
8671 name++;
8672 new_ref = gen_rtx_SYMBOL_REF (Pmode, name);
8673 CONSTANT_POOL_ADDRESS_P (new_ref)
8674 = CONSTANT_POOL_ADDRESS_P (operands[1]);
8675 SYMBOL_REF_FLAGS (new_ref) = SYMBOL_REF_FLAGS (operands[1]);
8676 SYMBOL_REF_USED (new_ref) = SYMBOL_REF_USED (operands[1]);
8677 SYMBOL_REF_DATA (new_ref) = SYMBOL_REF_DATA (operands[1]);
8678 operands[1] = new_ref;
8681 if (DEFAULT_ABI == ABI_DARWIN)
8683 #if TARGET_MACHO
8684 if (MACHO_DYNAMIC_NO_PIC_P)
8686 /* Take care of any required data indirection. */
8687 operands[1] = rs6000_machopic_legitimize_pic_address (
8688 operands[1], mode, operands[0]);
8689 if (operands[0] != operands[1])
8690 emit_insn (gen_rtx_SET (VOIDmode,
8691 operands[0], operands[1]));
8692 return;
8694 #endif
8695 emit_insn (gen_macho_high (target, operands[1]));
8696 emit_insn (gen_macho_low (operands[0], target, operands[1]));
8697 return;
8700 emit_insn (gen_elf_high (target, operands[1]));
8701 emit_insn (gen_elf_low (operands[0], target, operands[1]));
8702 return;
8705 /* If this is a SYMBOL_REF that refers to a constant pool entry,
8706 and we have put it in the TOC, we just need to make a TOC-relative
8707 reference to it. */
8708 if (TARGET_TOC
8709 && GET_CODE (operands[1]) == SYMBOL_REF
8710 && use_toc_relative_ref (operands[1]))
8711 operands[1] = create_TOC_reference (operands[1], operands[0]);
8712 else if (mode == Pmode
8713 && CONSTANT_P (operands[1])
8714 && GET_CODE (operands[1]) != HIGH
8715 && ((GET_CODE (operands[1]) != CONST_INT
8716 && ! easy_fp_constant (operands[1], mode))
8717 || (GET_CODE (operands[1]) == CONST_INT
8718 && (num_insns_constant (operands[1], mode)
8719 > (TARGET_CMODEL != CMODEL_SMALL ? 3 : 2)))
8720 || (GET_CODE (operands[0]) == REG
8721 && FP_REGNO_P (REGNO (operands[0]))))
8722 && !toc_relative_expr_p (operands[1], false)
8723 && (TARGET_CMODEL == CMODEL_SMALL
8724 || can_create_pseudo_p ()
8725 || (REG_P (operands[0])
8726 && INT_REG_OK_FOR_BASE_P (operands[0], true))))
8729 #if TARGET_MACHO
8730 /* Darwin uses a special PIC legitimizer. */
8731 if (DEFAULT_ABI == ABI_DARWIN && MACHOPIC_INDIRECT)
8733 operands[1] =
8734 rs6000_machopic_legitimize_pic_address (operands[1], mode,
8735 operands[0]);
8736 if (operands[0] != operands[1])
8737 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8738 return;
8740 #endif
8742 /* If we are to limit the number of things we put in the TOC and
8743 this is a symbol plus a constant we can add in one insn,
8744 just put the symbol in the TOC and add the constant. Don't do
8745 this if reload is in progress. */
8746 if (GET_CODE (operands[1]) == CONST
8747 && TARGET_NO_SUM_IN_TOC && ! reload_in_progress
8748 && GET_CODE (XEXP (operands[1], 0)) == PLUS
8749 && add_operand (XEXP (XEXP (operands[1], 0), 1), mode)
8750 && (GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
8751 || GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == SYMBOL_REF)
8752 && ! side_effects_p (operands[0]))
8754 rtx sym =
8755 force_const_mem (mode, XEXP (XEXP (operands[1], 0), 0));
8756 rtx other = XEXP (XEXP (operands[1], 0), 1);
8758 sym = force_reg (mode, sym);
8759 emit_insn (gen_add3_insn (operands[0], sym, other));
8760 return;
8763 operands[1] = force_const_mem (mode, operands[1]);
8765 if (TARGET_TOC
8766 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
8767 && constant_pool_expr_p (XEXP (operands[1], 0))
8768 && ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (
8769 get_pool_constant (XEXP (operands[1], 0)),
8770 get_pool_mode (XEXP (operands[1], 0))))
8772 rtx tocref = create_TOC_reference (XEXP (operands[1], 0),
8773 operands[0]);
8774 operands[1] = gen_const_mem (mode, tocref);
8775 set_mem_alias_set (operands[1], get_TOC_alias_set ());
8778 break;
8780 case TImode:
8781 if (!VECTOR_MEM_VSX_P (TImode))
8782 rs6000_eliminate_indexed_memrefs (operands);
8783 break;
8785 case PTImode:
8786 rs6000_eliminate_indexed_memrefs (operands);
8787 break;
8789 default:
8790 fatal_insn ("bad move", gen_rtx_SET (VOIDmode, dest, source));
8793 /* Above, we may have called force_const_mem which may have returned
8794 an invalid address. If we can, fix this up; otherwise, reload will
8795 have to deal with it. */
8796 if (GET_CODE (operands[1]) == MEM && ! reload_in_progress)
8797 operands[1] = validize_mem (operands[1]);
8799 emit_set:
8800 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[1]));
8803 /* Return true if a structure, union or array containing FIELD should be
8804 accessed using BLKmode.
8806 For the SPE, simd types are V2SI, and gcc can be tempted to put the
8807 entire thing in a DI and use subregs to access the internals.
8808 store_bit_field() will force (subreg:DI (reg:V2SI x))'s to the
8809 back-end. Because a single GPR can hold a V2SI, but not a DI, the
8810 best thing to do is set structs to BLKmode and avoid Severe Tire
8811 Damage.
8813 On e500 v2, DF and DI modes suffer from the same anomaly. DF can
8814 fit into one register, whereas DI still needs two. */
8816 static bool
8817 rs6000_member_type_forces_blk (const_tree field, machine_mode mode)
8819 return ((TARGET_SPE && TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE)
8820 || (TARGET_E500_DOUBLE && mode == DFmode));
8823 /* Nonzero if we can use a floating-point register to pass this arg. */
8824 #define USE_FP_FOR_ARG_P(CUM,MODE) \
8825 (SCALAR_FLOAT_MODE_P (MODE) \
8826 && (CUM)->fregno <= FP_ARG_MAX_REG \
8827 && TARGET_HARD_FLOAT && TARGET_FPRS)
8829 /* Nonzero if we can use an AltiVec register to pass this arg. */
8830 #define USE_ALTIVEC_FOR_ARG_P(CUM,MODE,NAMED) \
8831 (ALTIVEC_OR_VSX_VECTOR_MODE (MODE) \
8832 && (CUM)->vregno <= ALTIVEC_ARG_MAX_REG \
8833 && TARGET_ALTIVEC_ABI \
8834 && (NAMED))
8836 /* Walk down the type tree of TYPE counting consecutive base elements.
8837 If *MODEP is VOIDmode, then set it to the first valid floating point
8838 or vector type. If a non-floating point or vector type is found, or
8839 if a floating point or vector type that doesn't match a non-VOIDmode
8840 *MODEP is found, then return -1, otherwise return the count in the
8841 sub-tree. */
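/* For instance (illustrative), struct { double x, y; } yields
   *MODEP = DFmode and a count of 2, while struct { double x; float y; }
   returns -1 because the element modes differ. */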
8843 static int
8844 rs6000_aggregate_candidate (const_tree type, machine_mode *modep)
8846 machine_mode mode;
8847 HOST_WIDE_INT size;
8849 switch (TREE_CODE (type))
8851 case REAL_TYPE:
8852 mode = TYPE_MODE (type);
8853 if (!SCALAR_FLOAT_MODE_P (mode))
8854 return -1;
8856 if (*modep == VOIDmode)
8857 *modep = mode;
8859 if (*modep == mode)
8860 return 1;
8862 break;
8864 case COMPLEX_TYPE:
8865 mode = TYPE_MODE (TREE_TYPE (type));
8866 if (!SCALAR_FLOAT_MODE_P (mode))
8867 return -1;
8869 if (*modep == VOIDmode)
8870 *modep = mode;
8872 if (*modep == mode)
8873 return 2;
8875 break;
8877 case VECTOR_TYPE:
8878 if (!TARGET_ALTIVEC_ABI || !TARGET_ALTIVEC)
8879 return -1;
8881 /* Use V4SImode as representative of all 128-bit vector types. */
8882 size = int_size_in_bytes (type);
8883 switch (size)
8885 case 16:
8886 mode = V4SImode;
8887 break;
8888 default:
8889 return -1;
8892 if (*modep == VOIDmode)
8893 *modep = mode;
8895 /* Vector modes are considered to be opaque: two vectors are
8896 equivalent for the purposes of being homogeneous aggregates
8897 if they are the same size. */
8898 if (*modep == mode)
8899 return 1;
8901 break;
8903 case ARRAY_TYPE:
8905 int count;
8906 tree index = TYPE_DOMAIN (type);
8908 /* Can't handle incomplete types nor sizes that are not
8909 fixed. */
8910 if (!COMPLETE_TYPE_P (type)
8911 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8912 return -1;
8914 count = rs6000_aggregate_candidate (TREE_TYPE (type), modep);
8915 if (count == -1
8916 || !index
8917 || !TYPE_MAX_VALUE (index)
8918 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
8919 || !TYPE_MIN_VALUE (index)
8920 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
8921 || count < 0)
8922 return -1;
8924 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
8925 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
8927 /* There must be no padding. */
8928 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8929 return -1;
8931 return count;
8934 case RECORD_TYPE:
8936 int count = 0;
8937 int sub_count;
8938 tree field;
8940 /* Can't handle incomplete types nor sizes that are not
8941 fixed. */
8942 if (!COMPLETE_TYPE_P (type)
8943 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8944 return -1;
8946 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8948 if (TREE_CODE (field) != FIELD_DECL)
8949 continue;
8951 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
8952 if (sub_count < 0)
8953 return -1;
8954 count += sub_count;
8957 /* There must be no padding. */
8958 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8959 return -1;
8961 return count;
8964 case UNION_TYPE:
8965 case QUAL_UNION_TYPE:
8967 /* These aren't very interesting except in a degenerate case. */
8968 int count = 0;
8969 int sub_count;
8970 tree field;
8972 /* Can't handle incomplete types nor sizes that are not
8973 fixed. */
8974 if (!COMPLETE_TYPE_P (type)
8975 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
8976 return -1;
8978 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
8980 if (TREE_CODE (field) != FIELD_DECL)
8981 continue;
8983 sub_count = rs6000_aggregate_candidate (TREE_TYPE (field), modep);
8984 if (sub_count < 0)
8985 return -1;
8986 count = count > sub_count ? count : sub_count;
8989 /* There must be no padding. */
8990 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
8991 return -1;
8993 return count;
8996 default:
8997 break;
9000 return -1;
9003 /* If an argument, whose type is described by TYPE and MODE, is a homogeneous
9004 float or vector aggregate that shall be passed in FP/vector registers
9005 according to the ELFv2 ABI, return the homogeneous element mode in
9006 *ELT_MODE and the number of elements in *N_ELTS, and return TRUE.
9008 Otherwise, set *ELT_MODE to MODE and *N_ELTS to 1, and return FALSE. */
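/* Hypothetical example: under ELFv2, struct { double a[3]; } gives
   *ELT_MODE = DFmode and *N_ELTS = 3, so the argument can be passed
   in three FPRs. */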
9010 static bool
9011 rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
9012 machine_mode *elt_mode,
9013 int *n_elts)
9015 /* Note that we do not accept complex types at the top level as
9016 homogeneous aggregates; these types are handled via the
9017 targetm.calls.split_complex_arg mechanism. Complex types
9018 can be elements of homogeneous aggregates, however. */
9019 if (DEFAULT_ABI == ABI_ELFv2 && type && AGGREGATE_TYPE_P (type))
9021 machine_mode field_mode = VOIDmode;
9022 int field_count = rs6000_aggregate_candidate (type, &field_mode);
9024 if (field_count > 0)
9026 int n_regs = (SCALAR_FLOAT_MODE_P (field_mode)?
9027 (GET_MODE_SIZE (field_mode) + 7) >> 3 : 1);
9029 /* The ELFv2 ABI allows homogeneous aggregates to occupy
9030 up to AGGR_ARG_NUM_REG registers. */
9031 if (field_count * n_regs <= AGGR_ARG_NUM_REG)
9033 if (elt_mode)
9034 *elt_mode = field_mode;
9035 if (n_elts)
9036 *n_elts = field_count;
9037 return true;
9042 if (elt_mode)
9043 *elt_mode = mode;
9044 if (n_elts)
9045 *n_elts = 1;
9046 return false;
9049 /* Return a nonzero value to say to return the function value in
9050 memory, just as large structures are always returned. TYPE will be
9051 the data type of the value, and FNTYPE will be the type of the
9052 function doing the returning, or @code{NULL} for libcalls.
9054 The AIX ABI for the RS/6000 specifies that all structures are
9055 returned in memory. The Darwin ABI does the same.
9057 For the Darwin 64 Bit ABI, a function result can be returned in
9058 registers or in memory, depending on the size of the return data
9059 type. If it is returned in registers, the value occupies the same
9060 registers as it would if it were the first and only function
9061 argument. Otherwise, the function places its result in memory at
9062 the location pointed to by GPR3.
9064 The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
9065 but a draft put them in memory, and GCC used to implement the draft
9066 instead of the final standard. Therefore, aix_struct_return
9067 controls this instead of DEFAULT_ABI; V.4 targets needing backward
9068 compatibility can change DRAFT_V4_STRUCT_RET to override the
9069 default, and -m switches get the final word. See
9070 rs6000_option_override_internal for more details.
9072 The PPC32 SVR4 ABI uses IEEE double extended for long double, if 128-bit
9073 long double support is enabled. These values are returned in memory.
9075 int_size_in_bytes returns -1 for variable size objects, which go in
9076 memory always. The cast to unsigned makes -1 > 8. */
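/* For illustration: a 12-byte struct { int a[3]; } is returned in
   memory under the AIX rules below (aix_struct_return), but in
   registers under ELFv2, which accepts aggregates of up to 16
   bytes. */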
9078 static bool
9079 rs6000_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
9081 /* For the Darwin64 ABI, test if we can fit the return value in regs. */
9082 if (TARGET_MACHO
9083 && rs6000_darwin64_abi
9084 && TREE_CODE (type) == RECORD_TYPE
9085 && int_size_in_bytes (type) > 0)
9087 CUMULATIVE_ARGS valcum;
9088 rtx valret;
9090 valcum.words = 0;
9091 valcum.fregno = FP_ARG_MIN_REG;
9092 valcum.vregno = ALTIVEC_ARG_MIN_REG;
9093 /* Do a trial code generation as if this were going to be passed
9094 as an argument; if any part goes in memory, we return NULL. */
9095 valret = rs6000_darwin64_record_arg (&valcum, type, true, true);
9096 if (valret)
9097 return false;
9098 /* Otherwise fall through to more conventional ABI rules. */
9101 /* The ELFv2 ABI returns homogeneous float/vector aggregates in registers. */
9102 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (type), type,
9103 NULL, NULL))
9104 return false;
9106 /* The ELFv2 ABI returns aggregates of up to 16 bytes in registers. */
9107 if (DEFAULT_ABI == ABI_ELFv2 && AGGREGATE_TYPE_P (type)
9108 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) <= 16)
9109 return false;
9111 if (AGGREGATE_TYPE_P (type)
9112 && (aix_struct_return
9113 || (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8))
9114 return true;
9116 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
9117 modes only exist for GCC vector types if -maltivec. */
9118 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI
9119 && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
9120 return false;
9122 /* Return synthetic vectors in memory. */
9123 if (TREE_CODE (type) == VECTOR_TYPE
9124 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
9126 static bool warned_for_return_big_vectors = false;
9127 if (!warned_for_return_big_vectors)
9129 warning (0, "GCC vector returned by reference: "
9130 "non-standard ABI extension with no compatibility guarantee");
9131 warned_for_return_big_vectors = true;
9133 return true;
9136 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && TYPE_MODE (type) == TFmode)
9137 return true;
9139 return false;
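
/* Illustrative sketch, not part of the original file: how the predicate
   above classifies a few return types, assuming a 64-bit ELFv2 target
   (other ABIs differ as described in the comment above).  */
struct ret_regs_hfa_example { double a, b; };   /* homogeneous FP: FPRs  */
struct ret_regs_small_example { char c[12]; };  /* <= 16 bytes: GPRs     */
struct ret_mem_example { char c[24]; };         /* > 16 bytes, not an
                                                   HFA: memory           */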
9142 /* Specify whether values returned in registers should be at the most
9143 significant end of a register. We want aggregates returned by
9144 value to match the way aggregates are passed to functions. */
9146 static bool
9147 rs6000_return_in_msb (const_tree valtype)
9149 return (DEFAULT_ABI == ABI_ELFv2
9150 && BYTES_BIG_ENDIAN
9151 && AGGREGATE_TYPE_P (valtype)
9152 && FUNCTION_ARG_PADDING (TYPE_MODE (valtype), valtype) == upward);
9155 #ifdef HAVE_AS_GNU_ATTRIBUTE
9156 /* Return TRUE if a call to function FNDECL may be one that
9157 potentially affects the function calling ABI of the object file. */
9159 static bool
9160 call_ABI_of_interest (tree fndecl)
9162 if (symtab->state == EXPANSION)
9164 struct cgraph_node *c_node;
9166 /* Libcalls are always interesting. */
9167 if (fndecl == NULL_TREE)
9168 return true;
9170 /* Any call to an external function is interesting. */
9171 if (DECL_EXTERNAL (fndecl))
9172 return true;
9174 /* Interesting functions that we are emitting in this object file. */
9175 c_node = cgraph_node::get (fndecl);
9176 c_node = c_node->ultimate_alias_target ();
9177 return !c_node->only_called_directly_p ();
9179 return false;
9181 #endif
9183 /* Initialize a variable CUM of type CUMULATIVE_ARGS
9184 for a call to a function whose data type is FNTYPE.
9185 For a library call, FNTYPE is 0 and RETURN_MODE the return value mode.
9187 For incoming args we set the number of arguments in the prototype large
9188 so we never return a PARALLEL. */
9190 void
9191 init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
9192 rtx libname ATTRIBUTE_UNUSED, int incoming,
9193 int libcall, int n_named_args,
9194 tree fndecl ATTRIBUTE_UNUSED,
9195 machine_mode return_mode ATTRIBUTE_UNUSED)
9197 static CUMULATIVE_ARGS zero_cumulative;
9199 *cum = zero_cumulative;
9200 cum->words = 0;
9201 cum->fregno = FP_ARG_MIN_REG;
9202 cum->vregno = ALTIVEC_ARG_MIN_REG;
9203 cum->prototype = (fntype && prototype_p (fntype));
9204 cum->call_cookie = ((DEFAULT_ABI == ABI_V4 && libcall)
9205 ? CALL_LIBCALL : CALL_NORMAL);
9206 cum->sysv_gregno = GP_ARG_MIN_REG;
9207 cum->stdarg = stdarg_p (fntype);
9209 cum->nargs_prototype = 0;
9210 if (incoming || cum->prototype)
9211 cum->nargs_prototype = n_named_args;
9213 /* Check for a longcall attribute. */
9214 if ((!fntype && rs6000_default_long_calls)
9215 || (fntype
9216 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype))
9217 && !lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype))))
9218 cum->call_cookie |= CALL_LONG;
9220 if (TARGET_DEBUG_ARG)
9222 fprintf (stderr, "\ninit_cumulative_args:");
9223 if (fntype)
9225 tree ret_type = TREE_TYPE (fntype);
9226 fprintf (stderr, " ret code = %s,",
9227 get_tree_code_name (TREE_CODE (ret_type)));
9230 if (cum->call_cookie & CALL_LONG)
9231 fprintf (stderr, " longcall,");
9233 fprintf (stderr, " proto = %d, nargs = %d\n",
9234 cum->prototype, cum->nargs_prototype);
9237 #ifdef HAVE_AS_GNU_ATTRIBUTE
9238 if (DEFAULT_ABI == ABI_V4)
9240 cum->escapes = call_ABI_of_interest (fndecl);
9241 if (cum->escapes)
9243 tree return_type;
9245 if (fntype)
9247 return_type = TREE_TYPE (fntype);
9248 return_mode = TYPE_MODE (return_type);
9250 else
9251 return_type = lang_hooks.types.type_for_mode (return_mode, 0);
9253 if (return_type != NULL)
9255 if (TREE_CODE (return_type) == RECORD_TYPE
9256 && TYPE_TRANSPARENT_AGGR (return_type))
9258 return_type = TREE_TYPE (first_field (return_type));
9259 return_mode = TYPE_MODE (return_type);
9261 if (AGGREGATE_TYPE_P (return_type)
9262 && ((unsigned HOST_WIDE_INT) int_size_in_bytes (return_type)
9263 <= 8))
9264 rs6000_returns_struct = true;
9266 if (SCALAR_FLOAT_MODE_P (return_mode))
9267 rs6000_passes_float = true;
9268 else if (ALTIVEC_OR_VSX_VECTOR_MODE (return_mode)
9269 || SPE_VECTOR_MODE (return_mode))
9270 rs6000_passes_vector = true;
9273 #endif
9275 if (fntype
9276 && !TARGET_ALTIVEC
9277 && TARGET_ALTIVEC_ABI
9278 && ALTIVEC_VECTOR_MODE (TYPE_MODE (TREE_TYPE (fntype))))
9280 error ("cannot return value in vector register because"
9281 " altivec instructions are disabled, use -maltivec"
9282 " to enable them");
9286 /* Return true if TYPE must be passed on the stack and not in registers. */
9288 static bool
9289 rs6000_must_pass_in_stack (machine_mode mode, const_tree type)
9291 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2 || TARGET_64BIT)
9292 return must_pass_in_stack_var_size (mode, type);
9293 else
9294 return must_pass_in_stack_var_size_or_pad (mode, type);
9297 /* If defined, a C expression which determines whether, and in which
9298 direction, to pad out an argument with extra space. The value
9299 should be of type `enum direction': either `upward' to pad above
9300 the argument, `downward' to pad below, or `none' to inhibit
9301 padding.
9303 For the AIX ABI structs are always stored left shifted in their
9304 argument slot. */
9306 enum direction
9307 function_arg_padding (machine_mode mode, const_tree type)
9309 #ifndef AGGREGATE_PADDING_FIXED
9310 #define AGGREGATE_PADDING_FIXED 0
9311 #endif
9312 #ifndef AGGREGATES_PAD_UPWARD_ALWAYS
9313 #define AGGREGATES_PAD_UPWARD_ALWAYS 0
9314 #endif
9316 if (!AGGREGATE_PADDING_FIXED)
9318 /* GCC used to pass structures of the same size as integer types as
9319 if they were in fact integers, ignoring FUNCTION_ARG_PADDING.
9320 That is, structures of size 1 or 2 (or 4 when TARGET_64BIT) were
9321 passed padded downward, except that -mstrict-align further
9322 muddied the water in that multi-component structures of 2 and 4
9323 bytes in size were passed padded upward.
9325 The following arranges for best compatibility with previous
9326 versions of gcc, but removes the -mstrict-align dependency. */
9327 if (BYTES_BIG_ENDIAN)
9329 HOST_WIDE_INT size = 0;
9331 if (mode == BLKmode)
9333 if (type && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST)
9334 size = int_size_in_bytes (type);
9336 else
9337 size = GET_MODE_SIZE (mode);
9339 if (size == 1 || size == 2 || size == 4)
9340 return downward;
9342 return upward;
9345 if (AGGREGATES_PAD_UPWARD_ALWAYS)
9347 if (type != 0 && AGGREGATE_TYPE_P (type))
9348 return upward;
9351 /* Fall back to the default. */
9352 return DEFAULT_FUNCTION_ARG_PADDING (mode, type);
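
/* Illustrative sketch, not part of the original file: on a big-endian
   target without AGGREGATE_PADDING_FIXED, the routine above pads 1-, 2-
   and 4-byte arguments downward (the data sits at the end of its slot,
   as an integer of that size would), and everything else upward.  */
struct pad_down_example { short s; };   /* size 2: downward */
struct pad_up_example { char c[3]; };   /* size 3: upward   */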
9355 /* If defined, a C expression that gives the alignment boundary, in bits,
9356 of an argument with the specified mode and type. If it is not defined,
9357 PARM_BOUNDARY is used for all arguments.
9359 V.4 wants long longs and doubles to be double word aligned. Just
9360 testing the mode size is a boneheaded way to do this as it means
9361 that other types such as complex int are also double word aligned.
9362 However, we're stuck with this because changing the ABI might break
9363 existing library interfaces.
9365 Doubleword align SPE vectors.
9366 Quadword align Altivec/VSX vectors.
9367 Quadword align large synthetic vector types. */
9369 static unsigned int
9370 rs6000_function_arg_boundary (machine_mode mode, const_tree type)
9372 machine_mode elt_mode;
9373 int n_elts;
9375 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9377 if (DEFAULT_ABI == ABI_V4
9378 && (GET_MODE_SIZE (mode) == 8
9379 || (TARGET_HARD_FLOAT
9380 && TARGET_FPRS
9381 && (mode == TFmode || mode == TDmode))))
9382 return 64;
9383 else if (SPE_VECTOR_MODE (mode)
9384 || (type && TREE_CODE (type) == VECTOR_TYPE
9385 && int_size_in_bytes (type) >= 8
9386 && int_size_in_bytes (type) < 16))
9387 return 64;
9388 else if (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9389 || (type && TREE_CODE (type) == VECTOR_TYPE
9390 && int_size_in_bytes (type) >= 16))
9391 return 128;
9393 /* Aggregate types that need > 8 byte alignment are quadword-aligned
9394 in the parameter area in the ELFv2 ABI, and in the AIX ABI unless
9395 -mcompat-align-parm is used. */
9396 if (((DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm)
9397 || DEFAULT_ABI == ABI_ELFv2)
9398 && type && TYPE_ALIGN (type) > 64)
9400 /* "Aggregate" means any AGGREGATE_TYPE except for single-element
9401 or homogeneous float/vector aggregates here. We already handled
9402 vector aggregates above, but still need to check for float here. */
9403 bool aggregate_p = (AGGREGATE_TYPE_P (type)
9404 && !SCALAR_FLOAT_MODE_P (elt_mode));
9406 /* We used to check for BLKmode instead of the above aggregate type
9407 check. Warn when this results in any difference to the ABI. */
9408 if (aggregate_p != (mode == BLKmode))
9410 static bool warned;
9411 if (!warned && warn_psabi)
9413 warned = true;
9414 inform (input_location,
9415 "the ABI of passing aggregates with %d-byte alignment"
9416 " has changed in GCC 5",
9417 (int) TYPE_ALIGN (type) / BITS_PER_UNIT);
9421 if (aggregate_p)
9422 return 128;
9425 /* Similar for the Darwin64 ABI. Note that for historical reasons we
9426 implement the "aggregate type" check as a BLKmode check here; this
9427 means certain aggregate types are in fact not aligned. */
9428 if (TARGET_MACHO && rs6000_darwin64_abi
9429 && mode == BLKmode
9430 && type && TYPE_ALIGN (type) > 64)
9431 return 128;
9433 return PARM_BOUNDARY;
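
/* Illustrative sketch, not part of the original file: boundaries the
   routine above would report, assuming a 64-bit ELFv2 target.  */
typedef int v4si_example __attribute__ ((vector_size (16)));
/* v4si_example: 128 (AltiVec/VSX vector mode, quadword aligned).  */
struct overaligned_example { int i; } __attribute__ ((aligned (32)));
/* overaligned_example: 128 (aggregate with > 8-byte alignment).  */
struct plain_agg_example { int a, b; };
/* plain_agg_example: PARM_BOUNDARY (no special rule applies).  */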
9436 /* The offset in words to the start of the parameter save area. */
9438 static unsigned int
9439 rs6000_parm_offset (void)
9441 return (DEFAULT_ABI == ABI_V4 ? 2
9442 : DEFAULT_ABI == ABI_ELFv2 ? 4
9443 : 6);
9446 /* For a function parm of MODE and TYPE, return the starting word in
9447 the parameter area. NWORDS of the parameter area are already used. */
9449 static unsigned int
9450 rs6000_parm_start (machine_mode mode, const_tree type,
9451 unsigned int nwords)
9453 unsigned int align;
9455 align = rs6000_function_arg_boundary (mode, type) / PARM_BOUNDARY - 1;
9456 return nwords + (-(rs6000_parm_offset () + nwords) & align);
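
/* Illustrative worked example, not part of the original file: assume
   the 64-bit ELFv2 ABI, so rs6000_parm_offset () is 4 doublewords, and
   a quadword-aligned argument (boundary 128, so align = 128/64 - 1 = 1)
   arriving after NWORDS = 3 doublewords are already in use:

     3 + (-(4 + 3) & 1) = 3 + 1 = 4

   i.e. one doubleword of padding is skipped so that the argument starts
   at an even doubleword offset within the parameter save area.  */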
9459 /* Compute the size (in words) of a function argument. */
9461 static unsigned long
9462 rs6000_arg_size (machine_mode mode, const_tree type)
9464 unsigned long size;
9466 if (mode != BLKmode)
9467 size = GET_MODE_SIZE (mode);
9468 else
9469 size = int_size_in_bytes (type);
9471 if (TARGET_32BIT)
9472 return (size + 3) >> 2;
9473 else
9474 return (size + 7) >> 3;
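
/* Illustrative worked example, not part of the original file: a BLKmode
   struct of 10 bytes occupies (10 + 3) >> 2 = 3 words on a 32-bit
   target and (10 + 7) >> 3 = 2 doublewords on a 64-bit target.  */
struct arg_size_example { char c[10]; };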
9477 /* Use this to flush pending int fields. */
9479 static void
9480 rs6000_darwin64_record_arg_advance_flush (CUMULATIVE_ARGS *cum,
9481 HOST_WIDE_INT bitpos, int final)
9483 unsigned int startbit, endbit;
9484 int intregs, intoffset;
9485 machine_mode mode;
9487 /* Handle the situations where a float is taking up the first half
9488 of the GPR, and the other half is empty (typically due to
9489 alignment restrictions). We can detect this by an 8-byte-aligned
9490 int field, or by seeing that this is the final flush for this
9491 argument. Count the word and continue on. */
9492 if (cum->floats_in_gpr == 1
9493 && (cum->intoffset % 64 == 0
9494 || (cum->intoffset == -1 && final)))
9496 cum->words++;
9497 cum->floats_in_gpr = 0;
9500 if (cum->intoffset == -1)
9501 return;
9503 intoffset = cum->intoffset;
9504 cum->intoffset = -1;
9505 cum->floats_in_gpr = 0;
9507 if (intoffset % BITS_PER_WORD != 0)
9509 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9510 MODE_INT, 0);
9511 if (mode == BLKmode)
9513 /* We couldn't find an appropriate mode, which happens,
9514 e.g., in packed structs when there are 3 bytes to load.
9515 Move intoffset back to the beginning of the word in this
9516 case. */
9517 intoffset = intoffset & -BITS_PER_WORD;
9521 startbit = intoffset & -BITS_PER_WORD;
9522 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9523 intregs = (endbit - startbit) / BITS_PER_WORD;
9524 cum->words += intregs;
9525 /* words should be unsigned. */
9526 if ((unsigned)cum->words < (endbit/BITS_PER_WORD))
9528 int pad = (endbit/BITS_PER_WORD) - cum->words;
9529 cum->words += pad;
9533 /* The darwin64 ABI calls for us to recurse down through structs,
9534 looking for elements passed in registers. Unfortunately, we have
9535 to track int register count here also because of misalignments
9536 in powerpc alignment mode. */
9538 static void
9539 rs6000_darwin64_record_arg_advance_recurse (CUMULATIVE_ARGS *cum,
9540 const_tree type,
9541 HOST_WIDE_INT startbitpos)
9543 tree f;
9545 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
9546 if (TREE_CODE (f) == FIELD_DECL)
9548 HOST_WIDE_INT bitpos = startbitpos;
9549 tree ftype = TREE_TYPE (f);
9550 machine_mode mode;
9551 if (ftype == error_mark_node)
9552 continue;
9553 mode = TYPE_MODE (ftype);
9555 if (DECL_SIZE (f) != 0
9556 && tree_fits_uhwi_p (bit_position (f)))
9557 bitpos += int_bit_position (f);
9559 /* ??? FIXME: else assume zero offset. */
9561 if (TREE_CODE (ftype) == RECORD_TYPE)
9562 rs6000_darwin64_record_arg_advance_recurse (cum, ftype, bitpos);
9563 else if (USE_FP_FOR_ARG_P (cum, mode))
9565 unsigned n_fpregs = (GET_MODE_SIZE (mode) + 7) >> 3;
9566 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9567 cum->fregno += n_fpregs;
9568 /* Single-precision floats present a special problem for
9569 us, because they are smaller than an 8-byte GPR, and so
9570 the structure-packing rules combined with the standard
9571 varargs behavior mean that we want to pack float/float
9572 and float/int combinations into a single register's
9573 space. This is complicated by the arg advance flushing,
9574 which works on arbitrarily large groups of int-type
9575 fields. */
9576 if (mode == SFmode)
9578 if (cum->floats_in_gpr == 1)
9580 /* Two floats in a word; count the word and reset
9581 the float count. */
9582 cum->words++;
9583 cum->floats_in_gpr = 0;
9585 else if (bitpos % 64 == 0)
9587 /* A float at the beginning of an 8-byte word;
9588 count it and put off adjusting cum->words until
9589 we see if an arg advance flush is going to do it
9590 for us. */
9591 cum->floats_in_gpr++;
9593 else
9595 /* The float is at the end of a word, preceded
9596 by integer fields, so the arg advance flush
9597 just above has already set cum->words and
9598 everything is taken care of. */
9601 else
9602 cum->words += n_fpregs;
9604 else if (USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
9606 rs6000_darwin64_record_arg_advance_flush (cum, bitpos, 0);
9607 cum->vregno++;
9608 cum->words += 2;
9610 else if (cum->intoffset == -1)
9611 cum->intoffset = bitpos;
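
/* Illustrative sketch, not part of the original file: the float-packing
   rules above mean that for the struct below, the SFmode field at bit 0
   only bumps floats_in_gpr, and the int field in the second half of the
   doubleword is handled by the ordinary int flush, so cum->words
   advances by a single GPR for the whole struct.  */
struct darwin64_pack_example { float f; int i; };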
9615 /* Check for an item that needs to be considered specially under the Darwin
9616 64-bit ABI. These are record types where the mode is BLKmode or the
9617 structure is 8 bytes in size. */
9618 static int
9619 rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
9621 return rs6000_darwin64_abi
9622 && ((mode == BLKmode
9623 && TREE_CODE (type) == RECORD_TYPE
9624 && int_size_in_bytes (type) > 0)
9625 || (type && TREE_CODE (type) == RECORD_TYPE
9626 && int_size_in_bytes (type) == 8)) ? 1 : 0;
9629 /* Update the data in CUM to advance over an argument
9630 of mode MODE and data type TYPE.
9631 (TYPE is null for libcalls where that information may not be available.)
9633 Note that for args passed by reference, function_arg will be called
9634 with MODE and TYPE set to that of the pointer to the arg, not the arg
9635 itself. */
9637 static void
9638 rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
9639 const_tree type, bool named, int depth)
9641 machine_mode elt_mode;
9642 int n_elts;
9644 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
9646 /* Only tick off an argument if we're not recursing. */
9647 if (depth == 0)
9648 cum->nargs_prototype--;
9650 #ifdef HAVE_AS_GNU_ATTRIBUTE
9651 if (DEFAULT_ABI == ABI_V4
9652 && cum->escapes)
9654 if (SCALAR_FLOAT_MODE_P (mode))
9655 rs6000_passes_float = true;
9656 else if (named && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
9657 rs6000_passes_vector = true;
9658 else if (SPE_VECTOR_MODE (mode)
9659 && !cum->stdarg
9660 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9661 rs6000_passes_vector = true;
9663 #endif
9665 if (TARGET_ALTIVEC_ABI
9666 && (ALTIVEC_OR_VSX_VECTOR_MODE (elt_mode)
9667 || (type && TREE_CODE (type) == VECTOR_TYPE
9668 && int_size_in_bytes (type) == 16)))
9670 bool stack = false;
9672 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
9674 cum->vregno += n_elts;
9676 if (!TARGET_ALTIVEC)
9677 error ("cannot pass argument in vector register because"
9678 " altivec instructions are disabled, use -maltivec"
9679 " to enable them");
9681 /* PowerPC64 Linux and AIX allocate GPRs for a vector argument
9682 even if it is going to be passed in a vector register.
9683 Darwin does the same for variable-argument functions. */
9684 if (((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
9685 && TARGET_64BIT)
9686 || (cum->stdarg && DEFAULT_ABI != ABI_V4))
9687 stack = true;
9689 else
9690 stack = true;
9692 if (stack)
9694 int align;
9696 /* Vector parameters must be 16-byte aligned. In 32-bit
9697 mode this means we need to take into account the offset
9698 to the parameter save area. In 64-bit mode, they just
9699 have to start on an even word, since the parameter save
9700 area is 16-byte aligned. */
9701 if (TARGET_32BIT)
9702 align = -(rs6000_parm_offset () + cum->words) & 3;
9703 else
9704 align = cum->words & 1;
9705 cum->words += align + rs6000_arg_size (mode, type);
9707 if (TARGET_DEBUG_ARG)
9709 fprintf (stderr, "function_adv: words = %2d, align=%d, ",
9710 cum->words, align);
9711 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s\n",
9712 cum->nargs_prototype, cum->prototype,
9713 GET_MODE_NAME (mode));
9717 else if (TARGET_SPE_ABI && TARGET_SPE && SPE_VECTOR_MODE (mode)
9718 && !cum->stdarg
9719 && cum->sysv_gregno <= GP_ARG_MAX_REG)
9720 cum->sysv_gregno++;
9722 else if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
9724 int size = int_size_in_bytes (type);
9725 /* Variable sized types have size == -1 and are
9726 treated as if consisting entirely of ints.
9727 Pad to 16 byte boundary if needed. */
9728 if (TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
9729 && (cum->words % 2) != 0)
9730 cum->words++;
9731 /* For varargs, we can just go up by the size of the struct. */
9732 if (!named)
9733 cum->words += (size + 7) / 8;
9734 else
9736 /* It is tempting to say int register count just goes up by
9737 sizeof(type)/8, but this is wrong in a case such as
9738 { int; double; int; } [powerpc alignment]. We have to
9739 grovel through the fields for these too. */
9740 cum->intoffset = 0;
9741 cum->floats_in_gpr = 0;
9742 rs6000_darwin64_record_arg_advance_recurse (cum, type, 0);
9743 rs6000_darwin64_record_arg_advance_flush (cum,
9744 size * BITS_PER_UNIT, 1);
9746 if (TARGET_DEBUG_ARG)
9748 fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
9749 cum->words, TYPE_ALIGN (type), size);
9750 fprintf (stderr,
9751 "nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
9752 cum->nargs_prototype, cum->prototype,
9753 GET_MODE_NAME (mode));
9756 else if (DEFAULT_ABI == ABI_V4)
9758 if (TARGET_HARD_FLOAT && TARGET_FPRS
9759 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
9760 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
9761 || (mode == TFmode && !TARGET_IEEEQUAD)
9762 || mode == SDmode || mode == DDmode || mode == TDmode))
9764 /* _Decimal128 must use an even/odd register pair. This assumes
9765 that the register number is odd when fregno is odd. */
9766 if (mode == TDmode && (cum->fregno % 2) == 1)
9767 cum->fregno++;
9769 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
9770 <= FP_ARG_V4_MAX_REG)
9771 cum->fregno += (GET_MODE_SIZE (mode) + 7) >> 3;
9772 else
9774 cum->fregno = FP_ARG_V4_MAX_REG + 1;
9775 if (mode == DFmode || mode == TFmode
9776 || mode == DDmode || mode == TDmode)
9777 cum->words += cum->words & 1;
9778 cum->words += rs6000_arg_size (mode, type);
9781 else
9783 int n_words = rs6000_arg_size (mode, type);
9784 int gregno = cum->sysv_gregno;
9786 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
9787 (r7,r8) or (r9,r10). As does any other 2 word item such
9788 as complex int due to a historical mistake. */
9789 if (n_words == 2)
9790 gregno += (1 - gregno) & 1;
9792 /* Multi-reg args are not split between registers and stack. */
9793 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9795 /* Long long and SPE vectors are aligned on the stack.
9796 So are other 2 word items such as complex int due to
9797 a historical mistake. */
9798 if (n_words == 2)
9799 cum->words += cum->words & 1;
9800 cum->words += n_words;
9803 /* Note: we keep accumulating gregno even after we have started
9804 spilling to the stack; this lets expand_builtin_saveregs see
9805 that spilling has started. */
9806 cum->sysv_gregno = gregno + n_words;
9809 if (TARGET_DEBUG_ARG)
9811 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9812 cum->words, cum->fregno);
9813 fprintf (stderr, "gregno = %2d, nargs = %4d, proto = %d, ",
9814 cum->sysv_gregno, cum->nargs_prototype, cum->prototype);
9815 fprintf (stderr, "mode = %4s, named = %d\n",
9816 GET_MODE_NAME (mode), named);
9819 else
9821 int n_words = rs6000_arg_size (mode, type);
9822 int start_words = cum->words;
9823 int align_words = rs6000_parm_start (mode, type, start_words);
9825 cum->words = align_words + n_words;
9827 if (SCALAR_FLOAT_MODE_P (elt_mode)
9828 && TARGET_HARD_FLOAT && TARGET_FPRS)
9830 /* _Decimal128 must be passed in an even/odd float register pair.
9831 This assumes that the register number is odd when fregno is
9832 odd. */
9833 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
9834 cum->fregno++;
9835 cum->fregno += n_elts * ((GET_MODE_SIZE (elt_mode) + 7) >> 3);
9838 if (TARGET_DEBUG_ARG)
9840 fprintf (stderr, "function_adv: words = %2d, fregno = %2d, ",
9841 cum->words, cum->fregno);
9842 fprintf (stderr, "nargs = %4d, proto = %d, mode = %4s, ",
9843 cum->nargs_prototype, cum->prototype, GET_MODE_NAME (mode));
9844 fprintf (stderr, "named = %d, align = %d, depth = %d\n",
9845 named, align_words - start_words, depth);
9850 static void
9851 rs6000_function_arg_advance (cumulative_args_t cum, machine_mode mode,
9852 const_tree type, bool named)
9854 rs6000_function_arg_advance_1 (get_cumulative_args (cum), mode, type, named,
9858 static rtx
9859 spe_build_register_parallel (machine_mode mode, int gregno)
9861 rtx r1, r3, r5, r7;
9863 switch (mode)
9865 case DFmode:
9866 r1 = gen_rtx_REG (DImode, gregno);
9867 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9868 return gen_rtx_PARALLEL (mode, gen_rtvec (1, r1));
9870 case DCmode:
9871 case TFmode:
9872 r1 = gen_rtx_REG (DImode, gregno);
9873 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9874 r3 = gen_rtx_REG (DImode, gregno + 2);
9875 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9876 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r3));
9878 case TCmode:
9879 r1 = gen_rtx_REG (DImode, gregno);
9880 r1 = gen_rtx_EXPR_LIST (VOIDmode, r1, const0_rtx);
9881 r3 = gen_rtx_REG (DImode, gregno + 2);
9882 r3 = gen_rtx_EXPR_LIST (VOIDmode, r3, GEN_INT (8));
9883 r5 = gen_rtx_REG (DImode, gregno + 4);
9884 r5 = gen_rtx_EXPR_LIST (VOIDmode, r5, GEN_INT (16));
9885 r7 = gen_rtx_REG (DImode, gregno + 6);
9886 r7 = gen_rtx_EXPR_LIST (VOIDmode, r7, GEN_INT (24));
9887 return gen_rtx_PARALLEL (mode, gen_rtvec (4, r1, r3, r5, r7));
9889 default:
9890 gcc_unreachable ();
9894 /* Determine where to put a SIMD argument on the SPE. */
9895 static rtx
9896 rs6000_spe_function_arg (const CUMULATIVE_ARGS *cum, machine_mode mode,
9897 const_tree type)
9899 int gregno = cum->sysv_gregno;
9901 /* On E500 v2, double arithmetic is done on the full 64-bit GPR, but
9902 doubles are passed and returned in a pair of GPRs for ABI compatibility. */
9903 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode
9904 || mode == DCmode || mode == TCmode))
9906 int n_words = rs6000_arg_size (mode, type);
9908 /* Doubles go in an odd/even register pair (r5/r6, etc). */
9909 if (mode == DFmode)
9910 gregno += (1 - gregno) & 1;
9912 /* Multi-reg args are not split between registers and stack. */
9913 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
9914 return NULL_RTX;
9916 return spe_build_register_parallel (mode, gregno);
9918 if (cum->stdarg)
9920 int n_words = rs6000_arg_size (mode, type);
9922 /* SPE vectors are put in odd registers. */
9923 if (n_words == 2 && (gregno & 1) == 0)
9924 gregno += 1;
9926 if (gregno + n_words - 1 <= GP_ARG_MAX_REG)
9928 rtx r1, r2;
9929 machine_mode m = SImode;
9931 r1 = gen_rtx_REG (m, gregno);
9932 r1 = gen_rtx_EXPR_LIST (m, r1, const0_rtx);
9933 r2 = gen_rtx_REG (m, gregno + 1);
9934 r2 = gen_rtx_EXPR_LIST (m, r2, GEN_INT (4));
9935 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
9937 else
9938 return NULL_RTX;
9940 else
9942 if (gregno <= GP_ARG_MAX_REG)
9943 return gen_rtx_REG (mode, gregno);
9944 else
9945 return NULL_RTX;
9949 /* A subroutine of rs6000_darwin64_record_arg. Assign the bits of the
9950 structure between cum->intoffset and bitpos to integer registers. */
9952 static void
9953 rs6000_darwin64_record_arg_flush (CUMULATIVE_ARGS *cum,
9954 HOST_WIDE_INT bitpos, rtx rvec[], int *k)
9956 machine_mode mode;
9957 unsigned int regno;
9958 unsigned int startbit, endbit;
9959 int this_regno, intregs, intoffset;
9960 rtx reg;
9962 if (cum->intoffset == -1)
9963 return;
9965 intoffset = cum->intoffset;
9966 cum->intoffset = -1;
9968 /* If this is the trailing part of a word, try to only load that
9969 much into the register. Otherwise load the whole register. Note
9970 that in the latter case we may pick up unwanted bits. It's not a
9971 problem at the moment, but we may wish to revisit this. */
9973 if (intoffset % BITS_PER_WORD != 0)
9975 mode = mode_for_size (BITS_PER_WORD - intoffset % BITS_PER_WORD,
9976 MODE_INT, 0);
9977 if (mode == BLKmode)
9979 /* We couldn't find an appropriate mode, which happens,
9980 e.g., in packed structs when there are 3 bytes to load.
9981 Move intoffset back to the beginning of the word in this
9982 case. */
9983 intoffset = intoffset & -BITS_PER_WORD;
9984 mode = word_mode;
9987 else
9988 mode = word_mode;
9990 startbit = intoffset & -BITS_PER_WORD;
9991 endbit = (bitpos + BITS_PER_WORD - 1) & -BITS_PER_WORD;
9992 intregs = (endbit - startbit) / BITS_PER_WORD;
9993 this_regno = cum->words + intoffset / BITS_PER_WORD;
9995 if (intregs > 0 && intregs > GP_ARG_NUM_REG - this_regno)
9996 cum->use_stack = 1;
9998 intregs = MIN (intregs, GP_ARG_NUM_REG - this_regno);
9999 if (intregs <= 0)
10000 return;
10002 intoffset /= BITS_PER_UNIT;
10003 do
10004 {
10005 regno = GP_ARG_MIN_REG + this_regno;
10006 reg = gen_rtx_REG (mode, regno);
10007 rvec[(*k)++] =
10008 gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
10010 this_regno += 1;
10011 intoffset = (intoffset | (UNITS_PER_WORD-1)) + 1;
10012 mode = word_mode;
10013 intregs -= 1;
10014 }
10015 while (intregs > 0);
10018 /* Recursive workhorse for the following. */
10020 static void
10021 rs6000_darwin64_record_arg_recurse (CUMULATIVE_ARGS *cum, const_tree type,
10022 HOST_WIDE_INT startbitpos, rtx rvec[],
10023 int *k)
10025 tree f;
10027 for (f = TYPE_FIELDS (type); f ; f = DECL_CHAIN (f))
10028 if (TREE_CODE (f) == FIELD_DECL)
10030 HOST_WIDE_INT bitpos = startbitpos;
10031 tree ftype = TREE_TYPE (f);
10032 machine_mode mode;
10033 if (ftype == error_mark_node)
10034 continue;
10035 mode = TYPE_MODE (ftype);
10037 if (DECL_SIZE (f) != 0
10038 && tree_fits_uhwi_p (bit_position (f)))
10039 bitpos += int_bit_position (f);
10041 /* ??? FIXME: else assume zero offset. */
10043 if (TREE_CODE (ftype) == RECORD_TYPE)
10044 rs6000_darwin64_record_arg_recurse (cum, ftype, bitpos, rvec, k);
10045 else if (cum->named && USE_FP_FOR_ARG_P (cum, mode))
10047 unsigned n_fpreg = (GET_MODE_SIZE (mode) + 7) >> 3;
10048 #if 0
10049 switch (mode)
10051 case SCmode: mode = SFmode; break;
10052 case DCmode: mode = DFmode; break;
10053 case TCmode: mode = TFmode; break;
10054 default: break;
10056 #endif
10057 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10058 if (cum->fregno + n_fpreg > FP_ARG_MAX_REG + 1)
10060 gcc_assert (cum->fregno == FP_ARG_MAX_REG
10061 && (mode == TFmode || mode == TDmode));
10062 /* Long double or _Decimal128 split over regs and memory. */
10063 mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode : DFmode;
10064 cum->use_stack = 1;
10066 rvec[(*k)++]
10067 = gen_rtx_EXPR_LIST (VOIDmode,
10068 gen_rtx_REG (mode, cum->fregno++),
10069 GEN_INT (bitpos / BITS_PER_UNIT));
10070 if (mode == TFmode || mode == TDmode)
10071 cum->fregno++;
10073 else if (cum->named && USE_ALTIVEC_FOR_ARG_P (cum, mode, 1))
10075 rs6000_darwin64_record_arg_flush (cum, bitpos, rvec, k);
10076 rvec[(*k)++]
10077 = gen_rtx_EXPR_LIST (VOIDmode,
10078 gen_rtx_REG (mode, cum->vregno++),
10079 GEN_INT (bitpos / BITS_PER_UNIT));
10081 else if (cum->intoffset == -1)
10082 cum->intoffset = bitpos;
10086 /* For the darwin64 ABI, we want to construct a PARALLEL consisting of
10087 the register(s) to be used for each field and subfield of a struct
10088 being passed by value, along with the offset of where the
10089 register's value may be found in the block. FP fields go in FP
10090 register, vector fields go in vector registers, and everything
10091 else goes in int registers, packed as in memory.
10093 This code is also used for function return values. RETVAL indicates
10094 whether this is the case.
10096 Much of this is taken from the SPARC V9 port, which has a similar
10097 calling convention. */
10099 static rtx
10100 rs6000_darwin64_record_arg (CUMULATIVE_ARGS *orig_cum, const_tree type,
10101 bool named, bool retval)
10103 rtx rvec[FIRST_PSEUDO_REGISTER];
10104 int k = 1, kbase = 1;
10105 HOST_WIDE_INT typesize = int_size_in_bytes (type);
10106 /* This is a copy; modifications are not visible to our caller. */
10107 CUMULATIVE_ARGS copy_cum = *orig_cum;
10108 CUMULATIVE_ARGS *cum = &copy_cum;
10110 /* Pad to 16 byte boundary if needed. */
10111 if (!retval && TYPE_ALIGN (type) >= 2 * BITS_PER_WORD
10112 && (cum->words % 2) != 0)
10113 cum->words++;
10115 cum->intoffset = 0;
10116 cum->use_stack = 0;
10117 cum->named = named;
10119 /* Put entries into rvec[] for individual FP and vector fields, and
10120 for the chunks of memory that go in int regs. Note we start at
10121 element 1; 0 is reserved for an indication of using memory, and
10122 may or may not be filled in below. */
10123 rs6000_darwin64_record_arg_recurse (cum, type, /* startbit pos= */ 0, rvec, &k);
10124 rs6000_darwin64_record_arg_flush (cum, typesize * BITS_PER_UNIT, rvec, &k);
10126 /* If any part of the struct went on the stack put all of it there.
10127 This hack is because the generic code for
10128 FUNCTION_ARG_PARTIAL_NREGS cannot handle cases where the register
10129 parts of the struct are not at the beginning. */
10130 if (cum->use_stack)
10132 if (retval)
10133 return NULL_RTX; /* doesn't go in registers at all */
10134 kbase = 0;
10135 rvec[0] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10137 if (k > 1 || cum->use_stack)
10138 return gen_rtx_PARALLEL (BLKmode, gen_rtvec_v (k - kbase, &rvec[kbase]));
10139 else
10140 return NULL_RTX;
10143 /* Determine where to place an argument in 64-bit mode with 32-bit ABI. */
10145 static rtx
10146 rs6000_mixed_function_arg (machine_mode mode, const_tree type,
10147 int align_words)
10149 int n_units;
10150 int i, k;
10151 rtx rvec[GP_ARG_NUM_REG + 1];
10153 if (align_words >= GP_ARG_NUM_REG)
10154 return NULL_RTX;
10156 n_units = rs6000_arg_size (mode, type);
10158 /* Optimize the simple case where the arg fits in one gpr, except in
10159 the case of BLKmode due to assign_parms assuming that registers are
10160 BITS_PER_WORD wide. */
10161 if (n_units == 0
10162 || (n_units == 1 && mode != BLKmode))
10163 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10165 k = 0;
10166 if (align_words + n_units > GP_ARG_NUM_REG)
10167 /* Not all of the arg fits in gprs. Say that it goes in memory too,
10168 using a magic NULL_RTX component.
10169 This is not strictly correct. Only some of the arg belongs in
10170 memory, not all of it. However, the normal scheme using
10171 function_arg_partial_nregs can result in unusual subregs, eg.
10172 (subreg:SI (reg:DF) 4), which are not handled well. The code to
10173 store the whole arg to memory is often more efficient than code
10174 to store pieces, and we know that space is available in the right
10175 place for the whole arg. */
10176 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10178 i = 0;
10179 do
10180 {
10181 rtx r = gen_rtx_REG (SImode, GP_ARG_MIN_REG + align_words);
10182 rtx off = GEN_INT (i++ * 4);
10183 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10184 }
10185 while (++align_words < GP_ARG_NUM_REG && --n_units != 0);
10187 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
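
/* Illustrative sketch, not part of the original file: with 32-bit code
   on a 64-bit chip (TARGET_32BIT && TARGET_POWERPC64), a DFmode
   argument landing at align_words == 7 needs two SImode units but only
   one GPR (r10) remains, so the routine above builds roughly

     (parallel:DF [(expr_list (nil) (const_int 0))
                   (expr_list (reg:SI r10) (const_int 0))])

   where the leading NULL_RTX element flags that part of the argument
   also lives in memory.  */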
10190 /* We have an argument of MODE and TYPE that goes into FPRs or VRs,
10191 but must also be copied into the parameter save area starting at
10192 offset ALIGN_WORDS. Fill in RVEC with the elements corresponding
10193 to the GPRs and/or memory. Return the number of elements used. */
10195 static int
10196 rs6000_psave_function_arg (machine_mode mode, const_tree type,
10197 int align_words, rtx *rvec)
10199 int k = 0;
10201 if (align_words < GP_ARG_NUM_REG)
10203 int n_words = rs6000_arg_size (mode, type);
10205 if (align_words + n_words > GP_ARG_NUM_REG
10206 || mode == BLKmode
10207 || (TARGET_32BIT && TARGET_POWERPC64))
10209 /* If this is partially on the stack, then we only
10210 include the portion actually in registers here. */
10211 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10212 int i = 0;
10214 if (align_words + n_words > GP_ARG_NUM_REG)
10216 /* Not all of the arg fits in gprs. Say that it goes in memory
10217 too, using a magic NULL_RTX component. Also see comment in
10218 rs6000_mixed_function_arg for why the normal
10219 function_arg_partial_nregs scheme doesn't work in this case. */
10220 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10223 do
10224 {
10225 rtx r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10226 rtx off = GEN_INT (i++ * GET_MODE_SIZE (rmode));
10227 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10228 }
10229 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10231 else
10233 /* The whole arg fits in gprs. */
10234 rtx r = gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10235 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, const0_rtx);
10238 else
10240 /* It's entirely in memory. */
10241 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
10244 return k;
10247 /* RVEC is a vector of K components of an argument of mode MODE.
10248 Construct the final function_arg return value from it. */
10250 static rtx
10251 rs6000_finish_function_arg (machine_mode mode, rtx *rvec, int k)
10253 gcc_assert (k >= 1);
10255 /* Avoid returning a PARALLEL in the trivial cases. */
10256 if (k == 1)
10258 if (XEXP (rvec[0], 0) == NULL_RTX)
10259 return NULL_RTX;
10261 if (GET_MODE (XEXP (rvec[0], 0)) == mode)
10262 return XEXP (rvec[0], 0);
10265 return gen_rtx_PARALLEL (mode, gen_rtvec_v (k, rvec));
10268 /* Determine where to put an argument to a function.
10269 Value is zero to push the argument on the stack,
10270 or a hard register in which to store the argument.
10272 MODE is the argument's machine mode.
10273 TYPE is the data type of the argument (as a tree).
10274 This is null for libcalls where that information may
10275 not be available.
10276 CUM is a variable of type CUMULATIVE_ARGS which gives info about
10277 the preceding args and about the function being called. It is
10278 not modified in this routine.
10279 NAMED is nonzero if this argument is a named parameter
10280 (otherwise it is an extra parameter matching an ellipsis).
10282 On RS/6000 the first eight words of non-FP are normally in registers
10283 and the rest are pushed. Under AIX, the first 13 FP args are in registers.
10284 Under V.4, the first 8 FP args are in registers.
10286 If this is floating-point and no prototype is specified, we use
10287 both an FP and integer register (or possibly FP reg and stack). Library
10288 functions (when CALL_LIBCALL is set) always have the proper types for args,
10289 so we can pass the FP value just in one register. emit_library_function
10290 doesn't support PARALLEL anyway.
10292 Note that for args passed by reference, function_arg will be called
10293 with MODE and TYPE set to that of the pointer to the arg, not the arg
10294 itself. */
10296 static rtx
10297 rs6000_function_arg (cumulative_args_t cum_v, machine_mode mode,
10298 const_tree type, bool named)
10300 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10301 enum rs6000_abi abi = DEFAULT_ABI;
10302 machine_mode elt_mode;
10303 int n_elts;
10305 /* Return a marker to indicate whether CR1 needs to set or clear the
10306 bit that V.4 uses to say fp args were passed in registers.
10307 Assume that we don't need the marker for software floating point,
10308 or compiler generated library calls. */
10309 if (mode == VOIDmode)
10311 if (abi == ABI_V4
10312 && (cum->call_cookie & CALL_LIBCALL) == 0
10313 && (cum->stdarg
10314 || (cum->nargs_prototype < 0
10315 && (cum->prototype || TARGET_NO_PROTOTYPE))))
10317 /* For the SPE, we need to crxor CR6 always. */
10318 if (TARGET_SPE_ABI)
10319 return GEN_INT (cum->call_cookie | CALL_V4_SET_FP_ARGS);
10320 else if (TARGET_HARD_FLOAT && TARGET_FPRS)
10321 return GEN_INT (cum->call_cookie
10322 | ((cum->fregno == FP_ARG_MIN_REG)
10323 ? CALL_V4_SET_FP_ARGS
10324 : CALL_V4_CLEAR_FP_ARGS));
10327 return GEN_INT (cum->call_cookie & ~CALL_LIBCALL);
10330 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10332 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10334 rtx rslt = rs6000_darwin64_record_arg (cum, type, named, /*retval= */false);
10335 if (rslt != NULL_RTX)
10336 return rslt;
10337 /* Else fall through to usual handling. */
10340 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10342 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10343 rtx r, off;
10344 int i, k = 0;
10346 /* Do we also need to pass this argument in the parameter
10347 save area? */
10348 if (TARGET_64BIT && ! cum->prototype)
10350 int align_words = (cum->words + 1) & ~1;
10351 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10354 /* Describe where this argument goes in the vector registers. */
10355 for (i = 0; i < n_elts && cum->vregno + i <= ALTIVEC_ARG_MAX_REG; i++)
10357 r = gen_rtx_REG (elt_mode, cum->vregno + i);
10358 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10359 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10362 return rs6000_finish_function_arg (mode, rvec, k);
10364 else if (TARGET_ALTIVEC_ABI
10365 && (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
10366 || (type && TREE_CODE (type) == VECTOR_TYPE
10367 && int_size_in_bytes (type) == 16)))
10369 if (named || abi == ABI_V4)
10370 return NULL_RTX;
10371 else
10373 /* Vector parameters to varargs functions under AIX or Darwin
10374 get passed in memory and possibly also in GPRs. */
10375 int align, align_words, n_words;
10376 machine_mode part_mode;
10378 /* Vector parameters must be 16-byte aligned. In 32-bit
10379 mode this means we need to take into account the offset
10380 to the parameter save area. In 64-bit mode, they just
10381 have to start on an even word, since the parameter save
10382 area is 16-byte aligned. */
10383 if (TARGET_32BIT)
10384 align = -(rs6000_parm_offset () + cum->words) & 3;
10385 else
10386 align = cum->words & 1;
10387 align_words = cum->words + align;
10389 /* Out of registers? Memory, then. */
10390 if (align_words >= GP_ARG_NUM_REG)
10391 return NULL_RTX;
10393 if (TARGET_32BIT && TARGET_POWERPC64)
10394 return rs6000_mixed_function_arg (mode, type, align_words);
10396 /* The vector value goes in GPRs. Only the part of the
10397 value in GPRs is reported here. */
10398 part_mode = mode;
10399 n_words = rs6000_arg_size (mode, type);
10400 if (align_words + n_words > GP_ARG_NUM_REG)
10401 /* Fortunately, there are only two possibilities, the value
10402 is either wholly in GPRs or half in GPRs and half not. */
10403 part_mode = DImode;
10405 return gen_rtx_REG (part_mode, GP_ARG_MIN_REG + align_words);
10408 else if (TARGET_SPE_ABI && TARGET_SPE
10409 && (SPE_VECTOR_MODE (mode)
10410 || (TARGET_E500_DOUBLE && (mode == DFmode
10411 || mode == DCmode
10412 || mode == TFmode
10413 || mode == TCmode))))
10414 return rs6000_spe_function_arg (cum, mode, type);
10416 else if (abi == ABI_V4)
10418 if (TARGET_HARD_FLOAT && TARGET_FPRS
10419 && ((TARGET_SINGLE_FLOAT && mode == SFmode)
10420 || (TARGET_DOUBLE_FLOAT && mode == DFmode)
10421 || (mode == TFmode && !TARGET_IEEEQUAD)
10422 || mode == SDmode || mode == DDmode || mode == TDmode))
10424 /* _Decimal128 must use an even/odd register pair. This assumes
10425 that the register number is odd when fregno is odd. */
10426 if (mode == TDmode && (cum->fregno % 2) == 1)
10427 cum->fregno++;
10429 if (cum->fregno + (mode == TFmode || mode == TDmode ? 1 : 0)
10430 <= FP_ARG_V4_MAX_REG)
10431 return gen_rtx_REG (mode, cum->fregno);
10432 else
10433 return NULL_RTX;
10435 else
10437 int n_words = rs6000_arg_size (mode, type);
10438 int gregno = cum->sysv_gregno;
10440 /* Long long and SPE vectors are put in (r3,r4), (r5,r6),
10441 (r7,r8) or (r9,r10). As does any other 2 word item such
10442 as complex int due to a historical mistake. */
10443 if (n_words == 2)
10444 gregno += (1 - gregno) & 1;
10446 /* Multi-reg args are not split between registers and stack. */
10447 if (gregno + n_words - 1 > GP_ARG_MAX_REG)
10448 return NULL_RTX;
10450 if (TARGET_32BIT && TARGET_POWERPC64)
10451 return rs6000_mixed_function_arg (mode, type,
10452 gregno - GP_ARG_MIN_REG);
10453 return gen_rtx_REG (mode, gregno);
10456 else
10458 int align_words = rs6000_parm_start (mode, type, cum->words);
10460 /* _Decimal128 must be passed in an even/odd float register pair.
10461 This assumes that the register number is odd when fregno is odd. */
10462 if (elt_mode == TDmode && (cum->fregno % 2) == 1)
10463 cum->fregno++;
10465 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10467 rtx rvec[GP_ARG_NUM_REG + AGGR_ARG_NUM_REG + 1];
10468 rtx r, off;
10469 int i, k = 0;
10470 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10471 int fpr_words;
10473 /* Do we also need to pass this argument in the parameter
10474 save area? */
10475 if (type && (cum->nargs_prototype <= 0
10476 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10477 && TARGET_XL_COMPAT
10478 && align_words >= GP_ARG_NUM_REG)))
10479 k = rs6000_psave_function_arg (mode, type, align_words, rvec);
10481 /* Describe where this argument goes in the fprs. */
10482 for (i = 0; i < n_elts
10483 && cum->fregno + i * n_fpreg <= FP_ARG_MAX_REG; i++)
10485 /* Check if the argument is split over registers and memory.
10486 This can only ever happen for long double or _Decimal128;
10487 complex types are handled via split_complex_arg. */
10488 machine_mode fmode = elt_mode;
10489 if (cum->fregno + (i + 1) * n_fpreg > FP_ARG_MAX_REG + 1)
10491 gcc_assert (fmode == TFmode || fmode == TDmode);
10492 fmode = DECIMAL_FLOAT_MODE_P (fmode) ? DDmode : DFmode;
10495 r = gen_rtx_REG (fmode, cum->fregno + i * n_fpreg);
10496 off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
10497 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10500 /* If there were not enough FPRs to hold the argument, the rest
10501 usually goes into memory. However, if the current position
10502 is still within the register parameter area, a portion may
10503 actually have to go into GPRs.
10505 Note that it may happen that the portion of the argument
10506 passed in the first "half" of the first GPR was already
10507 passed in the last FPR as well.
10509 For unnamed arguments, we already set up GPRs to cover the
10510 whole argument in rs6000_psave_function_arg, so there is
10511 nothing further to do at this point. */
10512 fpr_words = (i * GET_MODE_SIZE (elt_mode)) / (TARGET_32BIT ? 4 : 8);
10513 if (i < n_elts && align_words + fpr_words < GP_ARG_NUM_REG
10514 && cum->nargs_prototype > 0)
10516 static bool warned;
10518 machine_mode rmode = TARGET_32BIT ? SImode : DImode;
10519 int n_words = rs6000_arg_size (mode, type);
10521 align_words += fpr_words;
10522 n_words -= fpr_words;
10524 do
10525 {
10526 r = gen_rtx_REG (rmode, GP_ARG_MIN_REG + align_words);
10527 off = GEN_INT (fpr_words++ * GET_MODE_SIZE (rmode));
10528 rvec[k++] = gen_rtx_EXPR_LIST (VOIDmode, r, off);
10529 }
10530 while (++align_words < GP_ARG_NUM_REG && --n_words != 0);
10532 if (!warned && warn_psabi)
10534 warned = true;
10535 inform (input_location,
10536 "the ABI of passing homogeneous float aggregates"
10537 " has changed in GCC 5");
10541 return rs6000_finish_function_arg (mode, rvec, k);
10543 else if (align_words < GP_ARG_NUM_REG)
10545 if (TARGET_32BIT && TARGET_POWERPC64)
10546 return rs6000_mixed_function_arg (mode, type, align_words);
10548 return gen_rtx_REG (mode, GP_ARG_MIN_REG + align_words);
10550 else
10551 return NULL_RTX;
10555 /* For an arg passed partly in registers and partly in memory, this is
10556 the number of bytes passed in registers. For args passed entirely in
10557 registers or entirely in memory, zero. When an arg is described by a
10558 PARALLEL, perhaps using more than one register type, this function
10559 returns the number of bytes used by the first element of the PARALLEL. */
10561 static int
10562 rs6000_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
10563 tree type, bool named)
10565 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
10566 bool passed_in_gprs = true;
10567 int ret = 0;
10568 int align_words;
10569 machine_mode elt_mode;
10570 int n_elts;
10572 rs6000_discover_homogeneous_aggregate (mode, type, &elt_mode, &n_elts);
10574 if (DEFAULT_ABI == ABI_V4)
10575 return 0;
10577 if (USE_ALTIVEC_FOR_ARG_P (cum, elt_mode, named))
10579 /* If we are passing this arg in the fixed parameter save area
10580 (gprs or memory) as well as VRs, we do not use the partial
10581 bytes mechanism; instead, rs6000_function_arg will return a
10582 PARALLEL including a memory element as necessary. */
10583 if (TARGET_64BIT && ! cum->prototype)
10584 return 0;
10586 /* Otherwise, we pass in VRs only. Check for partial copies. */
10587 passed_in_gprs = false;
10588 if (cum->vregno + n_elts > ALTIVEC_ARG_MAX_REG + 1)
10589 ret = (ALTIVEC_ARG_MAX_REG + 1 - cum->vregno) * 16;
10592 /* In this complicated case we just disable the partial_nregs code. */
10593 if (TARGET_MACHO && rs6000_darwin64_struct_check_p (mode, type))
10594 return 0;
10596 align_words = rs6000_parm_start (mode, type, cum->words);
10598 if (USE_FP_FOR_ARG_P (cum, elt_mode))
10600 unsigned long n_fpreg = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
10602 /* If we are passing this arg in the fixed parameter save area
10603 (gprs or memory) as well as FPRs, we do not use the partial
10604 bytes mechanism; instead, rs6000_function_arg will return a
10605 PARALLEL including a memory element as necessary. */
10606 if (type
10607 && (cum->nargs_prototype <= 0
10608 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
10609 && TARGET_XL_COMPAT
10610 && align_words >= GP_ARG_NUM_REG)))
10611 return 0;
10613 /* Otherwise, we pass in FPRs only. Check for partial copies. */
10614 passed_in_gprs = false;
10615 if (cum->fregno + n_elts * n_fpreg > FP_ARG_MAX_REG + 1)
10617 /* Compute number of bytes / words passed in FPRs. If there
10618 is still space available in the register parameter area
10619 *after* that amount, a part of the argument will be passed
10620 in GPRs. In that case, the total amount passed in any
10621 registers is equal to the amount that would have been passed
10622 in GPRs if everything were passed there, so we fall back to
10623 the GPR code below to compute the appropriate value. */
10624 int fpr = ((FP_ARG_MAX_REG + 1 - cum->fregno)
10625 * MIN (8, GET_MODE_SIZE (elt_mode)));
10626 int fpr_words = fpr / (TARGET_32BIT ? 4 : 8);
10628 if (align_words + fpr_words < GP_ARG_NUM_REG)
10629 passed_in_gprs = true;
10630 else
10631 ret = fpr;
10635 if (passed_in_gprs
10636 && align_words < GP_ARG_NUM_REG
10637 && GP_ARG_NUM_REG < align_words + rs6000_arg_size (mode, type))
10638 ret = (GP_ARG_NUM_REG - align_words) * (TARGET_32BIT ? 4 : 8);
10640 if (ret != 0 && TARGET_DEBUG_ARG)
10641 fprintf (stderr, "rs6000_arg_partial_bytes: %d\n", ret);
10643 return ret;
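
/* Illustrative worked example, not part of the original file: 64-bit
   ELFv2, a named homogeneous aggregate of four doubles arriving when
   only FPRs f11..f13 remain free. Then fpr = 3 * 8 = 24 bytes fit in
   FPRs; provided the GPR part of the parameter save area is already
   exhausted (align_words + 3 >= GP_ARG_NUM_REG), the function above
   returns 24.  */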
10646 /* A C expression that indicates when an argument must be passed by
10647 reference. If nonzero for an argument, a copy of that argument is
10648 made in memory and a pointer to the argument is passed instead of
10649 the argument itself. The pointer is passed in whatever way is
10650 appropriate for passing a pointer to that type.
10652 Under V.4, aggregates and long double are passed by reference.
10654 As an extension to all 32-bit ABIs, AltiVec vectors are passed by
10655 reference unless the AltiVec vector extension ABI is in force.
10657 As an extension to all ABIs, variable sized types are passed by
10658 reference. */
10660 static bool
10661 rs6000_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
10662 machine_mode mode, const_tree type,
10663 bool named ATTRIBUTE_UNUSED)
10665 if (DEFAULT_ABI == ABI_V4 && TARGET_IEEEQUAD && mode == TFmode)
10667 if (TARGET_DEBUG_ARG)
10668 fprintf (stderr, "function_arg_pass_by_reference: V4 long double\n");
10669 return 1;
10672 if (!type)
10673 return 0;
10675 if (DEFAULT_ABI == ABI_V4 && AGGREGATE_TYPE_P (type))
10677 if (TARGET_DEBUG_ARG)
10678 fprintf (stderr, "function_arg_pass_by_reference: V4 aggregate\n");
10679 return 1;
10682 if (int_size_in_bytes (type) < 0)
10684 if (TARGET_DEBUG_ARG)
10685 fprintf (stderr, "function_arg_pass_by_reference: variable size\n");
10686 return 1;
10689 /* Allow -maltivec -mabi=no-altivec without warning. Altivec vector
10690 modes only exist for GCC vector types if -maltivec. */
10691 if (TARGET_32BIT && !TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
10693 if (TARGET_DEBUG_ARG)
10694 fprintf (stderr, "function_arg_pass_by_reference: AltiVec\n");
10695 return 1;
10698 /* Pass synthetic vectors in memory. */
10699 if (TREE_CODE (type) == VECTOR_TYPE
10700 && int_size_in_bytes (type) > (TARGET_ALTIVEC_ABI ? 16 : 8))
10702 static bool warned_for_pass_big_vectors = false;
10703 if (TARGET_DEBUG_ARG)
10704 fprintf (stderr, "function_arg_pass_by_reference: synthetic vector\n");
10705 if (!warned_for_pass_big_vectors)
10707 warning (0, "GCC vector passed by reference: "
10708 "non-standard ABI extension with no compatibility guarantee");
10709 warned_for_pass_big_vectors = true;
10711 return 1;
10714 return 0;
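
/* Illustrative sketch, not part of the original file: assuming a
   32-bit SVR4 (ABI_V4) target, the hook above forces the type below
   into memory, passing a pointer instead.  */
struct v4_byref_example { int a, b, c; };  /* V4 aggregate: by reference */
/* Likewise any variable-size type (int_size_in_bytes < 0) under every
   ABI handled here, and oversized GCC "synthetic" vectors, the latter
   with a one-time warning.  */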
10717 /* Process parameter of type TYPE after ARGS_SO_FAR parameters were
10718 already processed. Return true if the parameter must be passed
10719 (fully or partially) on the stack. */
10721 static bool
10722 rs6000_parm_needs_stack (cumulative_args_t args_so_far, tree type)
10724 machine_mode mode;
10725 int unsignedp;
10726 rtx entry_parm;
10728 /* Catch errors. */
10729 if (type == NULL || type == error_mark_node)
10730 return true;
10732 /* Handle types with no storage requirement. */
10733 if (TYPE_MODE (type) == VOIDmode)
10734 return false;
10736 /* Handle complex types. */
10737 if (TREE_CODE (type) == COMPLEX_TYPE)
10738 return (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type))
10739 || rs6000_parm_needs_stack (args_so_far, TREE_TYPE (type)));
10741 /* Handle transparent aggregates. */
10742 if ((TREE_CODE (type) == UNION_TYPE || TREE_CODE (type) == RECORD_TYPE)
10743 && TYPE_TRANSPARENT_AGGR (type))
10744 type = TREE_TYPE (first_field (type));
10746 /* See if this arg was passed by invisible reference. */
10747 if (pass_by_reference (get_cumulative_args (args_so_far),
10748 TYPE_MODE (type), type, true))
10749 type = build_pointer_type (type);
10751 /* Find mode as it is passed by the ABI. */
10752 unsignedp = TYPE_UNSIGNED (type);
10753 mode = promote_mode (type, TYPE_MODE (type), &unsignedp);
10755 /* If we must pass in stack, we need a stack. */
10756 if (rs6000_must_pass_in_stack (mode, type))
10757 return true;
10759 /* If there is no incoming register, we need a stack. */
10760 entry_parm = rs6000_function_arg (args_so_far, mode, type, true);
10761 if (entry_parm == NULL)
10762 return true;
10764 /* Likewise if we need to pass both in registers and on the stack. */
10765 if (GET_CODE (entry_parm) == PARALLEL
10766 && XEXP (XVECEXP (entry_parm, 0, 0), 0) == NULL_RTX)
10767 return true;
10769 /* Also true if we're partially in registers and partially not. */
10770 if (rs6000_arg_partial_bytes (args_so_far, mode, type, true) != 0)
10771 return true;
10773 /* Update info on where next arg arrives in registers. */
10774 rs6000_function_arg_advance (args_so_far, mode, type, true);
10775 return false;
10778 /* Return true if FUN has no prototype, has a variable argument
10779 list, or passes any parameter in memory. */
10781 static bool
10782 rs6000_function_parms_need_stack (tree fun, bool incoming)
10784 tree fntype, result;
10785 CUMULATIVE_ARGS args_so_far_v;
10786 cumulative_args_t args_so_far;
10788 if (!fun)
10789 /* Must be a libcall, all of which only use reg parms. */
10790 return false;
10792 fntype = fun;
10793 if (!TYPE_P (fun))
10794 fntype = TREE_TYPE (fun);
10796 /* Varargs functions need the parameter save area. */
10797 if ((!incoming && !prototype_p (fntype)) || stdarg_p (fntype))
10798 return true;
10800 INIT_CUMULATIVE_INCOMING_ARGS (args_so_far_v, fntype, NULL_RTX);
10801 args_so_far = pack_cumulative_args (&args_so_far_v);
10803 /* When incoming, we will have been passed the function decl.
10804 It is necessary to use the decl to handle K&R style functions,
10805 where TYPE_ARG_TYPES may not be available. */
10806 if (incoming)
10808 gcc_assert (DECL_P (fun));
10809 result = DECL_RESULT (fun);
10811 else
10812 result = TREE_TYPE (fntype);
10814 if (result && aggregate_value_p (result, fntype))
10816 if (!TYPE_P (result))
10817 result = TREE_TYPE (result);
10818 result = build_pointer_type (result);
10819 rs6000_parm_needs_stack (args_so_far, result);
10822 if (incoming)
10824 tree parm;
10826 for (parm = DECL_ARGUMENTS (fun);
10827 parm && parm != void_list_node;
10828 parm = TREE_CHAIN (parm))
10829 if (rs6000_parm_needs_stack (args_so_far, TREE_TYPE (parm)))
10830 return true;
10832 else
10834 function_args_iterator args_iter;
10835 tree arg_type;
10837 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
10838 if (rs6000_parm_needs_stack (args_so_far, arg_type))
10839 return true;
10842 return false;
10845 /* Return the size of the REG_PARM_STACK_SPACE area for FUN. This is
10846 usually a constant depending on the ABI. However, in the ELFv2 ABI
10847 the register parameter area is optional when calling a function that
10848 has a prototype in scope, has no variable argument list, and passes
10849 all parameters in registers. */
10851 static int
10852 rs6000_reg_parm_stack_space (tree fun, bool incoming)
10854 int reg_parm_stack_space;
10856 switch (DEFAULT_ABI)
10858 default:
10859 reg_parm_stack_space = 0;
10860 break;
10862 case ABI_AIX:
10863 case ABI_DARWIN:
10864 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10865 break;
10867 case ABI_ELFv2:
10868 /* ??? Recomputing this every time is a bit expensive. Is there
10869 a place to cache this information? */
10870 if (rs6000_function_parms_need_stack (fun, incoming))
10871 reg_parm_stack_space = TARGET_64BIT ? 64 : 32;
10872 else
10873 reg_parm_stack_space = 0;
10874 break;
10877 return reg_parm_stack_space;
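
/* Illustrative worked example, not part of the original file: under
   64-bit AIX the answer is always 64 bytes (8 doublewords). Under
   ELFv2, a call such as

     extern int f_example (int, int);
     ... f_example (1, 2) ...

   needs no parameter save area at all: the prototype is in scope, the
   function is not varargs, and both arguments travel in GPRs, so the
   routine above returns 0.  */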
10880 static void
10881 rs6000_move_block_from_reg (int regno, rtx x, int nregs)
10883 int i;
10884 machine_mode reg_mode = TARGET_32BIT ? SImode : DImode;
10886 if (nregs == 0)
10887 return;
10889 for (i = 0; i < nregs; i++)
10891 rtx tem = adjust_address_nv (x, reg_mode, i * GET_MODE_SIZE (reg_mode));
10892 if (reload_completed)
10894 if (! strict_memory_address_p (reg_mode, XEXP (tem, 0)))
10895 tem = NULL_RTX;
10896 else
10897 tem = simplify_gen_subreg (reg_mode, x, BLKmode,
10898 i * GET_MODE_SIZE (reg_mode));
10900 else
10901 tem = replace_equiv_address (tem, XEXP (tem, 0));
10903 gcc_assert (tem);
10905 emit_move_insn (tem, gen_rtx_REG (reg_mode, regno + i));
10909 /* Perform any actions needed for a function that is receiving a
10910 variable number of arguments.
10912 CUM is as above.
10914 MODE and TYPE are the mode and type of the current parameter.
10916 PRETEND_SIZE is a variable that should be set to the amount of stack
10917 that must be pushed by the prolog to pretend that our caller pushed
10918 it.
10920 Normally, this macro will push all remaining incoming registers on the
10921 stack and set PRETEND_SIZE to the length of the registers pushed. */
10923 static void
10924 setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
10925 tree type, int *pretend_size ATTRIBUTE_UNUSED,
10926 int no_rtl)
10928 CUMULATIVE_ARGS next_cum;
10929 int reg_size = TARGET_32BIT ? 4 : 8;
10930 rtx save_area = NULL_RTX, mem;
10931 int first_reg_offset;
10932 alias_set_type set;
10934 /* Skip the last named argument. */
10935 next_cum = *get_cumulative_args (cum);
10936 rs6000_function_arg_advance_1 (&next_cum, mode, type, true, 0);
10938 if (DEFAULT_ABI == ABI_V4)
10940 first_reg_offset = next_cum.sysv_gregno - GP_ARG_MIN_REG;
10942 if (! no_rtl)
10944 int gpr_reg_num = 0, gpr_size = 0, fpr_size = 0;
10945 HOST_WIDE_INT offset = 0;
10947 /* Try to optimize the size of the varargs save area.
10948 The ABI requires that ap.reg_save_area is doubleword
10949 aligned, but we don't need to allocate space for all
10950 the bytes, only those to which we actually will save
10951 anything. */
10952 if (cfun->va_list_gpr_size && first_reg_offset < GP_ARG_NUM_REG)
10953 gpr_reg_num = GP_ARG_NUM_REG - first_reg_offset;
10954 if (TARGET_HARD_FLOAT && TARGET_FPRS
10955 && next_cum.fregno <= FP_ARG_V4_MAX_REG
10956 && cfun->va_list_fpr_size)
10958 if (gpr_reg_num)
10959 fpr_size = (next_cum.fregno - FP_ARG_MIN_REG)
10960 * UNITS_PER_FP_WORD;
10961 if (cfun->va_list_fpr_size
10962 < FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10963 fpr_size += cfun->va_list_fpr_size * UNITS_PER_FP_WORD;
10964 else
10965 fpr_size += (FP_ARG_V4_MAX_REG + 1 - next_cum.fregno)
10966 * UNITS_PER_FP_WORD;
10968 if (gpr_reg_num)
10970 offset = -((first_reg_offset * reg_size) & ~7);
10971 if (!fpr_size && gpr_reg_num > cfun->va_list_gpr_size)
10973 gpr_reg_num = cfun->va_list_gpr_size;
10974 if (reg_size == 4 && (first_reg_offset & 1))
10975 gpr_reg_num++;
10977 gpr_size = (gpr_reg_num * reg_size + 7) & ~7;
10979 else if (fpr_size)
10980 offset = - (int) (next_cum.fregno - FP_ARG_MIN_REG)
10981 * UNITS_PER_FP_WORD
10982 - (int) (GP_ARG_NUM_REG * reg_size);
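/* As a worked example (32-bit ABI_V4, one named int argument):
   first_reg_offset is 1, so up to gpr_reg_num = 8 - 1 = 7 registers
   (r4..r10) may need saving (assuming cfun->va_list_gpr_size does
   not cap this); offset = -((1 * 4) & ~7) = 0 and
   gpr_size = (7 * 4 + 7) & ~7 = 32, giving a doubleword-aligned
   32-byte slot for the GPR part of the save area.  */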
10984 if (gpr_size + fpr_size)
10986 rtx reg_save_area
10987 = assign_stack_local (BLKmode, gpr_size + fpr_size, 64);
10988 gcc_assert (GET_CODE (reg_save_area) == MEM);
10989 reg_save_area = XEXP (reg_save_area, 0);
10990 if (GET_CODE (reg_save_area) == PLUS)
10992 gcc_assert (XEXP (reg_save_area, 0)
10993 == virtual_stack_vars_rtx);
10994 gcc_assert (GET_CODE (XEXP (reg_save_area, 1)) == CONST_INT);
10995 offset += INTVAL (XEXP (reg_save_area, 1));
10997 else
10998 gcc_assert (reg_save_area == virtual_stack_vars_rtx);
11001 cfun->machine->varargs_save_offset = offset;
11002 save_area = plus_constant (Pmode, virtual_stack_vars_rtx, offset);
11005 else
11007 first_reg_offset = next_cum.words;
11008 save_area = virtual_incoming_args_rtx;
11010 if (targetm.calls.must_pass_in_stack (mode, type))
11011 first_reg_offset += rs6000_arg_size (TYPE_MODE (type), type);
11014 set = get_varargs_alias_set ();
11015 if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
11016 && cfun->va_list_gpr_size)
11018 int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
11020 if (va_list_gpr_counter_field)
11021 /* V4 va_list_gpr_size counts number of registers needed. */
11022 n_gpr = cfun->va_list_gpr_size;
11023 else
11024 /* char * va_list instead counts number of bytes needed. */
11025 n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
11027 if (nregs > n_gpr)
11028 nregs = n_gpr;
11030 mem = gen_rtx_MEM (BLKmode,
11031 plus_constant (Pmode, save_area,
11032 first_reg_offset * reg_size));
11033 MEM_NOTRAP_P (mem) = 1;
11034 set_mem_alias_set (mem, set);
11035 set_mem_align (mem, BITS_PER_WORD);
11037 rs6000_move_block_from_reg (GP_ARG_MIN_REG + first_reg_offset, mem,
11038 nregs);
11041 /* Save FP registers if needed. */
11042 if (DEFAULT_ABI == ABI_V4
11043 && TARGET_HARD_FLOAT && TARGET_FPRS
11044 && ! no_rtl
11045 && next_cum.fregno <= FP_ARG_V4_MAX_REG
11046 && cfun->va_list_fpr_size)
11048 int fregno = next_cum.fregno, nregs;
11049 rtx cr1 = gen_rtx_REG (CCmode, CR1_REGNO);
11050 rtx lab = gen_label_rtx ();
11051 int off = (GP_ARG_NUM_REG * reg_size) + ((fregno - FP_ARG_MIN_REG)
11052 * UNITS_PER_FP_WORD);
11054 emit_jump_insn
11055 (gen_rtx_SET (VOIDmode,
11056 pc_rtx,
11057 gen_rtx_IF_THEN_ELSE (VOIDmode,
11058 gen_rtx_NE (VOIDmode, cr1,
11059 const0_rtx),
11060 gen_rtx_LABEL_REF (VOIDmode, lab),
11061 pc_rtx)));
11063 for (nregs = 0;
11064 fregno <= FP_ARG_V4_MAX_REG && nregs < cfun->va_list_fpr_size;
11065 fregno++, off += UNITS_PER_FP_WORD, nregs++)
11067 mem = gen_rtx_MEM ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11068 ? DFmode : SFmode,
11069 plus_constant (Pmode, save_area, off));
11070 MEM_NOTRAP_P (mem) = 1;
11071 set_mem_alias_set (mem, set);
11072 set_mem_align (mem, GET_MODE_ALIGNMENT (
11073 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11074 ? DFmode : SFmode));
11075 emit_move_insn (mem, gen_rtx_REG (
11076 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT)
11077 ? DFmode : SFmode, fregno));
11080 emit_label (lab);
11084 /* Create the va_list data type. */
11086 static tree
11087 rs6000_build_builtin_va_list (void)
11089 tree f_gpr, f_fpr, f_res, f_ovf, f_sav, record, type_decl;
11091 /* For AIX, prefer 'char *' because that's what the system
11092 header files like. */
11093 if (DEFAULT_ABI != ABI_V4)
11094 return build_pointer_type (char_type_node);
11096 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
11097 type_decl = build_decl (BUILTINS_LOCATION, TYPE_DECL,
11098 get_identifier ("__va_list_tag"), record);
11100 f_gpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("gpr"),
11101 unsigned_char_type_node);
11102 f_fpr = build_decl (BUILTINS_LOCATION, FIELD_DECL, get_identifier ("fpr"),
11103 unsigned_char_type_node);
11104 /* Give the two bytes of padding a name, so that -Wpadded won't warn on
11105 every user file. */
11106 f_res = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11107 get_identifier ("reserved"), short_unsigned_type_node);
11108 f_ovf = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11109 get_identifier ("overflow_arg_area"),
11110 ptr_type_node);
11111 f_sav = build_decl (BUILTINS_LOCATION, FIELD_DECL,
11112 get_identifier ("reg_save_area"),
11113 ptr_type_node);
11115 va_list_gpr_counter_field = f_gpr;
11116 va_list_fpr_counter_field = f_fpr;
11118 DECL_FIELD_CONTEXT (f_gpr) = record;
11119 DECL_FIELD_CONTEXT (f_fpr) = record;
11120 DECL_FIELD_CONTEXT (f_res) = record;
11121 DECL_FIELD_CONTEXT (f_ovf) = record;
11122 DECL_FIELD_CONTEXT (f_sav) = record;
11124 TYPE_STUB_DECL (record) = type_decl;
11125 TYPE_NAME (record) = type_decl;
11126 TYPE_FIELDS (record) = f_gpr;
11127 DECL_CHAIN (f_gpr) = f_fpr;
11128 DECL_CHAIN (f_fpr) = f_res;
11129 DECL_CHAIN (f_res) = f_ovf;
11130 DECL_CHAIN (f_ovf) = f_sav;
11132 layout_type (record);
11134 /* The correct type is an array type of one element. */
11135 return build_array_type (record, build_index_type (size_zero_node));
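/* In effect, the record built above gives the V4 va_list the shape

     typedef struct __va_list_tag {
       unsigned char gpr;
       unsigned char fpr;
       unsigned short reserved;
       void *overflow_arg_area;
       void *reg_save_area;
     } va_list[1];

   which is the layout the 32-bit SVR4 PowerPC ABI specifies.  */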
11138 /* Implement va_start. */
11140 static void
11141 rs6000_va_start (tree valist, rtx nextarg)
11143 HOST_WIDE_INT words, n_gpr, n_fpr;
11144 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11145 tree gpr, fpr, ovf, sav, t;
11147 /* Only SVR4 needs something special. */
11148 if (DEFAULT_ABI != ABI_V4)
11150 std_expand_builtin_va_start (valist, nextarg);
11151 return;
11154 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11155 f_fpr = DECL_CHAIN (f_gpr);
11156 f_res = DECL_CHAIN (f_fpr);
11157 f_ovf = DECL_CHAIN (f_res);
11158 f_sav = DECL_CHAIN (f_ovf);
11160 valist = build_simple_mem_ref (valist);
11161 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11162 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11163 f_fpr, NULL_TREE);
11164 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11165 f_ovf, NULL_TREE);
11166 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11167 f_sav, NULL_TREE);
11169 /* Count number of gp and fp argument registers used. */
11170 words = crtl->args.info.words;
11171 n_gpr = MIN (crtl->args.info.sysv_gregno - GP_ARG_MIN_REG,
11172 GP_ARG_NUM_REG);
11173 n_fpr = MIN (crtl->args.info.fregno - FP_ARG_MIN_REG,
11174 FP_ARG_NUM_REG);
11176 if (TARGET_DEBUG_ARG)
11177 fprintf (stderr, "va_start: words = "HOST_WIDE_INT_PRINT_DEC", n_gpr = "
11178 HOST_WIDE_INT_PRINT_DEC", n_fpr = "HOST_WIDE_INT_PRINT_DEC"\n",
11179 words, n_gpr, n_fpr);
11181 if (cfun->va_list_gpr_size)
11183 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
11184 build_int_cst (NULL_TREE, n_gpr));
11185 TREE_SIDE_EFFECTS (t) = 1;
11186 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11189 if (cfun->va_list_fpr_size)
11191 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
11192 build_int_cst (NULL_TREE, n_fpr));
11193 TREE_SIDE_EFFECTS (t) = 1;
11194 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11196 #ifdef HAVE_AS_GNU_ATTRIBUTE
11197 if (call_ABI_of_interest (cfun->decl))
11198 rs6000_passes_float = true;
11199 #endif
11202 /* Find the overflow area. */
11203 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
11204 if (words != 0)
11205 t = fold_build_pointer_plus_hwi (t, words * UNITS_PER_WORD);
11206 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
11207 TREE_SIDE_EFFECTS (t) = 1;
11208 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
11210 /* If there were no va_arg invocations, don't set up the register
11211 save area. */
11212 if (!cfun->va_list_gpr_size
11213 && !cfun->va_list_fpr_size
11214 && n_gpr < GP_ARG_NUM_REG
11215 && n_fpr < FP_ARG_V4_MAX_REG)
11216 return;
11218 /* Find the register save area. */
11219 t = make_tree (TREE_TYPE (sav), virtual_stack_vars_rtx);
11220 if (cfun->machine->varargs_save_offset)
11221 t = fold_build_pointer_plus_hwi (t, cfun->machine->varargs_save_offset);
11222 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
11223 TREE_SIDE_EFFECTS (t) = 1;
11224 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
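/* For example, for
     void f (int a, ...) { va_list ap; va_start (ap, a); ... }
   under ABI_V4 this initializes gpr = 1 and fpr = 0 (only r3 holds a
   named argument), points overflow_arg_area just past any named
   stack arguments, and points reg_save_area at the block spilled by
   setup_incoming_varargs above.  */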
11227 /* Implement va_arg. */
11229 static tree
11230 rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
11231 gimple_seq *post_p)
11233 tree f_gpr, f_fpr, f_res, f_ovf, f_sav;
11234 tree gpr, fpr, ovf, sav, reg, t, u;
11235 int size, rsize, n_reg, sav_ofs, sav_scale;
11236 tree lab_false, lab_over, addr;
11237 int align;
11238 tree ptrtype = build_pointer_type_for_mode (type, ptr_mode, true);
11239 int regalign = 0;
11240 gimple stmt;
11242 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
11244 t = rs6000_gimplify_va_arg (valist, ptrtype, pre_p, post_p);
11245 return build_va_arg_indirect_ref (t);
11248 /* We need to deal with the fact that the darwin ppc64 ABI is defined by an
11249 earlier version of gcc, with the property that it always applied alignment
11250 adjustments to the va-args (even for zero-sized types). The cheapest way
11251 to deal with this is to replicate the effect of the part of
11252 std_gimplify_va_arg_expr that carries out the align adjust, for the case
11253 of relevance.
11254 We don't need to check for pass-by-reference because of the test above.
11255 We can return a simplified answer, since we know there's no offset to add. */
11257 if (((TARGET_MACHO
11258 && rs6000_darwin64_abi)
11259 || DEFAULT_ABI == ABI_ELFv2
11260 || (DEFAULT_ABI == ABI_AIX && !rs6000_compat_align_parm))
11261 && integer_zerop (TYPE_SIZE (type)))
11263 unsigned HOST_WIDE_INT align, boundary;
11264 tree valist_tmp = get_initialized_tmp_var (valist, pre_p, NULL);
11265 align = PARM_BOUNDARY / BITS_PER_UNIT;
11266 boundary = rs6000_function_arg_boundary (TYPE_MODE (type), type);
11267 if (boundary > MAX_SUPPORTED_STACK_ALIGNMENT)
11268 boundary = MAX_SUPPORTED_STACK_ALIGNMENT;
11269 boundary /= BITS_PER_UNIT;
11270 if (boundary > align)
11272 tree t;
11273 /* This updates arg ptr by the amount that would be necessary
11274 to align the zero-sized (but not zero-alignment) item. */
11275 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11276 fold_build_pointer_plus_hwi (valist_tmp, boundary - 1));
11277 gimplify_and_add (t, pre_p);
11279 t = fold_convert (sizetype, valist_tmp);
11280 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist_tmp,
11281 fold_convert (TREE_TYPE (valist),
11282 fold_build2 (BIT_AND_EXPR, sizetype, t,
11283 size_int (-boundary))));
11284 t = build2 (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
11285 gimplify_and_add (t, pre_p);
11287 /* Since it is zero-sized there's no increment for the item itself. */
11288 valist_tmp = fold_convert (build_pointer_type (type), valist_tmp);
11289 return build_va_arg_indirect_ref (valist_tmp);
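/* E.g. a zero-sized type whose required alignment is 16 rounds the
   argument pointer up to the next 16-byte boundary and yields that
   address, consuming alignment padding but no argument space.  */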
11292 if (DEFAULT_ABI != ABI_V4)
11294 if (targetm.calls.split_complex_arg && TREE_CODE (type) == COMPLEX_TYPE)
11296 tree elem_type = TREE_TYPE (type);
11297 machine_mode elem_mode = TYPE_MODE (elem_type);
11298 int elem_size = GET_MODE_SIZE (elem_mode);
11300 if (elem_size < UNITS_PER_WORD)
11302 tree real_part, imag_part;
11303 gimple_seq post = NULL;
11305 real_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11306 &post);
11307 /* Copy the value into a temporary, lest the formal temporary
11308 be reused out from under us. */
11309 real_part = get_initialized_tmp_var (real_part, pre_p, &post);
11310 gimple_seq_add_seq (pre_p, post);
11312 imag_part = rs6000_gimplify_va_arg (valist, elem_type, pre_p,
11313 post_p);
11315 return build2 (COMPLEX_EXPR, type, real_part, imag_part);
11319 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
11322 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
11323 f_fpr = DECL_CHAIN (f_gpr);
11324 f_res = DECL_CHAIN (f_fpr);
11325 f_ovf = DECL_CHAIN (f_res);
11326 f_sav = DECL_CHAIN (f_ovf);
11328 valist = build_va_arg_indirect_ref (valist);
11329 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
11330 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), unshare_expr (valist),
11331 f_fpr, NULL_TREE);
11332 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), unshare_expr (valist),
11333 f_ovf, NULL_TREE);
11334 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), unshare_expr (valist),
11335 f_sav, NULL_TREE);
11337 size = int_size_in_bytes (type);
11338 rsize = (size + 3) / 4;
11339 align = 1;
11341 if (TARGET_HARD_FLOAT && TARGET_FPRS
11342 && ((TARGET_SINGLE_FLOAT && TYPE_MODE (type) == SFmode)
11343 || (TARGET_DOUBLE_FLOAT
11344 && (TYPE_MODE (type) == DFmode
11345 || TYPE_MODE (type) == TFmode
11346 || TYPE_MODE (type) == SDmode
11347 || TYPE_MODE (type) == DDmode
11348 || TYPE_MODE (type) == TDmode))))
11350 /* FP args go in FP registers, if present. */
11351 reg = fpr;
11352 n_reg = (size + 7) / 8;
11353 sav_ofs = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4) * 4;
11354 sav_scale = ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? 8 : 4);
11355 if (TYPE_MODE (type) != SFmode && TYPE_MODE (type) != SDmode)
11356 align = 8;
11358 else
11360 /* Otherwise into GP registers. */
11361 reg = gpr;
11362 n_reg = rsize;
11363 sav_ofs = 0;
11364 sav_scale = 4;
11365 if (n_reg == 2)
11366 align = 8;
11369 /* Pull the value out of the saved registers.... */
11371 lab_over = NULL;
11372 addr = create_tmp_var (ptr_type_node, "addr");
11374 /* AltiVec vectors never go in registers when -mabi=altivec. */
11375 if (TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (TYPE_MODE (type)))
11376 align = 16;
11377 else
11379 lab_false = create_artificial_label (input_location);
11380 lab_over = create_artificial_label (input_location);
11382 /* Long long and SPE vectors are aligned in the registers.
11383 As are any other 2-GPR items such as complex int, due to a
11384 historical mistake. */
11385 u = reg;
11386 if (n_reg == 2 && reg == gpr)
11388 regalign = 1;
11389 u = build2 (BIT_AND_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11390 build_int_cst (TREE_TYPE (reg), n_reg - 1));
11391 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg),
11392 unshare_expr (reg), u);
11394 /* _Decimal128 is passed in even/odd fpr pairs; the stored
11395 reg number is 0 for f1, so we want to make it odd. */
11396 else if (reg == fpr && TYPE_MODE (type) == TDmode)
11398 t = build2 (BIT_IOR_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11399 build_int_cst (TREE_TYPE (reg), 1));
11400 u = build2 (MODIFY_EXPR, void_type_node, unshare_expr (reg), t);
11403 t = fold_convert (TREE_TYPE (reg), size_int (8 - n_reg + 1));
11404 t = build2 (GE_EXPR, boolean_type_node, u, t);
11405 u = build1 (GOTO_EXPR, void_type_node, lab_false);
11406 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
11407 gimplify_and_add (t, pre_p);
11409 t = sav;
11410 if (sav_ofs)
11411 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
11413 u = build2 (POSTINCREMENT_EXPR, TREE_TYPE (reg), unshare_expr (reg),
11414 build_int_cst (TREE_TYPE (reg), n_reg));
11415 u = fold_convert (sizetype, u);
11416 u = build2 (MULT_EXPR, sizetype, u, size_int (sav_scale));
11417 t = fold_build_pointer_plus (t, u);
11419 /* _Decimal32 varargs are located in the second word of the 64-bit
11420 FP register for 32-bit binaries. */
11421 if (!TARGET_POWERPC64
11422 && TARGET_HARD_FLOAT && TARGET_FPRS
11423 && TYPE_MODE (type) == SDmode)
11424 t = fold_build_pointer_plus_hwi (t, size);
11426 gimplify_assign (addr, t, pre_p);
11428 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
11430 stmt = gimple_build_label (lab_false);
11431 gimple_seq_add_stmt (pre_p, stmt);
11433 if ((n_reg == 2 && !regalign) || n_reg > 2)
11435 /* Ensure that we don't find any more args in regs.
11436 Alignment has already taken care of the special cases. */
11437 gimplify_assign (reg, build_int_cst (TREE_TYPE (reg), 8), pre_p);
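/* Setting the counter to 8 (one past the last argument register)
   makes every subsequent test against 8 - n_reg + 1 fail, so later
   va_arg fetches go straight to the overflow area; e.g. once a
   two-register long long no longer fits in r9/r10, the remaining
   GPR-class arguments live on the stack as well.  */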
11441 /* ... otherwise out of the overflow area. */
11443 /* Care for on-stack alignment if needed. */
11444 t = ovf;
11445 if (align != 1)
11447 t = fold_build_pointer_plus_hwi (t, align - 1);
11448 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
11449 build_int_cst (TREE_TYPE (t), -align));
11451 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
11453 gimplify_assign (unshare_expr (addr), t, pre_p);
11455 t = fold_build_pointer_plus_hwi (t, size);
11456 gimplify_assign (unshare_expr (ovf), t, pre_p);
11458 if (lab_over)
11460 stmt = gimple_build_label (lab_over);
11461 gimple_seq_add_stmt (pre_p, stmt);
11464 if (STRICT_ALIGNMENT
11465 && (TYPE_ALIGN (type)
11466 > (unsigned) BITS_PER_UNIT * (align < 4 ? 4 : align)))
11468 /* The value (of type complex double, for example) may not be
11469 aligned in memory in the saved registers, so copy via a
11470 temporary. (This is the same code as used for SPARC.) */
11471 tree tmp = create_tmp_var (type, "va_arg_tmp");
11472 tree dest_addr = build_fold_addr_expr (tmp);
11474 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
11475 3, dest_addr, addr, size_int (rsize * 4));
11477 gimplify_and_add (copy, pre_p);
11478 addr = dest_addr;
11481 addr = fold_convert (ptrtype, addr);
11482 return build_va_arg_indirect_ref (addr);
11485 /* Builtins. */
11487 static void
11488 def_builtin (const char *name, tree type, enum rs6000_builtins code)
11490 tree t;
11491 unsigned classify = rs6000_builtin_info[(int)code].attr;
11492 const char *attr_string = "";
11494 gcc_assert (name != NULL);
11495 gcc_assert (IN_RANGE ((int)code, 0, (int)RS6000_BUILTIN_COUNT));
11497 if (rs6000_builtin_decls[(int)code])
11498 fatal_error ("internal error: builtin function %s already processed", name);
11500 rs6000_builtin_decls[(int)code] = t =
11501 add_builtin_function (name, type, (int)code, BUILT_IN_MD, NULL, NULL_TREE);
11503 /* Set any special attributes. */
11504 if ((classify & RS6000_BTC_CONST) != 0)
11506 /* const function, function only depends on the inputs. */
11507 TREE_READONLY (t) = 1;
11508 TREE_NOTHROW (t) = 1;
11509 attr_string = ", const";
11511 else if ((classify & RS6000_BTC_PURE) != 0)
11513 /* pure function, function can read global memory, but does not set any
11514 external state. */
11515 DECL_PURE_P (t) = 1;
11516 TREE_NOTHROW (t) = 1;
11517 attr_string = ", pure";
11519 else if ((classify & RS6000_BTC_FP) != 0)
11521 /* Function is a math function. If rounding mode is on, then treat the
11522 function as not reading global memory, but it can have arbitrary side
11523 effects. If it is off, then assume the function is a const function.
11524 This mimics the ATTR_MATHFN_FPROUNDING attribute in
11525 builtin-attrs.def that is used for the math functions. */
11526 TREE_NOTHROW (t) = 1;
11527 if (flag_rounding_math)
11529 DECL_PURE_P (t) = 1;
11530 DECL_IS_NOVOPS (t) = 1;
11531 attr_string = ", fp, pure";
11533 else
11535 TREE_READONLY (t) = 1;
11536 attr_string = ", fp, const";
11539 else if ((classify & RS6000_BTC_ATTR_MASK) != 0)
11540 gcc_unreachable ();
11542 if (TARGET_DEBUG_BUILTIN)
11543 fprintf (stderr, "rs6000_builtin, code = %4d, %s%s\n",
11544 (int)code, name, attr_string);
11547 /* Simple ternary operations: VECd = foo (VECa, VECb, VECc). */
11549 #undef RS6000_BUILTIN_1
11550 #undef RS6000_BUILTIN_2
11551 #undef RS6000_BUILTIN_3
11552 #undef RS6000_BUILTIN_A
11553 #undef RS6000_BUILTIN_D
11554 #undef RS6000_BUILTIN_E
11555 #undef RS6000_BUILTIN_H
11556 #undef RS6000_BUILTIN_P
11557 #undef RS6000_BUILTIN_Q
11558 #undef RS6000_BUILTIN_S
11559 #undef RS6000_BUILTIN_X
11561 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11562 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11563 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE) \
11564 { MASK, ICODE, NAME, ENUM },
11566 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11567 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11568 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11569 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11570 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11571 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11572 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11573 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11575 static const struct builtin_description bdesc_3arg[] =
11577 #include "rs6000-builtin.def"
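/* With the definitions above, an entry in rs6000-builtin.def that
   expands through RS6000_BUILTIN_3 contributes one initializer to
   bdesc_3arg, roughly
     { MASK, CODE_FOR_xxx, "__builtin_xxx", RS6000_BUILTIN_xxx },
   while entries of every other class expand to nothing, so each
   inclusion of the .def file selects exactly one class of builtins.
   The same #undef/#define dance is repeated for each table below.  */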
11580 /* DST operations: void foo (void *, const int, const char). */
11582 #undef RS6000_BUILTIN_1
11583 #undef RS6000_BUILTIN_2
11584 #undef RS6000_BUILTIN_3
11585 #undef RS6000_BUILTIN_A
11586 #undef RS6000_BUILTIN_D
11587 #undef RS6000_BUILTIN_E
11588 #undef RS6000_BUILTIN_H
11589 #undef RS6000_BUILTIN_P
11590 #undef RS6000_BUILTIN_Q
11591 #undef RS6000_BUILTIN_S
11592 #undef RS6000_BUILTIN_X
11594 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11595 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11596 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11597 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11598 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE) \
11599 { MASK, ICODE, NAME, ENUM },
11601 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11602 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11603 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11604 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11605 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11606 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11608 static const struct builtin_description bdesc_dst[] =
11610 #include "rs6000-builtin.def"
11613 /* Simple binary operations: VECc = foo (VECa, VECb). */
11615 #undef RS6000_BUILTIN_1
11616 #undef RS6000_BUILTIN_2
11617 #undef RS6000_BUILTIN_3
11618 #undef RS6000_BUILTIN_A
11619 #undef RS6000_BUILTIN_D
11620 #undef RS6000_BUILTIN_E
11621 #undef RS6000_BUILTIN_H
11622 #undef RS6000_BUILTIN_P
11623 #undef RS6000_BUILTIN_Q
11624 #undef RS6000_BUILTIN_S
11625 #undef RS6000_BUILTIN_X
11627 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11628 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE) \
11629 { MASK, ICODE, NAME, ENUM },
11631 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11632 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11633 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11634 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11635 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11636 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11637 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11638 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11639 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11641 static const struct builtin_description bdesc_2arg[] =
11643 #include "rs6000-builtin.def"
11646 #undef RS6000_BUILTIN_1
11647 #undef RS6000_BUILTIN_2
11648 #undef RS6000_BUILTIN_3
11649 #undef RS6000_BUILTIN_A
11650 #undef RS6000_BUILTIN_D
11651 #undef RS6000_BUILTIN_E
11652 #undef RS6000_BUILTIN_H
11653 #undef RS6000_BUILTIN_P
11654 #undef RS6000_BUILTIN_Q
11655 #undef RS6000_BUILTIN_S
11656 #undef RS6000_BUILTIN_X
11658 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11659 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11660 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11661 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11662 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11663 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11664 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11665 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE) \
11666 { MASK, ICODE, NAME, ENUM },
11668 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11669 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11670 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11672 /* AltiVec predicates. */
11674 static const struct builtin_description bdesc_altivec_preds[] =
11676 #include "rs6000-builtin.def"
11679 /* SPE predicates. */
11680 #undef RS6000_BUILTIN_1
11681 #undef RS6000_BUILTIN_2
11682 #undef RS6000_BUILTIN_3
11683 #undef RS6000_BUILTIN_A
11684 #undef RS6000_BUILTIN_D
11685 #undef RS6000_BUILTIN_E
11686 #undef RS6000_BUILTIN_H
11687 #undef RS6000_BUILTIN_P
11688 #undef RS6000_BUILTIN_Q
11689 #undef RS6000_BUILTIN_S
11690 #undef RS6000_BUILTIN_X
11692 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11693 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11694 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11695 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11696 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11697 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11698 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11699 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11700 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11701 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE) \
11702 { MASK, ICODE, NAME, ENUM },
11704 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11706 static const struct builtin_description bdesc_spe_predicates[] =
11708 #include "rs6000-builtin.def"
11711 /* SPE evsel predicates. */
11712 #undef RS6000_BUILTIN_1
11713 #undef RS6000_BUILTIN_2
11714 #undef RS6000_BUILTIN_3
11715 #undef RS6000_BUILTIN_A
11716 #undef RS6000_BUILTIN_D
11717 #undef RS6000_BUILTIN_E
11718 #undef RS6000_BUILTIN_H
11719 #undef RS6000_BUILTIN_P
11720 #undef RS6000_BUILTIN_Q
11721 #undef RS6000_BUILTIN_S
11722 #undef RS6000_BUILTIN_X
11724 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11725 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11726 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11727 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11728 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11729 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE) \
11730 { MASK, ICODE, NAME, ENUM },
11732 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11733 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11734 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11735 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11736 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11738 static const struct builtin_description bdesc_spe_evsel[] =
11740 #include "rs6000-builtin.def"
11743 /* PAIRED predicates. */
11744 #undef RS6000_BUILTIN_1
11745 #undef RS6000_BUILTIN_2
11746 #undef RS6000_BUILTIN_3
11747 #undef RS6000_BUILTIN_A
11748 #undef RS6000_BUILTIN_D
11749 #undef RS6000_BUILTIN_E
11750 #undef RS6000_BUILTIN_H
11751 #undef RS6000_BUILTIN_P
11752 #undef RS6000_BUILTIN_Q
11753 #undef RS6000_BUILTIN_S
11754 #undef RS6000_BUILTIN_X
11756 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11757 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11758 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11759 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11760 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11761 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11762 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11763 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11764 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE) \
11765 { MASK, ICODE, NAME, ENUM },
11767 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11768 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11770 static const struct builtin_description bdesc_paired_preds[] =
11772 #include "rs6000-builtin.def"
11775 /* ABS* operations. */
11777 #undef RS6000_BUILTIN_1
11778 #undef RS6000_BUILTIN_2
11779 #undef RS6000_BUILTIN_3
11780 #undef RS6000_BUILTIN_A
11781 #undef RS6000_BUILTIN_D
11782 #undef RS6000_BUILTIN_E
11783 #undef RS6000_BUILTIN_H
11784 #undef RS6000_BUILTIN_P
11785 #undef RS6000_BUILTIN_Q
11786 #undef RS6000_BUILTIN_S
11787 #undef RS6000_BUILTIN_X
11789 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11790 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11791 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11792 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE) \
11793 { MASK, ICODE, NAME, ENUM },
11795 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11796 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11797 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11798 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11799 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11800 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11801 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11803 static const struct builtin_description bdesc_abs[] =
11805 #include "rs6000-builtin.def"
11808 /* Simple unary operations: VECb = foo (unsigned literal) or VECb =
11809 foo (VECa). */
11811 #undef RS6000_BUILTIN_1
11812 #undef RS6000_BUILTIN_2
11813 #undef RS6000_BUILTIN_3
11814 #undef RS6000_BUILTIN_A
11815 #undef RS6000_BUILTIN_D
11816 #undef RS6000_BUILTIN_E
11817 #undef RS6000_BUILTIN_H
11818 #undef RS6000_BUILTIN_P
11819 #undef RS6000_BUILTIN_Q
11820 #undef RS6000_BUILTIN_S
11821 #undef RS6000_BUILTIN_X
11823 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE) \
11824 { MASK, ICODE, NAME, ENUM },
11826 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11827 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11828 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11829 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11830 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11831 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE)
11832 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11833 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11834 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11835 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11837 static const struct builtin_description bdesc_1arg[] =
11839 #include "rs6000-builtin.def"
11842 /* HTM builtins. */
11843 #undef RS6000_BUILTIN_1
11844 #undef RS6000_BUILTIN_2
11845 #undef RS6000_BUILTIN_3
11846 #undef RS6000_BUILTIN_A
11847 #undef RS6000_BUILTIN_D
11848 #undef RS6000_BUILTIN_E
11849 #undef RS6000_BUILTIN_H
11850 #undef RS6000_BUILTIN_P
11851 #undef RS6000_BUILTIN_Q
11852 #undef RS6000_BUILTIN_S
11853 #undef RS6000_BUILTIN_X
11855 #define RS6000_BUILTIN_1(ENUM, NAME, MASK, ATTR, ICODE)
11856 #define RS6000_BUILTIN_2(ENUM, NAME, MASK, ATTR, ICODE)
11857 #define RS6000_BUILTIN_3(ENUM, NAME, MASK, ATTR, ICODE)
11858 #define RS6000_BUILTIN_A(ENUM, NAME, MASK, ATTR, ICODE)
11859 #define RS6000_BUILTIN_D(ENUM, NAME, MASK, ATTR, ICODE)
11860 #define RS6000_BUILTIN_E(ENUM, NAME, MASK, ATTR, ICODE)
11861 #define RS6000_BUILTIN_H(ENUM, NAME, MASK, ATTR, ICODE) \
11862 { MASK, ICODE, NAME, ENUM },
11864 #define RS6000_BUILTIN_P(ENUM, NAME, MASK, ATTR, ICODE)
11865 #define RS6000_BUILTIN_Q(ENUM, NAME, MASK, ATTR, ICODE)
11866 #define RS6000_BUILTIN_S(ENUM, NAME, MASK, ATTR, ICODE)
11867 #define RS6000_BUILTIN_X(ENUM, NAME, MASK, ATTR, ICODE)
11869 static const struct builtin_description bdesc_htm[] =
11871 #include "rs6000-builtin.def"
11874 #undef RS6000_BUILTIN_1
11875 #undef RS6000_BUILTIN_2
11876 #undef RS6000_BUILTIN_3
11877 #undef RS6000_BUILTIN_A
11878 #undef RS6000_BUILTIN_D
11879 #undef RS6000_BUILTIN_E
11880 #undef RS6000_BUILTIN_H
11881 #undef RS6000_BUILTIN_P
11882 #undef RS6000_BUILTIN_Q
11883 #undef RS6000_BUILTIN_S
11885 /* Return true if a builtin function is overloaded. */
11886 bool
11887 rs6000_overloaded_builtin_p (enum rs6000_builtins fncode)
11889 return (rs6000_builtin_info[(int)fncode].attr & RS6000_BTC_OVERLOADED) != 0;
11892 /* Expand an expression EXP that calls a builtin without arguments. */
11893 static rtx
11894 rs6000_expand_zeroop_builtin (enum insn_code icode, rtx target)
11896 rtx pat;
11897 machine_mode tmode = insn_data[icode].operand[0].mode;
11899 if (icode == CODE_FOR_nothing)
11900 /* Builtin not supported on this processor. */
11901 return 0;
11903 if (target == 0
11904 || GET_MODE (target) != tmode
11905 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11906 target = gen_reg_rtx (tmode);
11908 pat = GEN_FCN (icode) (target);
11909 if (! pat)
11910 return 0;
11911 emit_insn (pat);
11913 return target;
11917 static rtx
11918 rs6000_expand_mtfsf_builtin (enum insn_code icode, tree exp)
11920 rtx pat;
11921 tree arg0 = CALL_EXPR_ARG (exp, 0);
11922 tree arg1 = CALL_EXPR_ARG (exp, 1);
11923 rtx op0 = expand_normal (arg0);
11924 rtx op1 = expand_normal (arg1);
11925 machine_mode mode0 = insn_data[icode].operand[0].mode;
11926 machine_mode mode1 = insn_data[icode].operand[1].mode;
11928 if (icode == CODE_FOR_nothing)
11929 /* Builtin not supported on this processor. */
11930 return 0;
11932 /* If we got invalid arguments bail out before generating bad rtl. */
11933 if (arg0 == error_mark_node || arg1 == error_mark_node)
11934 return const0_rtx;
11936 if (GET_CODE (op0) != CONST_INT
11937 || INTVAL (op0) > 255
11938 || INTVAL (op0) < 0)
11940 error ("argument 1 must be an 8-bit field value");
11941 return const0_rtx;
11944 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
11945 op0 = copy_to_mode_reg (mode0, op0);
11947 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
11948 op1 = copy_to_mode_reg (mode1, op1);
11950 pat = GEN_FCN (icode) (op0, op1);
11951 if (! pat)
11952 return const0_rtx;
11953 emit_insn (pat);
11955 return NULL_RTX;
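/* A typical use is something like __builtin_mtfsf (0xff, d), which
   copies all eight 4-bit fields of the FPSCR from the double D; the
   first operand must be a literal because the mtfsf instruction
   encodes the field mask in the insn itself.  */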
11959 static rtx
11960 rs6000_expand_unop_builtin (enum insn_code icode, tree exp, rtx target)
11962 rtx pat;
11963 tree arg0 = CALL_EXPR_ARG (exp, 0);
11964 rtx op0 = expand_normal (arg0);
11965 machine_mode tmode = insn_data[icode].operand[0].mode;
11966 machine_mode mode0 = insn_data[icode].operand[1].mode;
11968 if (icode == CODE_FOR_nothing)
11969 /* Builtin not supported on this processor. */
11970 return 0;
11972 /* If we got invalid arguments bail out before generating bad rtl. */
11973 if (arg0 == error_mark_node)
11974 return const0_rtx;
11976 if (icode == CODE_FOR_altivec_vspltisb
11977 || icode == CODE_FOR_altivec_vspltish
11978 || icode == CODE_FOR_altivec_vspltisw
11979 || icode == CODE_FOR_spe_evsplatfi
11980 || icode == CODE_FOR_spe_evsplati)
11982 /* Only allow 5-bit *signed* literals. */
11983 if (GET_CODE (op0) != CONST_INT
11984 || INTVAL (op0) > 15
11985 || INTVAL (op0) < -16)
11987 error ("argument 1 must be a 5-bit signed literal");
11988 return const0_rtx;
11992 if (target == 0
11993 || GET_MODE (target) != tmode
11994 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11995 target = gen_reg_rtx (tmode);
11997 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
11998 op0 = copy_to_mode_reg (mode0, op0);
12000 pat = GEN_FCN (icode) (target, op0);
12001 if (! pat)
12002 return 0;
12003 emit_insn (pat);
12005 return target;
12008 static rtx
12009 altivec_expand_abs_builtin (enum insn_code icode, tree exp, rtx target)
12011 rtx pat, scratch1, scratch2;
12012 tree arg0 = CALL_EXPR_ARG (exp, 0);
12013 rtx op0 = expand_normal (arg0);
12014 machine_mode tmode = insn_data[icode].operand[0].mode;
12015 machine_mode mode0 = insn_data[icode].operand[1].mode;
12017 /* If we have invalid arguments, bail out before generating bad rtl. */
12018 if (arg0 == error_mark_node)
12019 return const0_rtx;
12021 if (target == 0
12022 || GET_MODE (target) != tmode
12023 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12024 target = gen_reg_rtx (tmode);
12026 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12027 op0 = copy_to_mode_reg (mode0, op0);
12029 scratch1 = gen_reg_rtx (mode0);
12030 scratch2 = gen_reg_rtx (mode0);
12032 pat = GEN_FCN (icode) (target, op0, scratch1, scratch2);
12033 if (! pat)
12034 return 0;
12035 emit_insn (pat);
12037 return target;
12040 static rtx
12041 rs6000_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
12043 rtx pat;
12044 tree arg0 = CALL_EXPR_ARG (exp, 0);
12045 tree arg1 = CALL_EXPR_ARG (exp, 1);
12046 rtx op0 = expand_normal (arg0);
12047 rtx op1 = expand_normal (arg1);
12048 machine_mode tmode = insn_data[icode].operand[0].mode;
12049 machine_mode mode0 = insn_data[icode].operand[1].mode;
12050 machine_mode mode1 = insn_data[icode].operand[2].mode;
12052 if (icode == CODE_FOR_nothing)
12053 /* Builtin not supported on this processor. */
12054 return 0;
12056 /* If we got invalid arguments bail out before generating bad rtl. */
12057 if (arg0 == error_mark_node || arg1 == error_mark_node)
12058 return const0_rtx;
12060 if (icode == CODE_FOR_altivec_vcfux
12061 || icode == CODE_FOR_altivec_vcfsx
12062 || icode == CODE_FOR_altivec_vctsxs
12063 || icode == CODE_FOR_altivec_vctuxs
12064 || icode == CODE_FOR_altivec_vspltb
12065 || icode == CODE_FOR_altivec_vsplth
12066 || icode == CODE_FOR_altivec_vspltw
12067 || icode == CODE_FOR_spe_evaddiw
12068 || icode == CODE_FOR_spe_evldd
12069 || icode == CODE_FOR_spe_evldh
12070 || icode == CODE_FOR_spe_evldw
12071 || icode == CODE_FOR_spe_evlhhesplat
12072 || icode == CODE_FOR_spe_evlhhossplat
12073 || icode == CODE_FOR_spe_evlhhousplat
12074 || icode == CODE_FOR_spe_evlwhe
12075 || icode == CODE_FOR_spe_evlwhos
12076 || icode == CODE_FOR_spe_evlwhou
12077 || icode == CODE_FOR_spe_evlwhsplat
12078 || icode == CODE_FOR_spe_evlwwsplat
12079 || icode == CODE_FOR_spe_evrlwi
12080 || icode == CODE_FOR_spe_evslwi
12081 || icode == CODE_FOR_spe_evsrwis
12082 || icode == CODE_FOR_spe_evsubifw
12083 || icode == CODE_FOR_spe_evsrwiu)
12085 /* Only allow 5-bit unsigned literals. */
12086 STRIP_NOPS (arg1);
12087 if (TREE_CODE (arg1) != INTEGER_CST
12088 || TREE_INT_CST_LOW (arg1) & ~0x1f)
12090 error ("argument 2 must be a 5-bit unsigned literal");
12091 return const0_rtx;
12095 if (target == 0
12096 || GET_MODE (target) != tmode
12097 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12098 target = gen_reg_rtx (tmode);
12100 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12101 op0 = copy_to_mode_reg (mode0, op0);
12102 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12103 op1 = copy_to_mode_reg (mode1, op1);
12105 pat = GEN_FCN (icode) (target, op0, op1);
12106 if (! pat)
12107 return 0;
12108 emit_insn (pat);
12110 return target;
12113 static rtx
12114 altivec_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
12116 rtx pat, scratch;
12117 tree cr6_form = CALL_EXPR_ARG (exp, 0);
12118 tree arg0 = CALL_EXPR_ARG (exp, 1);
12119 tree arg1 = CALL_EXPR_ARG (exp, 2);
12120 rtx op0 = expand_normal (arg0);
12121 rtx op1 = expand_normal (arg1);
12122 machine_mode tmode = SImode;
12123 machine_mode mode0 = insn_data[icode].operand[1].mode;
12124 machine_mode mode1 = insn_data[icode].operand[2].mode;
12125 int cr6_form_int;
12127 if (TREE_CODE (cr6_form) != INTEGER_CST)
12129 error ("argument 1 of __builtin_altivec_predicate must be a constant");
12130 return const0_rtx;
12132 else
12133 cr6_form_int = TREE_INT_CST_LOW (cr6_form);
12135 gcc_assert (mode0 == mode1);
12137 /* If we have invalid arguments, bail out before generating bad rtl. */
12138 if (arg0 == error_mark_node || arg1 == error_mark_node)
12139 return const0_rtx;
12141 if (target == 0
12142 || GET_MODE (target) != tmode
12143 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12144 target = gen_reg_rtx (tmode);
12146 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12147 op0 = copy_to_mode_reg (mode0, op0);
12148 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12149 op1 = copy_to_mode_reg (mode1, op1);
12151 scratch = gen_reg_rtx (mode0);
12153 pat = GEN_FCN (icode) (scratch, op0, op1);
12154 if (! pat)
12155 return 0;
12156 emit_insn (pat);
12158 /* The vec_any* and vec_all* predicates use the same opcodes for two
12159 different operations, but the bits in CR6 will be different
12160 depending on what information we want. So we have to play tricks
12161 with CR6 to get the right bits out.
12163 If you think this is disgusting, look at the specs for the
12164 AltiVec predicates. */
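/* For instance, altivec.h expands both vec_all_eq and vec_any_ne
   into the same vcmpequ*. comparison; they differ only in the CR6
   form passed as argument 1 (__CR6_LT = 2 versus __CR6_LT_REV = 3),
   which selects among the tests emitted below.  */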
12166 switch (cr6_form_int)
12168 case 0:
12169 emit_insn (gen_cr6_test_for_zero (target));
12170 break;
12171 case 1:
12172 emit_insn (gen_cr6_test_for_zero_reverse (target));
12173 break;
12174 case 2:
12175 emit_insn (gen_cr6_test_for_lt (target));
12176 break;
12177 case 3:
12178 emit_insn (gen_cr6_test_for_lt_reverse (target));
12179 break;
12180 default:
12181 error ("argument 1 of __builtin_altivec_predicate is out of range");
12182 break;
12185 return target;
12188 static rtx
12189 paired_expand_lv_builtin (enum insn_code icode, tree exp, rtx target)
12191 rtx pat, addr;
12192 tree arg0 = CALL_EXPR_ARG (exp, 0);
12193 tree arg1 = CALL_EXPR_ARG (exp, 1);
12194 machine_mode tmode = insn_data[icode].operand[0].mode;
12195 machine_mode mode0 = Pmode;
12196 machine_mode mode1 = Pmode;
12197 rtx op0 = expand_normal (arg0);
12198 rtx op1 = expand_normal (arg1);
12200 if (icode == CODE_FOR_nothing)
12201 /* Builtin not supported on this processor. */
12202 return 0;
12204 /* If we got invalid arguments bail out before generating bad rtl. */
12205 if (arg0 == error_mark_node || arg1 == error_mark_node)
12206 return const0_rtx;
12208 if (target == 0
12209 || GET_MODE (target) != tmode
12210 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12211 target = gen_reg_rtx (tmode);
12213 op1 = copy_to_mode_reg (mode1, op1);
12215 if (op0 == const0_rtx)
12217 addr = gen_rtx_MEM (tmode, op1);
12219 else
12221 op0 = copy_to_mode_reg (mode0, op0);
12222 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op0, op1));
12225 pat = GEN_FCN (icode) (target, addr);
12227 if (! pat)
12228 return 0;
12229 emit_insn (pat);
12231 return target;
12234 /* Return a constant vector for use as a little-endian permute control vector
12235 to reverse the order of elements of the given vector mode. */
12236 static rtx
12237 swap_selector_for_mode (machine_mode mode)
12239 /* These are little endian vectors, so their elements are reversed
12240 from what you would normally expect for a permute control vector. */
12241 unsigned int swap2[16] = {7,6,5,4,3,2,1,0,15,14,13,12,11,10,9,8};
12242 unsigned int swap4[16] = {3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12};
12243 unsigned int swap8[16] = {1,0,3,2,5,4,7,6,9,8,11,10,13,12,15,14};
12244 unsigned int swap16[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
12245 unsigned int *swaparray, i;
12246 rtx perm[16];
12248 switch (mode)
12250 case V2DFmode:
12251 case V2DImode:
12252 swaparray = swap2;
12253 break;
12254 case V4SFmode:
12255 case V4SImode:
12256 swaparray = swap4;
12257 break;
12258 case V8HImode:
12259 swaparray = swap8;
12260 break;
12261 case V16QImode:
12262 swaparray = swap16;
12263 break;
12264 default:
12265 gcc_unreachable ();
12268 for (i = 0; i < 16; ++i)
12269 perm[i] = GEN_INT (swaparray[i]);
12271 return force_reg (V16QImode, gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm)));
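/* For V4SImode, for instance, the selector bytes 3,2,1,0, 7,6,5,4,
   11,10,9,8, 15,14,13,12 would normally describe a byte swap within
   each word; stored as a little-endian vector, however, the constant
   makes the consuming vperm reverse the order of the four word
   elements instead, as the function comment above promises.  */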
12274 /* Generate code for an "lvx", "lvxl", or "lve*x" built-in for a little endian target
12275 with -maltivec=be specified. Issue the load followed by an element-reversing
12276 permute. */
12277 void
12278 altivec_expand_lvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12280 rtx tmp = gen_reg_rtx (mode);
12281 rtx load = gen_rtx_SET (VOIDmode, tmp, op1);
12282 rtx lvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12283 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, load, lvx));
12284 rtx sel = swap_selector_for_mode (mode);
12285 rtx vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, tmp, tmp, sel), UNSPEC_VPERM);
12287 gcc_assert (REG_P (op0));
12288 emit_insn (par);
12289 emit_insn (gen_rtx_SET (VOIDmode, op0, vperm));
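/* The generated sequence is, in effect,
     lvx   vTMP, 0, rBASE
     vperm vDST, vTMP, vTMP, vSEL
   so OP0 receives the elements in the order mandated by
   -maltivec=be despite the little-endian load.  */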
12292 /* Generate code for a "stvx" or "stvxl" built-in for a little endian target
12293 with -maltivec=be specified. Issue the store preceded by an element-reversing
12294 permute. */
12295 void
12296 altivec_expand_stvx_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12298 rtx tmp = gen_reg_rtx (mode);
12299 rtx store = gen_rtx_SET (VOIDmode, op0, tmp);
12300 rtx stvx = gen_rtx_UNSPEC (mode, gen_rtvec (1, const0_rtx), unspec);
12301 rtx par = gen_rtx_PARALLEL (mode, gen_rtvec (2, store, stvx));
12302 rtx sel = swap_selector_for_mode (mode);
12303 rtx vperm;
12305 gcc_assert (REG_P (op1));
12306 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12307 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12308 emit_insn (par);
12311 /* Generate code for a "stve*x" built-in for a little endian target with -maltivec=be
12312 specified. Issue the store preceded by an element-reversing permute. */
12313 void
12314 altivec_expand_stvex_be (rtx op0, rtx op1, machine_mode mode, unsigned unspec)
12316 machine_mode inner_mode = GET_MODE_INNER (mode);
12317 rtx tmp = gen_reg_rtx (mode);
12318 rtx stvx = gen_rtx_UNSPEC (inner_mode, gen_rtvec (1, tmp), unspec);
12319 rtx sel = swap_selector_for_mode (mode);
12320 rtx vperm;
12322 gcc_assert (REG_P (op1));
12323 vperm = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op1, sel), UNSPEC_VPERM);
12324 emit_insn (gen_rtx_SET (VOIDmode, tmp, vperm));
12325 emit_insn (gen_rtx_SET (VOIDmode, op0, stvx));
12328 static rtx
12329 altivec_expand_lv_builtin (enum insn_code icode, tree exp, rtx target, bool blk)
12331 rtx pat, addr;
12332 tree arg0 = CALL_EXPR_ARG (exp, 0);
12333 tree arg1 = CALL_EXPR_ARG (exp, 1);
12334 machine_mode tmode = insn_data[icode].operand[0].mode;
12335 machine_mode mode0 = Pmode;
12336 machine_mode mode1 = Pmode;
12337 rtx op0 = expand_normal (arg0);
12338 rtx op1 = expand_normal (arg1);
12340 if (icode == CODE_FOR_nothing)
12341 /* Builtin not supported on this processor. */
12342 return 0;
12344 /* If we got invalid arguments bail out before generating bad rtl. */
12345 if (arg0 == error_mark_node || arg1 == error_mark_node)
12346 return const0_rtx;
12348 if (target == 0
12349 || GET_MODE (target) != tmode
12350 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12351 target = gen_reg_rtx (tmode);
12353 op1 = copy_to_mode_reg (mode1, op1);
12355 if (op0 == const0_rtx)
12357 addr = gen_rtx_MEM (blk ? BLKmode : tmode, op1);
12359 else
12361 op0 = copy_to_mode_reg (mode0, op0);
12362 addr = gen_rtx_MEM (blk ? BLKmode : tmode, gen_rtx_PLUS (Pmode, op0, op1));
12365 pat = GEN_FCN (icode) (target, addr);
12367 if (! pat)
12368 return 0;
12369 emit_insn (pat);
12371 return target;
12374 static rtx
12375 spe_expand_stv_builtin (enum insn_code icode, tree exp)
12377 tree arg0 = CALL_EXPR_ARG (exp, 0);
12378 tree arg1 = CALL_EXPR_ARG (exp, 1);
12379 tree arg2 = CALL_EXPR_ARG (exp, 2);
12380 rtx op0 = expand_normal (arg0);
12381 rtx op1 = expand_normal (arg1);
12382 rtx op2 = expand_normal (arg2);
12383 rtx pat;
12384 machine_mode mode0 = insn_data[icode].operand[0].mode;
12385 machine_mode mode1 = insn_data[icode].operand[1].mode;
12386 machine_mode mode2 = insn_data[icode].operand[2].mode;
12388 /* Invalid arguments. Bail before doing anything stoopid! */
12389 if (arg0 == error_mark_node
12390 || arg1 == error_mark_node
12391 || arg2 == error_mark_node)
12392 return const0_rtx;
12394 if (! (*insn_data[icode].operand[2].predicate) (op0, mode2))
12395 op0 = copy_to_mode_reg (mode2, op0);
12396 if (! (*insn_data[icode].operand[0].predicate) (op1, mode0))
12397 op1 = copy_to_mode_reg (mode0, op1);
12398 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
12399 op2 = copy_to_mode_reg (mode1, op2);
12401 pat = GEN_FCN (icode) (op1, op2, op0);
12402 if (pat)
12403 emit_insn (pat);
12404 return NULL_RTX;
12407 static rtx
12408 paired_expand_stv_builtin (enum insn_code icode, tree exp)
12410 tree arg0 = CALL_EXPR_ARG (exp, 0);
12411 tree arg1 = CALL_EXPR_ARG (exp, 1);
12412 tree arg2 = CALL_EXPR_ARG (exp, 2);
12413 rtx op0 = expand_normal (arg0);
12414 rtx op1 = expand_normal (arg1);
12415 rtx op2 = expand_normal (arg2);
12416 rtx pat, addr;
12417 machine_mode tmode = insn_data[icode].operand[0].mode;
12418 machine_mode mode1 = Pmode;
12419 machine_mode mode2 = Pmode;
12421 /* Invalid arguments. Bail before doing anything stoopid! */
12422 if (arg0 == error_mark_node
12423 || arg1 == error_mark_node
12424 || arg2 == error_mark_node)
12425 return const0_rtx;
12427 if (! (*insn_data[icode].operand[1].predicate) (op0, tmode))
12428 op0 = copy_to_mode_reg (tmode, op0);
12430 op2 = copy_to_mode_reg (mode2, op2);
12432 if (op1 == const0_rtx)
12434 addr = gen_rtx_MEM (tmode, op2);
12436 else
12438 op1 = copy_to_mode_reg (mode1, op1);
12439 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12442 pat = GEN_FCN (icode) (addr, op0);
12443 if (pat)
12444 emit_insn (pat);
12445 return NULL_RTX;
12448 static rtx
12449 altivec_expand_stv_builtin (enum insn_code icode, tree exp)
12451 tree arg0 = CALL_EXPR_ARG (exp, 0);
12452 tree arg1 = CALL_EXPR_ARG (exp, 1);
12453 tree arg2 = CALL_EXPR_ARG (exp, 2);
12454 rtx op0 = expand_normal (arg0);
12455 rtx op1 = expand_normal (arg1);
12456 rtx op2 = expand_normal (arg2);
12457 rtx pat, addr;
12458 machine_mode tmode = insn_data[icode].operand[0].mode;
12459 machine_mode smode = insn_data[icode].operand[1].mode;
12460 machine_mode mode1 = Pmode;
12461 machine_mode mode2 = Pmode;
12463 /* Invalid arguments. Bail before doing anything stoopid! */
12464 if (arg0 == error_mark_node
12465 || arg1 == error_mark_node
12466 || arg2 == error_mark_node)
12467 return const0_rtx;
12469 if (! (*insn_data[icode].operand[1].predicate) (op0, smode))
12470 op0 = copy_to_mode_reg (smode, op0);
12472 op2 = copy_to_mode_reg (mode2, op2);
12474 if (op1 == const0_rtx)
12476 addr = gen_rtx_MEM (tmode, op2);
12478 else
12480 op1 = copy_to_mode_reg (mode1, op1);
12481 addr = gen_rtx_MEM (tmode, gen_rtx_PLUS (Pmode, op1, op2));
12484 pat = GEN_FCN (icode) (addr, op0);
12485 if (pat)
12486 emit_insn (pat);
12487 return NULL_RTX;
12490 /* Return the appropriate SPR number associated with the given builtin. */
12491 static inline HOST_WIDE_INT
12492 htm_spr_num (enum rs6000_builtins code)
12494 if (code == HTM_BUILTIN_GET_TFHAR
12495 || code == HTM_BUILTIN_SET_TFHAR)
12496 return TFHAR_SPR;
12497 else if (code == HTM_BUILTIN_GET_TFIAR
12498 || code == HTM_BUILTIN_SET_TFIAR)
12499 return TFIAR_SPR;
12500 else if (code == HTM_BUILTIN_GET_TEXASR
12501 || code == HTM_BUILTIN_SET_TEXASR)
12502 return TEXASR_SPR;
12503 gcc_assert (code == HTM_BUILTIN_GET_TEXASRU
12504 || code == HTM_BUILTIN_SET_TEXASRU);
12505 return TEXASRU_SPR;
12508 /* Return the appropriate SPR regno associated with the given builtin. */
12509 static inline HOST_WIDE_INT
12510 htm_spr_regno (enum rs6000_builtins code)
12512 if (code == HTM_BUILTIN_GET_TFHAR
12513 || code == HTM_BUILTIN_SET_TFHAR)
12514 return TFHAR_REGNO;
12515 else if (code == HTM_BUILTIN_GET_TFIAR
12516 || code == HTM_BUILTIN_SET_TFIAR)
12517 return TFIAR_REGNO;
12518 gcc_assert (code == HTM_BUILTIN_GET_TEXASR
12519 || code == HTM_BUILTIN_SET_TEXASR
12520 || code == HTM_BUILTIN_GET_TEXASRU
12521 || code == HTM_BUILTIN_SET_TEXASRU);
12522 return TEXASR_REGNO;
12525 /* Return the correct ICODE value depending on whether we are
12526 setting or reading the HTM SPRs. */
12527 static inline enum insn_code
12528 rs6000_htm_spr_icode (bool nonvoid)
12530 if (nonvoid)
12531 return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si;
12532 else
12533 return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si;
12536 /* Expand the HTM builtin in EXP and store the result in TARGET.
12537 Store true in *EXPANDEDP if we found a builtin to expand. */
12538 static rtx
12539 htm_expand_builtin (tree exp, rtx target, bool * expandedp)
12541 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12542 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
12543 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12544 const struct builtin_description *d;
12545 size_t i;
12547 *expandedp = false;
12549 /* Expand the HTM builtins. */
12550 d = bdesc_htm;
12551 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
12552 if (d->code == fcode)
12554 rtx op[MAX_HTM_OPERANDS], pat;
12555 int nopnds = 0;
12556 tree arg;
12557 call_expr_arg_iterator iter;
12558 unsigned attr = rs6000_builtin_info[fcode].attr;
12559 enum insn_code icode = d->icode;
12561 if (attr & RS6000_BTC_SPR)
12562 icode = rs6000_htm_spr_icode (nonvoid);
12564 if (nonvoid)
12566 machine_mode tmode = insn_data[icode].operand[0].mode;
12567 if (!target
12568 || GET_MODE (target) != tmode
12569 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
12570 target = gen_reg_rtx (tmode);
12571 op[nopnds++] = target;
12574 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
12576 const struct insn_operand_data *insn_op;
12578 if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS)
12579 return NULL_RTX;
12581 insn_op = &insn_data[icode].operand[nopnds];
12583 op[nopnds] = expand_normal (arg);
12585 if (!(*insn_op->predicate) (op[nopnds], insn_op->mode))
12587 if (!strcmp (insn_op->constraint, "n"))
12589 int arg_num = (nonvoid) ? nopnds : nopnds + 1;
12590 if (!CONST_INT_P (op[nopnds]))
12591 error ("argument %d must be an unsigned literal", arg_num);
12592 else
12593 error ("argument %d is an unsigned literal that is "
12594 "out of range", arg_num);
12595 return const0_rtx;
12597 op[nopnds] = copy_to_mode_reg (insn_op->mode, op[nopnds]);
12600 nopnds++;
12603 /* Handle the builtins for extended mnemonics. These accept
12604 no arguments, but map to builtins that take arguments. */
12605 switch (fcode)
12607 case HTM_BUILTIN_TENDALL: /* Alias for: tend. 1 */
12608 case HTM_BUILTIN_TRESUME: /* Alias for: tsr. 1 */
12609 op[nopnds++] = GEN_INT (1);
12610 #ifdef ENABLE_CHECKING
12611 attr |= RS6000_BTC_UNARY;
12612 #endif
12613 break;
12614 case HTM_BUILTIN_TSUSPEND: /* Alias for: tsr. 0 */
12615 op[nopnds++] = GEN_INT (0);
12616 #ifdef ENABLE_CHECKING
12617 attr |= RS6000_BTC_UNARY;
12618 #endif
12619 break;
12620 default:
12621 break;
12624 /* If this builtin accesses SPRs, then pass in the appropriate
12625 SPR number and SPR regno as the last two operands. */
12626 if (attr & RS6000_BTC_SPR)
12628 op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode));
12629 op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode));
12632 #ifdef ENABLE_CHECKING
12633 int expected_nopnds = 0;
12634 if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_UNARY)
12635 expected_nopnds = 1;
12636 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_BINARY)
12637 expected_nopnds = 2;
12638 else if ((attr & RS6000_BTC_TYPE_MASK) == RS6000_BTC_TERNARY)
12639 expected_nopnds = 3;
12640 if (!(attr & RS6000_BTC_VOID))
12641 expected_nopnds += 1;
12642 if (attr & RS6000_BTC_SPR)
12643 expected_nopnds += 2;
12645 gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS);
12646 #endif
12648 switch (nopnds)
12650 case 1:
12651 pat = GEN_FCN (icode) (op[0]);
12652 break;
12653 case 2:
12654 pat = GEN_FCN (icode) (op[0], op[1]);
12655 break;
12656 case 3:
12657 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
12658 break;
12659 case 4:
12660 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
12661 break;
12662 default:
12663 gcc_unreachable ();
12665 if (!pat)
12666 return NULL_RTX;
12667 emit_insn (pat);
12669 *expandedp = true;
12670 if (nonvoid)
12671 return target;
12672 return const0_rtx;
12675 return NULL_RTX;
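/* Thus, for example, the argument-less __builtin_tendall () is
   expanded exactly like "tend. 1" and __builtin_tsuspend () like
   "tsr. 0", using the operands synthesized in the switch above.  */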
12678 static rtx
12679 rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
12681 rtx pat;
12682 tree arg0 = CALL_EXPR_ARG (exp, 0);
12683 tree arg1 = CALL_EXPR_ARG (exp, 1);
12684 tree arg2 = CALL_EXPR_ARG (exp, 2);
12685 rtx op0 = expand_normal (arg0);
12686 rtx op1 = expand_normal (arg1);
12687 rtx op2 = expand_normal (arg2);
12688 machine_mode tmode = insn_data[icode].operand[0].mode;
12689 machine_mode mode0 = insn_data[icode].operand[1].mode;
12690 machine_mode mode1 = insn_data[icode].operand[2].mode;
12691 machine_mode mode2 = insn_data[icode].operand[3].mode;
12693 if (icode == CODE_FOR_nothing)
12694 /* Builtin not supported on this processor. */
12695 return 0;
12697 /* If we got invalid arguments bail out before generating bad rtl. */
12698 if (arg0 == error_mark_node
12699 || arg1 == error_mark_node
12700 || arg2 == error_mark_node)
12701 return const0_rtx;
12703 /* Check and prepare the argument depending on the instruction code.
12705 Note that a switch statement instead of this sequence of tests
12706 would be incorrect, as several of the CODE_FOR values may be
12707 CODE_FOR_nothing, which would yield multiple case labels with
12708 identical values and fail to compile. We would never reach here
12709 at runtime in that case anyway, thanks to the check above. */
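/* Illustration (hypothetical): if two of these macros expanded to
   CODE_FOR_nothing,

     switch (icode)
       {
       case CODE_FOR_altivec_vsldoi_v4sf:
       case CODE_FOR_vsx_xxpermdi_v2df:
	 ...
       }

   would contain two case labels with the same value and fail to
   compile; the if-else chain below has no such problem.  */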
12710 if (icode == CODE_FOR_altivec_vsldoi_v4sf
12711 || icode == CODE_FOR_altivec_vsldoi_v4si
12712 || icode == CODE_FOR_altivec_vsldoi_v8hi
12713 || icode == CODE_FOR_altivec_vsldoi_v16qi)
12715 /* Only allow 4-bit unsigned literals. */
12716 STRIP_NOPS (arg2);
12717 if (TREE_CODE (arg2) != INTEGER_CST
12718 || TREE_INT_CST_LOW (arg2) & ~0xf)
12720 error ("argument 3 must be a 4-bit unsigned literal");
12721 return const0_rtx;
12724 else if (icode == CODE_FOR_vsx_xxpermdi_v2df
12725 || icode == CODE_FOR_vsx_xxpermdi_v2di
12726 || icode == CODE_FOR_vsx_xxsldwi_v16qi
12727 || icode == CODE_FOR_vsx_xxsldwi_v8hi
12728 || icode == CODE_FOR_vsx_xxsldwi_v4si
12729 || icode == CODE_FOR_vsx_xxsldwi_v4sf
12730 || icode == CODE_FOR_vsx_xxsldwi_v2di
12731 || icode == CODE_FOR_vsx_xxsldwi_v2df)
12733 /* Only allow 2-bit unsigned literals. */
12734 STRIP_NOPS (arg2);
12735 if (TREE_CODE (arg2) != INTEGER_CST
12736 || TREE_INT_CST_LOW (arg2) & ~0x3)
12738 error ("argument 3 must be a 2-bit unsigned literal");
12739 return const0_rtx;
12742 else if (icode == CODE_FOR_vsx_set_v2df
12743 || icode == CODE_FOR_vsx_set_v2di
12744 || icode == CODE_FOR_bcdadd
12745 || icode == CODE_FOR_bcdadd_lt
12746 || icode == CODE_FOR_bcdadd_eq
12747 || icode == CODE_FOR_bcdadd_gt
12748 || icode == CODE_FOR_bcdsub
12749 || icode == CODE_FOR_bcdsub_lt
12750 || icode == CODE_FOR_bcdsub_eq
12751 || icode == CODE_FOR_bcdsub_gt)
12753 /* Only allow 1-bit unsigned literals. */
12754 STRIP_NOPS (arg2);
12755 if (TREE_CODE (arg2) != INTEGER_CST
12756 || TREE_INT_CST_LOW (arg2) & ~0x1)
12758 error ("argument 3 must be a 1-bit unsigned literal");
12759 return const0_rtx;
12762 else if (icode == CODE_FOR_dfp_ddedpd_dd
12763 || icode == CODE_FOR_dfp_ddedpd_td)
12765 /* Only allow 2-bit unsigned literals where the value is 0 or 2. */
12766 STRIP_NOPS (arg0);
12767 if (TREE_CODE (arg0) != INTEGER_CST
12768 || TREE_INT_CST_LOW (arg0) & ~0x3)
12770 error ("argument 1 must be 0 or 2");
12771 return const0_rtx;
12774 else if (icode == CODE_FOR_dfp_denbcd_dd
12775 || icode == CODE_FOR_dfp_denbcd_td)
12777 /* Only allow 1-bit unsigned literals. */
12778 STRIP_NOPS (arg0);
12779 if (TREE_CODE (arg0) != INTEGER_CST
12780 || TREE_INT_CST_LOW (arg0) & ~0x1)
12782 error ("argument 1 must be a 1-bit unsigned literal");
12783 return const0_rtx;
12786 else if (icode == CODE_FOR_dfp_dscli_dd
12787 || icode == CODE_FOR_dfp_dscli_td
12788 || icode == CODE_FOR_dfp_dscri_dd
12789 || icode == CODE_FOR_dfp_dscri_td)
12791 /* Only allow 6-bit unsigned literals. */
12792 STRIP_NOPS (arg1);
12793 if (TREE_CODE (arg1) != INTEGER_CST
12794 || TREE_INT_CST_LOW (arg1) & ~0x3f)
12796 error ("argument 2 must be a 6-bit unsigned literal");
12797 return const0_rtx;
12800 else if (icode == CODE_FOR_crypto_vshasigmaw
12801 || icode == CODE_FOR_crypto_vshasigmad)
12803 /* Check whether the 2nd and 3rd arguments are integer constants and in
12804 range and prepare arguments. */
12805 STRIP_NOPS (arg1);
12806 if (TREE_CODE (arg1) != INTEGER_CST || wi::geu_p (arg1, 2))
12808 error ("argument 2 must be 0 or 1");
12809 return const0_rtx;
12812 STRIP_NOPS (arg2);
12813 if (TREE_CODE (arg2) != INTEGER_CST || wi::geu_p (arg2, 16))
12815 error ("argument 3 must be in the range 0..15");
12816 return const0_rtx;
12820 if (target == 0
12821 || GET_MODE (target) != tmode
12822 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12823 target = gen_reg_rtx (tmode);
12825 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12826 op0 = copy_to_mode_reg (mode0, op0);
12827 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12828 op1 = copy_to_mode_reg (mode1, op1);
12829 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12830 op2 = copy_to_mode_reg (mode2, op2);
12832 if (TARGET_PAIRED_FLOAT && icode == CODE_FOR_selv2sf4)
12833 pat = GEN_FCN (icode) (target, op0, op1, op2, CONST0_RTX (SFmode));
12834 else
12835 pat = GEN_FCN (icode) (target, op0, op1, op2);
12836 if (! pat)
12837 return 0;
12838 emit_insn (pat);
12840 return target;
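/* Source-level effect of the literal checks above (a sketch; the
   builtin spelling mirrors the vsldoi entry in the builtin tables):

     vector int a, b;
     vector int c = __builtin_altivec_vsldoi_4si (a, b, 17);

   is rejected with "argument 3 must be a 4-bit unsigned literal",
   while any shift amount in 0..15 expands normally.  */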
12843 /* Expand the lvx builtins. */
12844 static rtx
12845 altivec_expand_ld_builtin (tree exp, rtx target, bool *expandedp)
12847 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12848 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12849 tree arg0;
12850 machine_mode tmode, mode0;
12851 rtx pat, op0;
12852 enum insn_code icode;
12854 switch (fcode)
12856 case ALTIVEC_BUILTIN_LD_INTERNAL_16qi:
12857 icode = CODE_FOR_vector_altivec_load_v16qi;
12858 break;
12859 case ALTIVEC_BUILTIN_LD_INTERNAL_8hi:
12860 icode = CODE_FOR_vector_altivec_load_v8hi;
12861 break;
12862 case ALTIVEC_BUILTIN_LD_INTERNAL_4si:
12863 icode = CODE_FOR_vector_altivec_load_v4si;
12864 break;
12865 case ALTIVEC_BUILTIN_LD_INTERNAL_4sf:
12866 icode = CODE_FOR_vector_altivec_load_v4sf;
12867 break;
12868 case ALTIVEC_BUILTIN_LD_INTERNAL_2df:
12869 icode = CODE_FOR_vector_altivec_load_v2df;
12870 break;
12871 case ALTIVEC_BUILTIN_LD_INTERNAL_2di:
12872 icode = CODE_FOR_vector_altivec_load_v2di;
break;
12873 case ALTIVEC_BUILTIN_LD_INTERNAL_1ti:
12874 icode = CODE_FOR_vector_altivec_load_v1ti;
12875 break;
12876 default:
12877 *expandedp = false;
12878 return NULL_RTX;
12881 *expandedp = true;
12883 arg0 = CALL_EXPR_ARG (exp, 0);
12884 op0 = expand_normal (arg0);
12885 tmode = insn_data[icode].operand[0].mode;
12886 mode0 = insn_data[icode].operand[1].mode;
12888 if (target == 0
12889 || GET_MODE (target) != tmode
12890 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12891 target = gen_reg_rtx (tmode);
12893 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12894 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12896 pat = GEN_FCN (icode) (target, op0);
12897 if (! pat)
12898 return 0;
12899 emit_insn (pat);
12900 return target;
12903 /* Expand the stvx builtins. */
12904 static rtx
12905 altivec_expand_st_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12906 bool *expandedp)
12908 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12909 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
12910 tree arg0, arg1;
12911 machine_mode mode0, mode1;
12912 rtx pat, op0, op1;
12913 enum insn_code icode;
12915 switch (fcode)
12917 case ALTIVEC_BUILTIN_ST_INTERNAL_16qi:
12918 icode = CODE_FOR_vector_altivec_store_v16qi;
12919 break;
12920 case ALTIVEC_BUILTIN_ST_INTERNAL_8hi:
12921 icode = CODE_FOR_vector_altivec_store_v8hi;
12922 break;
12923 case ALTIVEC_BUILTIN_ST_INTERNAL_4si:
12924 icode = CODE_FOR_vector_altivec_store_v4si;
12925 break;
12926 case ALTIVEC_BUILTIN_ST_INTERNAL_4sf:
12927 icode = CODE_FOR_vector_altivec_store_v4sf;
12928 break;
12929 case ALTIVEC_BUILTIN_ST_INTERNAL_2df:
12930 icode = CODE_FOR_vector_altivec_store_v2df;
12931 break;
12932 case ALTIVEC_BUILTIN_ST_INTERNAL_2di:
12933 icode = CODE_FOR_vector_altivec_store_v2di;
break;
12934 case ALTIVEC_BUILTIN_ST_INTERNAL_1ti:
12935 icode = CODE_FOR_vector_altivec_store_v1ti;
12936 break;
12937 default:
12938 *expandedp = false;
12939 return NULL_RTX;
12942 arg0 = CALL_EXPR_ARG (exp, 0);
12943 arg1 = CALL_EXPR_ARG (exp, 1);
12944 op0 = expand_normal (arg0);
12945 op1 = expand_normal (arg1);
12946 mode0 = insn_data[icode].operand[0].mode;
12947 mode1 = insn_data[icode].operand[1].mode;
12949 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
12950 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12951 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
12952 op1 = copy_to_mode_reg (mode1, op1);
12954 pat = GEN_FCN (icode) (op0, op1);
12955 if (pat)
12956 emit_insn (pat);
12958 *expandedp = true;
12959 return NULL_RTX;
12962 /* Expand the dst builtins. */
12963 static rtx
12964 altivec_expand_dst_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
12965 bool *expandedp)
12967 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
12968 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
12969 tree arg0, arg1, arg2;
12970 machine_mode mode0, mode1;
12971 rtx pat, op0, op1, op2;
12972 const struct builtin_description *d;
12973 size_t i;
12975 *expandedp = false;
12977 /* Handle DST variants. */
12978 d = bdesc_dst;
12979 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
12980 if (d->code == fcode)
12982 arg0 = CALL_EXPR_ARG (exp, 0);
12983 arg1 = CALL_EXPR_ARG (exp, 1);
12984 arg2 = CALL_EXPR_ARG (exp, 2);
12985 op0 = expand_normal (arg0);
12986 op1 = expand_normal (arg1);
12987 op2 = expand_normal (arg2);
12988 mode0 = insn_data[d->icode].operand[0].mode;
12989 mode1 = insn_data[d->icode].operand[1].mode;
12991 /* Invalid arguments, bail out before generating bad rtl. */
12992 if (arg0 == error_mark_node
12993 || arg1 == error_mark_node
12994 || arg2 == error_mark_node)
12995 return const0_rtx;
12997 *expandedp = true;
12998 STRIP_NOPS (arg2);
12999 if (TREE_CODE (arg2) != INTEGER_CST
13000 || TREE_INT_CST_LOW (arg2) & ~0x3)
13002 error ("argument to %qs must be a 2-bit unsigned literal", d->name);
13003 return const0_rtx;
13006 if (! (*insn_data[d->icode].operand[0].predicate) (op0, mode0))
13007 op0 = copy_to_mode_reg (Pmode, op0);
13008 if (! (*insn_data[d->icode].operand[1].predicate) (op1, mode1))
13009 op1 = copy_to_mode_reg (mode1, op1);
13011 pat = GEN_FCN (d->icode) (op0, op1, op2);
13012 if (pat != 0)
13013 emit_insn (pat);
13015 return NULL_RTX;
13018 return NULL_RTX;
13021 /* Expand vec_init builtin. */
13022 static rtx
13023 altivec_expand_vec_init_builtin (tree type, tree exp, rtx target)
13025 machine_mode tmode = TYPE_MODE (type);
13026 machine_mode inner_mode = GET_MODE_INNER (tmode);
13027 int i, n_elt = GET_MODE_NUNITS (tmode);
13029 gcc_assert (VECTOR_MODE_P (tmode));
13030 gcc_assert (n_elt == call_expr_nargs (exp));
13032 if (!target || !register_operand (target, tmode))
13033 target = gen_reg_rtx (tmode);
13035 /* If we have a vector comprised of a single element, such as V1TImode, do
13036 the initialization directly. */
13037 if (n_elt == 1 && GET_MODE_SIZE (tmode) == GET_MODE_SIZE (inner_mode))
13039 rtx x = expand_normal (CALL_EXPR_ARG (exp, 0));
13040 emit_move_insn (target, gen_lowpart (tmode, x));
13042 else
13044 rtvec v = rtvec_alloc (n_elt);
13046 for (i = 0; i < n_elt; ++i)
13048 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
13049 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
13052 rs6000_expand_vector_init (target, gen_rtx_PARALLEL (tmode, v));
13055 return target;
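/* A call this function expands looks like (a sketch; the builtin name
   mirrors the VEC_INIT_V4SI code name and is not re-verified here):

     vector int v = __builtin_vec_init_v4si (a, b, c, d);

   where the argument count must equal the element count, as asserted
   above.  */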
13058 /* Return the integer constant in ARG. Constrain it to be in the range
13059 of the subparts of VEC_TYPE; issue an error if not. */
13061 static int
13062 get_element_number (tree vec_type, tree arg)
13064 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
13066 if (!tree_fits_uhwi_p (arg)
13067 || (elt = tree_to_uhwi (arg), elt > max))
13069 error ("selector must be an integer constant in the range 0..%wi", max);
13070 return 0;
13073 return elt;
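/* E.g. for a vector type with 4 subparts MAX is 3, so (a sketch, using
   the altivec.h spelling)

     float f = vec_extract (v, 7);

   draws "selector must be an integer constant in the range 0..3", and
   0 is returned so expansion can continue.  */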
13076 /* Expand vec_set builtin. */
13077 static rtx
13078 altivec_expand_vec_set_builtin (tree exp)
13080 machine_mode tmode, mode1;
13081 tree arg0, arg1, arg2;
13082 int elt;
13083 rtx op0, op1;
13085 arg0 = CALL_EXPR_ARG (exp, 0);
13086 arg1 = CALL_EXPR_ARG (exp, 1);
13087 arg2 = CALL_EXPR_ARG (exp, 2);
13089 tmode = TYPE_MODE (TREE_TYPE (arg0));
13090 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13091 gcc_assert (VECTOR_MODE_P (tmode));
13093 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
13094 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
13095 elt = get_element_number (TREE_TYPE (arg0), arg2);
13097 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
13098 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
13100 op0 = force_reg (tmode, op0);
13101 op1 = force_reg (mode1, op1);
13103 rs6000_expand_vector_set (op0, op1, elt);
13105 return op0;
13108 /* Expand vec_ext builtin. */
13109 static rtx
13110 altivec_expand_vec_ext_builtin (tree exp, rtx target)
13112 machine_mode tmode, mode0;
13113 tree arg0, arg1;
13114 int elt;
13115 rtx op0;
13117 arg0 = CALL_EXPR_ARG (exp, 0);
13118 arg1 = CALL_EXPR_ARG (exp, 1);
13120 op0 = expand_normal (arg0);
13121 elt = get_element_number (TREE_TYPE (arg0), arg1);
13123 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
13124 mode0 = TYPE_MODE (TREE_TYPE (arg0));
13125 gcc_assert (VECTOR_MODE_P (mode0));
13127 op0 = force_reg (mode0, op0);
13129 if (optimize || !target || !register_operand (target, tmode))
13130 target = gen_reg_rtx (tmode);
13132 rs6000_expand_vector_extract (target, op0, elt);
13134 return target;
13137 /* Expand the builtin in EXP and store the result in TARGET. Store
13138 true in *EXPANDEDP if we found a builtin to expand. */
13139 static rtx
13140 altivec_expand_builtin (tree exp, rtx target, bool *expandedp)
13142 const struct builtin_description *d;
13143 size_t i;
13144 enum insn_code icode;
13145 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13146 tree arg0;
13147 rtx op0, pat;
13148 machine_mode tmode, mode0;
13149 enum rs6000_builtins fcode
13150 = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13152 if (rs6000_overloaded_builtin_p (fcode))
13154 *expandedp = true;
13155 error ("unresolved overload for Altivec builtin %qF", fndecl);
13157 /* Given it is invalid, just generate a normal call. */
13158 return expand_call (exp, target, false);
13161 target = altivec_expand_ld_builtin (exp, target, expandedp);
13162 if (*expandedp)
13163 return target;
13165 target = altivec_expand_st_builtin (exp, target, expandedp);
13166 if (*expandedp)
13167 return target;
13169 target = altivec_expand_dst_builtin (exp, target, expandedp);
13170 if (*expandedp)
13171 return target;
13173 *expandedp = true;
13175 switch (fcode)
13177 case ALTIVEC_BUILTIN_STVX_V2DF:
13178 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2df, exp);
13179 case ALTIVEC_BUILTIN_STVX_V2DI:
13180 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v2di, exp);
13181 case ALTIVEC_BUILTIN_STVX_V4SF:
13182 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4sf, exp);
13183 case ALTIVEC_BUILTIN_STVX:
13184 case ALTIVEC_BUILTIN_STVX_V4SI:
13185 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v4si, exp);
13186 case ALTIVEC_BUILTIN_STVX_V8HI:
13187 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v8hi, exp);
13188 case ALTIVEC_BUILTIN_STVX_V16QI:
13189 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvx_v16qi, exp);
13190 case ALTIVEC_BUILTIN_STVEBX:
13191 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvebx, exp);
13192 case ALTIVEC_BUILTIN_STVEHX:
13193 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvehx, exp);
13194 case ALTIVEC_BUILTIN_STVEWX:
13195 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvewx, exp);
13196 case ALTIVEC_BUILTIN_STVXL_V2DF:
13197 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2df, exp);
13198 case ALTIVEC_BUILTIN_STVXL_V2DI:
13199 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v2di, exp);
13200 case ALTIVEC_BUILTIN_STVXL_V4SF:
13201 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4sf, exp);
13202 case ALTIVEC_BUILTIN_STVXL:
13203 case ALTIVEC_BUILTIN_STVXL_V4SI:
13204 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v4si, exp);
13205 case ALTIVEC_BUILTIN_STVXL_V8HI:
13206 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v8hi, exp);
13207 case ALTIVEC_BUILTIN_STVXL_V16QI:
13208 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvxl_v16qi, exp);
13210 case ALTIVEC_BUILTIN_STVLX:
13211 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlx, exp);
13212 case ALTIVEC_BUILTIN_STVLXL:
13213 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvlxl, exp);
13214 case ALTIVEC_BUILTIN_STVRX:
13215 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrx, exp);
13216 case ALTIVEC_BUILTIN_STVRXL:
13217 return altivec_expand_stv_builtin (CODE_FOR_altivec_stvrxl, exp);
13219 case VSX_BUILTIN_STXVD2X_V1TI:
13220 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v1ti, exp);
13221 case VSX_BUILTIN_STXVD2X_V2DF:
13222 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2df, exp);
13223 case VSX_BUILTIN_STXVD2X_V2DI:
13224 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v2di, exp);
13225 case VSX_BUILTIN_STXVW4X_V4SF:
13226 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4sf, exp);
13227 case VSX_BUILTIN_STXVW4X_V4SI:
13228 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v4si, exp);
13229 case VSX_BUILTIN_STXVW4X_V8HI:
13230 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v8hi, exp);
13231 case VSX_BUILTIN_STXVW4X_V16QI:
13232 return altivec_expand_stv_builtin (CODE_FOR_vsx_store_v16qi, exp);
13234 case ALTIVEC_BUILTIN_MFVSCR:
13235 icode = CODE_FOR_altivec_mfvscr;
13236 tmode = insn_data[icode].operand[0].mode;
13238 if (target == 0
13239 || GET_MODE (target) != tmode
13240 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13241 target = gen_reg_rtx (tmode);
13243 pat = GEN_FCN (icode) (target);
13244 if (! pat)
13245 return 0;
13246 emit_insn (pat);
13247 return target;
13249 case ALTIVEC_BUILTIN_MTVSCR:
13250 icode = CODE_FOR_altivec_mtvscr;
13251 arg0 = CALL_EXPR_ARG (exp, 0);
13252 op0 = expand_normal (arg0);
13253 mode0 = insn_data[icode].operand[0].mode;
13255 /* If we got invalid arguments bail out before generating bad rtl. */
13256 if (arg0 == error_mark_node)
13257 return const0_rtx;
13259 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13260 op0 = copy_to_mode_reg (mode0, op0);
13262 pat = GEN_FCN (icode) (op0);
13263 if (pat)
13264 emit_insn (pat);
13265 return NULL_RTX;
13267 case ALTIVEC_BUILTIN_DSSALL:
13268 emit_insn (gen_altivec_dssall ());
13269 return NULL_RTX;
13271 case ALTIVEC_BUILTIN_DSS:
13272 icode = CODE_FOR_altivec_dss;
13273 arg0 = CALL_EXPR_ARG (exp, 0);
13274 STRIP_NOPS (arg0);
13275 op0 = expand_normal (arg0);
13276 mode0 = insn_data[icode].operand[0].mode;
13278 /* If we got invalid arguments bail out before generating bad rtl. */
13279 if (arg0 == error_mark_node)
13280 return const0_rtx;
13282 if (TREE_CODE (arg0) != INTEGER_CST
13283 || TREE_INT_CST_LOW (arg0) & ~0x3)
13285 error ("argument to dss must be a 2-bit unsigned literal");
13286 return const0_rtx;
13289 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13290 op0 = copy_to_mode_reg (mode0, op0);
13292 emit_insn (gen_altivec_dss (op0));
13293 return NULL_RTX;
13295 case ALTIVEC_BUILTIN_VEC_INIT_V4SI:
13296 case ALTIVEC_BUILTIN_VEC_INIT_V8HI:
13297 case ALTIVEC_BUILTIN_VEC_INIT_V16QI:
13298 case ALTIVEC_BUILTIN_VEC_INIT_V4SF:
13299 case VSX_BUILTIN_VEC_INIT_V2DF:
13300 case VSX_BUILTIN_VEC_INIT_V2DI:
13301 case VSX_BUILTIN_VEC_INIT_V1TI:
13302 return altivec_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
13304 case ALTIVEC_BUILTIN_VEC_SET_V4SI:
13305 case ALTIVEC_BUILTIN_VEC_SET_V8HI:
13306 case ALTIVEC_BUILTIN_VEC_SET_V16QI:
13307 case ALTIVEC_BUILTIN_VEC_SET_V4SF:
13308 case VSX_BUILTIN_VEC_SET_V2DF:
13309 case VSX_BUILTIN_VEC_SET_V2DI:
13310 case VSX_BUILTIN_VEC_SET_V1TI:
13311 return altivec_expand_vec_set_builtin (exp);
13313 case ALTIVEC_BUILTIN_VEC_EXT_V4SI:
13314 case ALTIVEC_BUILTIN_VEC_EXT_V8HI:
13315 case ALTIVEC_BUILTIN_VEC_EXT_V16QI:
13316 case ALTIVEC_BUILTIN_VEC_EXT_V4SF:
13317 case VSX_BUILTIN_VEC_EXT_V2DF:
13318 case VSX_BUILTIN_VEC_EXT_V2DI:
13319 case VSX_BUILTIN_VEC_EXT_V1TI:
13320 return altivec_expand_vec_ext_builtin (exp, target);
13322 default:
13323 break;
13327 /* Expand abs* operations. */
13328 d = bdesc_abs;
13329 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
13330 if (d->code == fcode)
13331 return altivec_expand_abs_builtin (d->icode, exp, target);
13333 /* Expand the AltiVec predicates. */
13334 d = bdesc_altivec_preds;
13335 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
13336 if (d->code == fcode)
13337 return altivec_expand_predicate_builtin (d->icode, exp, target);
13339 /* LV* are funky. We initialized them differently. */
13340 switch (fcode)
13342 case ALTIVEC_BUILTIN_LVSL:
13343 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsl,
13344 exp, target, false);
13345 case ALTIVEC_BUILTIN_LVSR:
13346 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvsr,
13347 exp, target, false);
13348 case ALTIVEC_BUILTIN_LVEBX:
13349 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvebx,
13350 exp, target, false);
13351 case ALTIVEC_BUILTIN_LVEHX:
13352 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvehx,
13353 exp, target, false);
13354 case ALTIVEC_BUILTIN_LVEWX:
13355 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvewx,
13356 exp, target, false);
13357 case ALTIVEC_BUILTIN_LVXL_V2DF:
13358 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2df,
13359 exp, target, false);
13360 case ALTIVEC_BUILTIN_LVXL_V2DI:
13361 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v2di,
13362 exp, target, false);
13363 case ALTIVEC_BUILTIN_LVXL_V4SF:
13364 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4sf,
13365 exp, target, false);
13366 case ALTIVEC_BUILTIN_LVXL:
13367 case ALTIVEC_BUILTIN_LVXL_V4SI:
13368 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v4si,
13369 exp, target, false);
13370 case ALTIVEC_BUILTIN_LVXL_V8HI:
13371 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v8hi,
13372 exp, target, false);
13373 case ALTIVEC_BUILTIN_LVXL_V16QI:
13374 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvxl_v16qi,
13375 exp, target, false);
13376 case ALTIVEC_BUILTIN_LVX_V2DF:
13377 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2df,
13378 exp, target, false);
13379 case ALTIVEC_BUILTIN_LVX_V2DI:
13380 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v2di,
13381 exp, target, false);
13382 case ALTIVEC_BUILTIN_LVX_V4SF:
13383 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4sf,
13384 exp, target, false);
13385 case ALTIVEC_BUILTIN_LVX:
13386 case ALTIVEC_BUILTIN_LVX_V4SI:
13387 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v4si,
13388 exp, target, false);
13389 case ALTIVEC_BUILTIN_LVX_V8HI:
13390 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v8hi,
13391 exp, target, false);
13392 case ALTIVEC_BUILTIN_LVX_V16QI:
13393 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvx_v16qi,
13394 exp, target, false);
13395 case ALTIVEC_BUILTIN_LVLX:
13396 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlx,
13397 exp, target, true);
13398 case ALTIVEC_BUILTIN_LVLXL:
13399 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvlxl,
13400 exp, target, true);
13401 case ALTIVEC_BUILTIN_LVRX:
13402 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrx,
13403 exp, target, true);
13404 case ALTIVEC_BUILTIN_LVRXL:
13405 return altivec_expand_lv_builtin (CODE_FOR_altivec_lvrxl,
13406 exp, target, true);
13407 case VSX_BUILTIN_LXVD2X_V1TI:
13408 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v1ti,
13409 exp, target, false);
13410 case VSX_BUILTIN_LXVD2X_V2DF:
13411 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2df,
13412 exp, target, false);
13413 case VSX_BUILTIN_LXVD2X_V2DI:
13414 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v2di,
13415 exp, target, false);
13416 case VSX_BUILTIN_LXVW4X_V4SF:
13417 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4sf,
13418 exp, target, false);
13419 case VSX_BUILTIN_LXVW4X_V4SI:
13420 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v4si,
13421 exp, target, false);
13422 case VSX_BUILTIN_LXVW4X_V8HI:
13423 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v8hi,
13424 exp, target, false);
13425 case VSX_BUILTIN_LXVW4X_V16QI:
13426 return altivec_expand_lv_builtin (CODE_FOR_vsx_load_v16qi,
13427 exp, target, false);
13429 default:
13430 break;
13434 *expandedp = false;
13435 return NULL_RTX;
13438 /* Expand the builtin in EXP and store the result in TARGET. Store
13439 true in *EXPANDEDP if we found a builtin to expand. */
13440 static rtx
13441 paired_expand_builtin (tree exp, rtx target, bool * expandedp)
13443 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13444 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13445 const struct builtin_description *d;
13446 size_t i;
13448 *expandedp = true;
13450 switch (fcode)
13452 case PAIRED_BUILTIN_STX:
13453 return paired_expand_stv_builtin (CODE_FOR_paired_stx, exp);
13454 case PAIRED_BUILTIN_LX:
13455 return paired_expand_lv_builtin (CODE_FOR_paired_lx, exp, target);
13456 default:
13457 break;
13461 /* Expand the paired predicates. */
13462 d = bdesc_paired_preds;
13463 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); i++, d++)
13464 if (d->code == fcode)
13465 return paired_expand_predicate_builtin (d->icode, exp, target);
13467 *expandedp = false;
13468 return NULL_RTX;
13471 /* Binops that need to be initialized manually, but can be expanded
13472 automagically by rs6000_expand_binop_builtin. */
13473 static const struct builtin_description bdesc_2arg_spe[] =
13475 { RS6000_BTM_SPE, CODE_FOR_spe_evlddx, "__builtin_spe_evlddx", SPE_BUILTIN_EVLDDX },
13476 { RS6000_BTM_SPE, CODE_FOR_spe_evldwx, "__builtin_spe_evldwx", SPE_BUILTIN_EVLDWX },
13477 { RS6000_BTM_SPE, CODE_FOR_spe_evldhx, "__builtin_spe_evldhx", SPE_BUILTIN_EVLDHX },
13478 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhex, "__builtin_spe_evlwhex", SPE_BUILTIN_EVLWHEX },
13479 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhoux, "__builtin_spe_evlwhoux", SPE_BUILTIN_EVLWHOUX },
13480 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhosx, "__builtin_spe_evlwhosx", SPE_BUILTIN_EVLWHOSX },
13481 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplatx, "__builtin_spe_evlwwsplatx", SPE_BUILTIN_EVLWWSPLATX },
13482 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplatx, "__builtin_spe_evlwhsplatx", SPE_BUILTIN_EVLWHSPLATX },
13483 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplatx, "__builtin_spe_evlhhesplatx", SPE_BUILTIN_EVLHHESPLATX },
13484 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplatx, "__builtin_spe_evlhhousplatx", SPE_BUILTIN_EVLHHOUSPLATX },
13485 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplatx, "__builtin_spe_evlhhossplatx", SPE_BUILTIN_EVLHHOSSPLATX },
13486 { RS6000_BTM_SPE, CODE_FOR_spe_evldd, "__builtin_spe_evldd", SPE_BUILTIN_EVLDD },
13487 { RS6000_BTM_SPE, CODE_FOR_spe_evldw, "__builtin_spe_evldw", SPE_BUILTIN_EVLDW },
13488 { RS6000_BTM_SPE, CODE_FOR_spe_evldh, "__builtin_spe_evldh", SPE_BUILTIN_EVLDH },
13489 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhe, "__builtin_spe_evlwhe", SPE_BUILTIN_EVLWHE },
13490 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhou, "__builtin_spe_evlwhou", SPE_BUILTIN_EVLWHOU },
13491 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhos, "__builtin_spe_evlwhos", SPE_BUILTIN_EVLWHOS },
13492 { RS6000_BTM_SPE, CODE_FOR_spe_evlwwsplat, "__builtin_spe_evlwwsplat", SPE_BUILTIN_EVLWWSPLAT },
13493 { RS6000_BTM_SPE, CODE_FOR_spe_evlwhsplat, "__builtin_spe_evlwhsplat", SPE_BUILTIN_EVLWHSPLAT },
13494 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhesplat, "__builtin_spe_evlhhesplat", SPE_BUILTIN_EVLHHESPLAT },
13495 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhousplat, "__builtin_spe_evlhhousplat", SPE_BUILTIN_EVLHHOUSPLAT },
13496 { RS6000_BTM_SPE, CODE_FOR_spe_evlhhossplat, "__builtin_spe_evlhhossplat", SPE_BUILTIN_EVLHHOSSPLAT }
13499 /* Expand the builtin in EXP and store the result in TARGET. Store
13500 true in *EXPANDEDP if we found a builtin to expand.
13502 This expands the SPE builtins that are not simple unary and binary
13503 operations. */
13504 static rtx
13505 spe_expand_builtin (tree exp, rtx target, bool *expandedp)
13507 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13508 tree arg1, arg0;
13509 enum rs6000_builtins fcode = (enum rs6000_builtins) DECL_FUNCTION_CODE (fndecl);
13510 enum insn_code icode;
13511 machine_mode tmode, mode0;
13512 rtx pat, op0;
13513 const struct builtin_description *d;
13514 size_t i;
13516 *expandedp = true;
13518 /* Syntax check for a 5-bit unsigned immediate. */
13519 switch (fcode)
13521 case SPE_BUILTIN_EVSTDD:
13522 case SPE_BUILTIN_EVSTDH:
13523 case SPE_BUILTIN_EVSTDW:
13524 case SPE_BUILTIN_EVSTWHE:
13525 case SPE_BUILTIN_EVSTWHO:
13526 case SPE_BUILTIN_EVSTWWE:
13527 case SPE_BUILTIN_EVSTWWO:
13528 arg1 = CALL_EXPR_ARG (exp, 2);
13529 if (TREE_CODE (arg1) != INTEGER_CST
13530 || TREE_INT_CST_LOW (arg1) & ~0x1f)
13532 error ("argument 2 must be a 5-bit unsigned literal");
13533 return const0_rtx;
13535 break;
13536 default:
13537 break;
13540 /* The evsplat*i instructions are not quite generic. */
13541 switch (fcode)
13543 case SPE_BUILTIN_EVSPLATFI:
13544 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplatfi,
13545 exp, target);
13546 case SPE_BUILTIN_EVSPLATI:
13547 return rs6000_expand_unop_builtin (CODE_FOR_spe_evsplati,
13548 exp, target);
13549 default:
13550 break;
13553 d = bdesc_2arg_spe;
13554 for (i = 0; i < ARRAY_SIZE (bdesc_2arg_spe); ++i, ++d)
13555 if (d->code == fcode)
13556 return rs6000_expand_binop_builtin (d->icode, exp, target);
13558 d = bdesc_spe_predicates;
13559 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, ++d)
13560 if (d->code == fcode)
13561 return spe_expand_predicate_builtin (d->icode, exp, target);
13563 d = bdesc_spe_evsel;
13564 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, ++d)
13565 if (d->code == fcode)
13566 return spe_expand_evsel_builtin (d->icode, exp, target);
13568 switch (fcode)
13570 case SPE_BUILTIN_EVSTDDX:
13571 return spe_expand_stv_builtin (CODE_FOR_spe_evstddx, exp);
13572 case SPE_BUILTIN_EVSTDHX:
13573 return spe_expand_stv_builtin (CODE_FOR_spe_evstdhx, exp);
13574 case SPE_BUILTIN_EVSTDWX:
13575 return spe_expand_stv_builtin (CODE_FOR_spe_evstdwx, exp);
13576 case SPE_BUILTIN_EVSTWHEX:
13577 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhex, exp);
13578 case SPE_BUILTIN_EVSTWHOX:
13579 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhox, exp);
13580 case SPE_BUILTIN_EVSTWWEX:
13581 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwex, exp);
13582 case SPE_BUILTIN_EVSTWWOX:
13583 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwox, exp);
13584 case SPE_BUILTIN_EVSTDD:
13585 return spe_expand_stv_builtin (CODE_FOR_spe_evstdd, exp);
13586 case SPE_BUILTIN_EVSTDH:
13587 return spe_expand_stv_builtin (CODE_FOR_spe_evstdh, exp);
13588 case SPE_BUILTIN_EVSTDW:
13589 return spe_expand_stv_builtin (CODE_FOR_spe_evstdw, exp);
13590 case SPE_BUILTIN_EVSTWHE:
13591 return spe_expand_stv_builtin (CODE_FOR_spe_evstwhe, exp);
13592 case SPE_BUILTIN_EVSTWHO:
13593 return spe_expand_stv_builtin (CODE_FOR_spe_evstwho, exp);
13594 case SPE_BUILTIN_EVSTWWE:
13595 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwe, exp);
13596 case SPE_BUILTIN_EVSTWWO:
13597 return spe_expand_stv_builtin (CODE_FOR_spe_evstwwo, exp);
13598 case SPE_BUILTIN_MFSPEFSCR:
13599 icode = CODE_FOR_spe_mfspefscr;
13600 tmode = insn_data[icode].operand[0].mode;
13602 if (target == 0
13603 || GET_MODE (target) != tmode
13604 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
13605 target = gen_reg_rtx (tmode);
13607 pat = GEN_FCN (icode) (target);
13608 if (! pat)
13609 return 0;
13610 emit_insn (pat);
13611 return target;
13612 case SPE_BUILTIN_MTSPEFSCR:
13613 icode = CODE_FOR_spe_mtspefscr;
13614 arg0 = CALL_EXPR_ARG (exp, 0);
13615 op0 = expand_normal (arg0);
13616 mode0 = insn_data[icode].operand[0].mode;
13618 if (arg0 == error_mark_node)
13619 return const0_rtx;
13621 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
13622 op0 = copy_to_mode_reg (mode0, op0);
13624 pat = GEN_FCN (icode) (op0);
13625 if (pat)
13626 emit_insn (pat);
13627 return NULL_RTX;
13628 default:
13629 break;
13632 *expandedp = false;
13633 return NULL_RTX;
13636 static rtx
13637 paired_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13639 rtx pat, scratch, tmp;
13640 tree form = CALL_EXPR_ARG (exp, 0);
13641 tree arg0 = CALL_EXPR_ARG (exp, 1);
13642 tree arg1 = CALL_EXPR_ARG (exp, 2);
13643 rtx op0 = expand_normal (arg0);
13644 rtx op1 = expand_normal (arg1);
13645 machine_mode mode0 = insn_data[icode].operand[1].mode;
13646 machine_mode mode1 = insn_data[icode].operand[2].mode;
13647 int form_int;
13648 enum rtx_code code;
13650 if (TREE_CODE (form) != INTEGER_CST)
13652 error ("argument 1 of __builtin_paired_predicate must be a constant");
13653 return const0_rtx;
13655 else
13656 form_int = TREE_INT_CST_LOW (form);
13658 gcc_assert (mode0 == mode1);
13660 if (arg0 == error_mark_node || arg1 == error_mark_node)
13661 return const0_rtx;
13663 if (target == 0
13664 || GET_MODE (target) != SImode
13665 || !(*insn_data[icode].operand[0].predicate) (target, SImode))
13666 target = gen_reg_rtx (SImode);
13667 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
13668 op0 = copy_to_mode_reg (mode0, op0);
13669 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
13670 op1 = copy_to_mode_reg (mode1, op1);
13672 scratch = gen_reg_rtx (CCFPmode);
13674 pat = GEN_FCN (icode) (scratch, op0, op1);
13675 if (!pat)
13676 return const0_rtx;
13678 emit_insn (pat);
13680 switch (form_int)
13682 /* LT bit. */
13683 case 0:
13684 code = LT;
13685 break;
13686 /* GT bit. */
13687 case 1:
13688 code = GT;
13689 break;
13690 /* EQ bit. */
13691 case 2:
13692 code = EQ;
13693 break;
13694 /* UN bit. */
13695 case 3:
13696 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13697 return target;
13698 default:
13699 error ("argument 1 of __builtin_paired_predicate is out of range");
13700 return const0_rtx;
13703 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13704 emit_move_insn (target, tmp);
13705 return target;
13708 static rtx
13709 spe_expand_predicate_builtin (enum insn_code icode, tree exp, rtx target)
13711 rtx pat, scratch, tmp;
13712 tree form = CALL_EXPR_ARG (exp, 0);
13713 tree arg0 = CALL_EXPR_ARG (exp, 1);
13714 tree arg1 = CALL_EXPR_ARG (exp, 2);
13715 rtx op0 = expand_normal (arg0);
13716 rtx op1 = expand_normal (arg1);
13717 machine_mode mode0 = insn_data[icode].operand[1].mode;
13718 machine_mode mode1 = insn_data[icode].operand[2].mode;
13719 int form_int;
13720 enum rtx_code code;
13722 if (TREE_CODE (form) != INTEGER_CST)
13724 error ("argument 1 of __builtin_spe_predicate must be a constant");
13725 return const0_rtx;
13727 else
13728 form_int = TREE_INT_CST_LOW (form);
13730 gcc_assert (mode0 == mode1);
13732 if (arg0 == error_mark_node || arg1 == error_mark_node)
13733 return const0_rtx;
13735 if (target == 0
13736 || GET_MODE (target) != SImode
13737 || ! (*insn_data[icode].operand[0].predicate) (target, SImode))
13738 target = gen_reg_rtx (SImode);
13740 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13741 op0 = copy_to_mode_reg (mode0, op0);
13742 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
13743 op1 = copy_to_mode_reg (mode1, op1);
13745 scratch = gen_reg_rtx (CCmode);
13747 pat = GEN_FCN (icode) (scratch, op0, op1);
13748 if (! pat)
13749 return const0_rtx;
13750 emit_insn (pat);
13752 /* There are 4 variants for each predicate: _any_, _all_, _upper_,
13753 _lower_. We use one compare, but look in different bits of the
13754 CR for each variant.
13756 There are 2 elements in each SPE simd type (upper/lower). The CR
13757 bits are set as follows:
13759 BIT 0 | BIT 1 | BIT 2   | BIT 3
13760   U   |   L   | (U | L) | (U & L)
13762 So, for an "all" relationship, BIT 3 would be set.
13763 For an "any" relationship, BIT 2 would be set. Etc.
13765 Following traditional nomenclature, these bits map to:
13767 BIT 0 | BIT 1 | BIT 2   | BIT 3
13768   LT  |   GT  |   EQ    |   OV
13770 Later, we will generate rtl to look in the LT/GT/EQ/OV bits.
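Worked example (hypothetical operand values): comparing a = {1, 2}
against b = {1, 3} for equality gives U = 1 and L = 0, so BIT 2
(U | L) is set while BIT 3 (U & L) is clear: the "any" form yields
true, "all" yields false, "upper" true and "lower" false.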
13773 switch (form_int)
13775 /* All variant. OV bit. */
13776 case 0:
13777 /* We need to get to the OV bit, which is the ORDERED bit. We
13778 could generate (ordered:SI (reg:CC xx) (const_int 0)), but
13779 that's ugly and will make validate_condition_mode die.
13780 So let's just use another pattern. */
13781 emit_insn (gen_move_from_CR_ov_bit (target, scratch));
13782 return target;
13783 /* Any variant. EQ bit. */
13784 case 1:
13785 code = EQ;
13786 break;
13787 /* Upper variant. LT bit. */
13788 case 2:
13789 code = LT;
13790 break;
13791 /* Lower variant. GT bit. */
13792 case 3:
13793 code = GT;
13794 break;
13795 default:
13796 error ("argument 1 of __builtin_spe_predicate is out of range");
13797 return const0_rtx;
13800 tmp = gen_rtx_fmt_ee (code, SImode, scratch, const0_rtx);
13801 emit_move_insn (target, tmp);
13803 return target;
13806 /* The evsel builtins look like this:
13808 e = __builtin_spe_evsel_OP (a, b, c, d);
13810 and work like this:
13812 e[upper] = a[upper] *OP* b[upper] ? c[upper] : d[upper];
13813 e[lower] = a[lower] *OP* b[lower] ? c[lower] : d[lower];
13816 static rtx
13817 spe_expand_evsel_builtin (enum insn_code icode, tree exp, rtx target)
13819 rtx pat, scratch;
13820 tree arg0 = CALL_EXPR_ARG (exp, 0);
13821 tree arg1 = CALL_EXPR_ARG (exp, 1);
13822 tree arg2 = CALL_EXPR_ARG (exp, 2);
13823 tree arg3 = CALL_EXPR_ARG (exp, 3);
13824 rtx op0 = expand_normal (arg0);
13825 rtx op1 = expand_normal (arg1);
13826 rtx op2 = expand_normal (arg2);
13827 rtx op3 = expand_normal (arg3);
13828 machine_mode mode0 = insn_data[icode].operand[1].mode;
13829 machine_mode mode1 = insn_data[icode].operand[2].mode;
13831 gcc_assert (mode0 == mode1);
13833 if (arg0 == error_mark_node || arg1 == error_mark_node
13834 || arg2 == error_mark_node || arg3 == error_mark_node)
13835 return const0_rtx;
13837 if (target == 0
13838 || GET_MODE (target) != mode0
13839 || ! (*insn_data[icode].operand[0].predicate) (target, mode0))
13840 target = gen_reg_rtx (mode0);
13842 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
13843 op0 = copy_to_mode_reg (mode0, op0);
13844 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
13845 op1 = copy_to_mode_reg (mode0, op1);
13846 if (! (*insn_data[icode].operand[1].predicate) (op2, mode1))
13847 op2 = copy_to_mode_reg (mode0, op2);
13848 if (! (*insn_data[icode].operand[1].predicate) (op3, mode1))
13849 op3 = copy_to_mode_reg (mode0, op3);
13851 /* Generate the compare. */
13852 scratch = gen_reg_rtx (CCmode);
13853 pat = GEN_FCN (icode) (scratch, op0, op1);
13854 if (! pat)
13855 return const0_rtx;
13856 emit_insn (pat);
13858 if (mode0 == V2SImode)
13859 emit_insn (gen_spe_evsel (target, op2, op3, scratch));
13860 else
13861 emit_insn (gen_spe_evsel_fs (target, op2, op3, scratch));
13863 return target;
13866 /* Raise an error message for a builtin function that is called without the
13867 appropriate target options being set. */
13869 static void
13870 rs6000_invalid_builtin (enum rs6000_builtins fncode)
13872 size_t uns_fncode = (size_t)fncode;
13873 const char *name = rs6000_builtin_info[uns_fncode].name;
13874 HOST_WIDE_INT fnmask = rs6000_builtin_info[uns_fncode].mask;
13876 gcc_assert (name != NULL);
13877 if ((fnmask & RS6000_BTM_CELL) != 0)
13878 error ("builtin function %s is only valid for the cell processor", name);
13879 else if ((fnmask & RS6000_BTM_VSX) != 0)
13880 error ("builtin function %s requires the -mvsx option", name);
13881 else if ((fnmask & RS6000_BTM_HTM) != 0)
13882 error ("builtin function %s requires the -mhtm option", name);
13883 else if ((fnmask & RS6000_BTM_ALTIVEC) != 0)
13884 error ("builtin function %s requires the -maltivec option", name);
13885 else if ((fnmask & RS6000_BTM_PAIRED) != 0)
13886 error ("builtin function %s requires the -mpaired option", name);
13887 else if ((fnmask & RS6000_BTM_SPE) != 0)
13888 error ("builtin function %s requires the -mspe option", name);
13889 else if ((fnmask & (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13890 == (RS6000_BTM_DFP | RS6000_BTM_P8_VECTOR))
13891 error ("builtin function %s requires the -mhard-dfp and"
13892 " -mpower8-vector options", name);
13893 else if ((fnmask & RS6000_BTM_DFP) != 0)
13894 error ("builtin function %s requires the -mhard-dfp option", name);
13895 else if ((fnmask & RS6000_BTM_P8_VECTOR) != 0)
13896 error ("builtin function %s requires the -mpower8-vector option", name);
13897 else if ((fnmask & (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13898 == (RS6000_BTM_HARD_FLOAT | RS6000_BTM_LDBL128))
13899 error ("builtin function %s requires the -mhard-float and"
13900 " -mlong-double-128 options", name);
13901 else if ((fnmask & RS6000_BTM_HARD_FLOAT) != 0)
13902 error ("builtin function %s requires the -mhard-float option", name);
13903 else
13904 error ("builtin function %s is not supported with the current options",
13905 name);
13908 /* Expand an expression EXP that calls a built-in function,
13909 with result going to TARGET if that's convenient
13910 (and in mode MODE if that's convenient).
13911 SUBTARGET may be used as the target for computing one of EXP's operands.
13912 IGNORE is nonzero if the value is to be ignored. */
13914 static rtx
13915 rs6000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
13916 machine_mode mode ATTRIBUTE_UNUSED,
13917 int ignore ATTRIBUTE_UNUSED)
13919 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
13920 enum rs6000_builtins fcode
13921 = (enum rs6000_builtins)DECL_FUNCTION_CODE (fndecl);
13922 size_t uns_fcode = (size_t)fcode;
13923 const struct builtin_description *d;
13924 size_t i;
13925 rtx ret;
13926 bool success;
13927 HOST_WIDE_INT mask = rs6000_builtin_info[uns_fcode].mask;
13928 bool func_valid_p = ((rs6000_builtin_mask & mask) == mask);
13930 if (TARGET_DEBUG_BUILTIN)
13932 enum insn_code icode = rs6000_builtin_info[uns_fcode].icode;
13933 const char *name1 = rs6000_builtin_info[uns_fcode].name;
13934 const char *name2 = ((icode != CODE_FOR_nothing)
13935 ? get_insn_name ((int)icode)
13936 : "nothing");
13937 const char *name3;
13939 switch (rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK)
13941 default: name3 = "unknown"; break;
13942 case RS6000_BTC_SPECIAL: name3 = "special"; break;
13943 case RS6000_BTC_UNARY: name3 = "unary"; break;
13944 case RS6000_BTC_BINARY: name3 = "binary"; break;
13945 case RS6000_BTC_TERNARY: name3 = "ternary"; break;
13946 case RS6000_BTC_PREDICATE: name3 = "predicate"; break;
13947 case RS6000_BTC_ABS: name3 = "abs"; break;
13948 case RS6000_BTC_EVSEL: name3 = "evsel"; break;
13949 case RS6000_BTC_DST: name3 = "dst"; break;
13953 fprintf (stderr,
13954 "rs6000_expand_builtin, %s (%d), insn = %s (%d), type=%s%s\n",
13955 (name1) ? name1 : "---", fcode,
13956 (name2) ? name2 : "---", (int)icode,
13957 name3,
13958 func_valid_p ? "" : ", not valid");
13961 if (!func_valid_p)
13963 rs6000_invalid_builtin (fcode);
13965 /* Given it is invalid, just generate a normal call. */
13966 return expand_call (exp, target, ignore);
13969 switch (fcode)
13971 case RS6000_BUILTIN_RECIP:
13972 return rs6000_expand_binop_builtin (CODE_FOR_recipdf3, exp, target);
13974 case RS6000_BUILTIN_RECIPF:
13975 return rs6000_expand_binop_builtin (CODE_FOR_recipsf3, exp, target);
13977 case RS6000_BUILTIN_RSQRTF:
13978 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtsf2, exp, target);
13980 case RS6000_BUILTIN_RSQRT:
13981 return rs6000_expand_unop_builtin (CODE_FOR_rsqrtdf2, exp, target);
13983 case POWER7_BUILTIN_BPERMD:
13984 return rs6000_expand_binop_builtin (((TARGET_64BIT)
13985 ? CODE_FOR_bpermd_di
13986 : CODE_FOR_bpermd_si), exp, target);
13988 case RS6000_BUILTIN_GET_TB:
13989 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_get_timebase,
13990 target);
13992 case RS6000_BUILTIN_MFTB:
13993 return rs6000_expand_zeroop_builtin (((TARGET_64BIT)
13994 ? CODE_FOR_rs6000_mftb_di
13995 : CODE_FOR_rs6000_mftb_si),
13996 target);
13998 case RS6000_BUILTIN_MFFS:
13999 return rs6000_expand_zeroop_builtin (CODE_FOR_rs6000_mffs, target);
14001 case RS6000_BUILTIN_MTFSF:
14002 return rs6000_expand_mtfsf_builtin (CODE_FOR_rs6000_mtfsf, exp);
14004 case ALTIVEC_BUILTIN_MASK_FOR_LOAD:
14005 case ALTIVEC_BUILTIN_MASK_FOR_STORE:
14007 int icode = (BYTES_BIG_ENDIAN ? (int) CODE_FOR_altivec_lvsr_direct
14008 : (int) CODE_FOR_altivec_lvsl_direct);
14009 machine_mode tmode = insn_data[icode].operand[0].mode;
14010 machine_mode mode = insn_data[icode].operand[1].mode;
14011 tree arg;
14012 rtx op, addr, pat;
14014 gcc_assert (TARGET_ALTIVEC);
14016 arg = CALL_EXPR_ARG (exp, 0);
14017 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
14018 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
14019 addr = memory_address (mode, op);
14020 if (fcode == ALTIVEC_BUILTIN_MASK_FOR_STORE)
14021 op = addr;
14022 else
14024 /* For the load case we need to negate the address. */
14025 op = gen_reg_rtx (GET_MODE (addr));
14026 emit_insn (gen_rtx_SET (VOIDmode, op,
14027 gen_rtx_NEG (GET_MODE (addr), addr)));
14029 op = gen_rtx_MEM (mode, op);
14031 if (target == 0
14032 || GET_MODE (target) != tmode
14033 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
14034 target = gen_reg_rtx (tmode);
14036 pat = GEN_FCN (icode) (target, op);
14037 if (!pat)
14038 return 0;
14039 emit_insn (pat);
14041 return target;
14044 case ALTIVEC_BUILTIN_VCFUX:
14045 case ALTIVEC_BUILTIN_VCFSX:
14046 case ALTIVEC_BUILTIN_VCTUXS:
14047 case ALTIVEC_BUILTIN_VCTSXS:
14048 /* FIXME: There's got to be a nicer way to handle this case than
14049 constructing a new CALL_EXPR. */
14050 if (call_expr_nargs (exp) == 1)
14052 exp = build_call_nary (TREE_TYPE (exp), CALL_EXPR_FN (exp),
14053 2, CALL_EXPR_ARG (exp, 0), integer_zero_node);
14055 break;
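/* I.e. the one-argument form is rewritten onto the two-argument
   pattern with a zero scale operand:

     __builtin_altivec_vcfux (v)

   is expanded as __builtin_altivec_vcfux (v, 0), and likewise for
   vcfsx, vctuxs and vctsxs.  */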
14057 default:
14058 break;
14061 if (TARGET_ALTIVEC)
14063 ret = altivec_expand_builtin (exp, target, &success);
14065 if (success)
14066 return ret;
14068 if (TARGET_SPE)
14070 ret = spe_expand_builtin (exp, target, &success);
14072 if (success)
14073 return ret;
14075 if (TARGET_PAIRED_FLOAT)
14077 ret = paired_expand_builtin (exp, target, &success);
14079 if (success)
14080 return ret;
14082 if (TARGET_HTM)
14084 ret = htm_expand_builtin (exp, target, &success);
14086 if (success)
14087 return ret;
14090 unsigned attr = rs6000_builtin_info[uns_fcode].attr & RS6000_BTC_TYPE_MASK;
14091 gcc_assert (attr == RS6000_BTC_UNARY
14092 || attr == RS6000_BTC_BINARY
14093 || attr == RS6000_BTC_TERNARY);
14095 /* Handle simple unary operations. */
14096 d = bdesc_1arg;
14097 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
14098 if (d->code == fcode)
14099 return rs6000_expand_unop_builtin (d->icode, exp, target);
14101 /* Handle simple binary operations. */
14102 d = bdesc_2arg;
14103 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
14104 if (d->code == fcode)
14105 return rs6000_expand_binop_builtin (d->icode, exp, target);
14107 /* Handle simple ternary operations. */
14108 d = bdesc_3arg;
14109 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
14110 if (d->code == fcode)
14111 return rs6000_expand_ternop_builtin (d->icode, exp, target);
14113 gcc_unreachable ();
14116 static void
14117 rs6000_init_builtins (void)
14119 tree tdecl;
14120 tree ftype;
14121 machine_mode mode;
14123 if (TARGET_DEBUG_BUILTIN)
14124 fprintf (stderr, "rs6000_init_builtins%s%s%s%s\n",
14125 (TARGET_PAIRED_FLOAT) ? ", paired" : "",
14126 (TARGET_SPE) ? ", spe" : "",
14127 (TARGET_ALTIVEC) ? ", altivec" : "",
14128 (TARGET_VSX) ? ", vsx" : "");
14130 V2SI_type_node = build_vector_type (intSI_type_node, 2);
14131 V2SF_type_node = build_vector_type (float_type_node, 2);
14132 V2DI_type_node = build_vector_type (intDI_type_node, 2);
14133 V2DF_type_node = build_vector_type (double_type_node, 2);
14134 V4HI_type_node = build_vector_type (intHI_type_node, 4);
14135 V4SI_type_node = build_vector_type (intSI_type_node, 4);
14136 V4SF_type_node = build_vector_type (float_type_node, 4);
14137 V8HI_type_node = build_vector_type (intHI_type_node, 8);
14138 V16QI_type_node = build_vector_type (intQI_type_node, 16);
14140 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
14141 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
14142 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
14143 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
14145 opaque_V2SF_type_node = build_opaque_vector_type (float_type_node, 2);
14146 opaque_V2SI_type_node = build_opaque_vector_type (intSI_type_node, 2);
14147 opaque_p_V2SI_type_node = build_pointer_type (opaque_V2SI_type_node);
14148 opaque_V4SI_type_node = build_opaque_vector_type (intSI_type_node, 4);
14150 /* We use V1TI mode as a special container to hold __int128_t items that
14151 must live in VSX registers. */
14152 if (intTI_type_node)
14154 V1TI_type_node = build_vector_type (intTI_type_node, 1);
14155 unsigned_V1TI_type_node = build_vector_type (unsigned_intTI_type_node, 1);
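/* Consequently (a sketch relying only on the types created here):

     __int128_t x = 0;
     vector __int128 v = { x };

   gives v mode V1TImode, keeping the value in a VSX register instead
   of a GPR pair.  */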
14158 /* The 'vector bool ...' types must be kept distinct from 'vector unsigned ...'
14159 types, especially in C++ land. Similarly, 'vector pixel' is distinct from
14160 'vector unsigned short'. */
14162 bool_char_type_node = build_distinct_type_copy (unsigned_intQI_type_node);
14163 bool_short_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
14164 bool_int_type_node = build_distinct_type_copy (unsigned_intSI_type_node);
14165 bool_long_type_node = build_distinct_type_copy (unsigned_intDI_type_node);
14166 pixel_type_node = build_distinct_type_copy (unsigned_intHI_type_node);
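/* E.g. in C++ this keeps

     void f (vector bool int);
     void f (vector unsigned int);

   as two distinct overloads (a sketch); had these been plain typedefs
   of the same type, the two declarations would collide.  */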
14168 long_integer_type_internal_node = long_integer_type_node;
14169 long_unsigned_type_internal_node = long_unsigned_type_node;
14170 long_long_integer_type_internal_node = long_long_integer_type_node;
14171 long_long_unsigned_type_internal_node = long_long_unsigned_type_node;
14172 intQI_type_internal_node = intQI_type_node;
14173 uintQI_type_internal_node = unsigned_intQI_type_node;
14174 intHI_type_internal_node = intHI_type_node;
14175 uintHI_type_internal_node = unsigned_intHI_type_node;
14176 intSI_type_internal_node = intSI_type_node;
14177 uintSI_type_internal_node = unsigned_intSI_type_node;
14178 intDI_type_internal_node = intDI_type_node;
14179 uintDI_type_internal_node = unsigned_intDI_type_node;
14180 intTI_type_internal_node = intTI_type_node;
14181 uintTI_type_internal_node = unsigned_intTI_type_node;
14182 float_type_internal_node = float_type_node;
14183 double_type_internal_node = double_type_node;
14184 long_double_type_internal_node = long_double_type_node;
14185 dfloat64_type_internal_node = dfloat64_type_node;
14186 dfloat128_type_internal_node = dfloat128_type_node;
14187 void_type_internal_node = void_type_node;
14189 /* Initialize the modes for builtin_function_type, mapping a machine mode to
14190 tree type node. */
14191 builtin_mode_to_type[QImode][0] = integer_type_node;
14192 builtin_mode_to_type[HImode][0] = integer_type_node;
14193 builtin_mode_to_type[SImode][0] = intSI_type_node;
14194 builtin_mode_to_type[SImode][1] = unsigned_intSI_type_node;
14195 builtin_mode_to_type[DImode][0] = intDI_type_node;
14196 builtin_mode_to_type[DImode][1] = unsigned_intDI_type_node;
14197 builtin_mode_to_type[TImode][0] = intTI_type_node;
14198 builtin_mode_to_type[TImode][1] = unsigned_intTI_type_node;
14199 builtin_mode_to_type[SFmode][0] = float_type_node;
14200 builtin_mode_to_type[DFmode][0] = double_type_node;
14201 builtin_mode_to_type[TFmode][0] = long_double_type_node;
14202 builtin_mode_to_type[DDmode][0] = dfloat64_type_node;
14203 builtin_mode_to_type[TDmode][0] = dfloat128_type_node;
14204 builtin_mode_to_type[V1TImode][0] = V1TI_type_node;
14205 builtin_mode_to_type[V1TImode][1] = unsigned_V1TI_type_node;
14206 builtin_mode_to_type[V2SImode][0] = V2SI_type_node;
14207 builtin_mode_to_type[V2SFmode][0] = V2SF_type_node;
14208 builtin_mode_to_type[V2DImode][0] = V2DI_type_node;
14209 builtin_mode_to_type[V2DImode][1] = unsigned_V2DI_type_node;
14210 builtin_mode_to_type[V2DFmode][0] = V2DF_type_node;
14211 builtin_mode_to_type[V4HImode][0] = V4HI_type_node;
14212 builtin_mode_to_type[V4SImode][0] = V4SI_type_node;
14213 builtin_mode_to_type[V4SImode][1] = unsigned_V4SI_type_node;
14214 builtin_mode_to_type[V4SFmode][0] = V4SF_type_node;
14215 builtin_mode_to_type[V8HImode][0] = V8HI_type_node;
14216 builtin_mode_to_type[V8HImode][1] = unsigned_V8HI_type_node;
14217 builtin_mode_to_type[V16QImode][0] = V16QI_type_node;
14218 builtin_mode_to_type[V16QImode][1] = unsigned_V16QI_type_node;
14220 tdecl = add_builtin_type ("__bool char", bool_char_type_node);
14221 TYPE_NAME (bool_char_type_node) = tdecl;
14223 tdecl = add_builtin_type ("__bool short", bool_short_type_node);
14224 TYPE_NAME (bool_short_type_node) = tdecl;
14226 tdecl = add_builtin_type ("__bool int", bool_int_type_node);
14227 TYPE_NAME (bool_int_type_node) = tdecl;
14229 tdecl = add_builtin_type ("__pixel", pixel_type_node);
14230 TYPE_NAME (pixel_type_node) = tdecl;
14232 bool_V16QI_type_node = build_vector_type (bool_char_type_node, 16);
14233 bool_V8HI_type_node = build_vector_type (bool_short_type_node, 8);
14234 bool_V4SI_type_node = build_vector_type (bool_int_type_node, 4);
14235 bool_V2DI_type_node = build_vector_type (bool_long_type_node, 2);
14236 pixel_V8HI_type_node = build_vector_type (pixel_type_node, 8);
14238 tdecl = add_builtin_type ("__vector unsigned char", unsigned_V16QI_type_node);
14239 TYPE_NAME (unsigned_V16QI_type_node) = tdecl;
14241 tdecl = add_builtin_type ("__vector signed char", V16QI_type_node);
14242 TYPE_NAME (V16QI_type_node) = tdecl;
14244 tdecl = add_builtin_type ("__vector __bool char", bool_V16QI_type_node);
14245 TYPE_NAME ( bool_V16QI_type_node) = tdecl;
14247 tdecl = add_builtin_type ("__vector unsigned short", unsigned_V8HI_type_node);
14248 TYPE_NAME (unsigned_V8HI_type_node) = tdecl;
14250 tdecl = add_builtin_type ("__vector signed short", V8HI_type_node);
14251 TYPE_NAME (V8HI_type_node) = tdecl;
14253 tdecl = add_builtin_type ("__vector __bool short", bool_V8HI_type_node);
14254 TYPE_NAME (bool_V8HI_type_node) = tdecl;
14256 tdecl = add_builtin_type ("__vector unsigned int", unsigned_V4SI_type_node);
14257 TYPE_NAME (unsigned_V4SI_type_node) = tdecl;
14259 tdecl = add_builtin_type ("__vector signed int", V4SI_type_node);
14260 TYPE_NAME (V4SI_type_node) = tdecl;
14262 tdecl = add_builtin_type ("__vector __bool int", bool_V4SI_type_node);
14263 TYPE_NAME (bool_V4SI_type_node) = tdecl;
14265 tdecl = add_builtin_type ("__vector float", V4SF_type_node);
14266 TYPE_NAME (V4SF_type_node) = tdecl;
14268 tdecl = add_builtin_type ("__vector __pixel", pixel_V8HI_type_node);
14269 TYPE_NAME (pixel_V8HI_type_node) = tdecl;
14271 tdecl = add_builtin_type ("__vector double", V2DF_type_node);
14272 TYPE_NAME (V2DF_type_node) = tdecl;
14274 if (TARGET_POWERPC64)
14276 tdecl = add_builtin_type ("__vector long", V2DI_type_node);
14277 TYPE_NAME (V2DI_type_node) = tdecl;
14279 tdecl = add_builtin_type ("__vector unsigned long",
14280 unsigned_V2DI_type_node);
14281 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14283 tdecl = add_builtin_type ("__vector __bool long", bool_V2DI_type_node);
14284 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14286 else
14288 tdecl = add_builtin_type ("__vector long long", V2DI_type_node);
14289 TYPE_NAME (V2DI_type_node) = tdecl;
14291 tdecl = add_builtin_type ("__vector unsigned long long",
14292 unsigned_V2DI_type_node);
14293 TYPE_NAME (unsigned_V2DI_type_node) = tdecl;
14295 tdecl = add_builtin_type ("__vector __bool long long",
14296 bool_V2DI_type_node);
14297 TYPE_NAME (bool_V2DI_type_node) = tdecl;
14300 if (V1TI_type_node)
14302 tdecl = add_builtin_type ("__vector __int128", V1TI_type_node);
14303 TYPE_NAME (V1TI_type_node) = tdecl;
14305 tdecl = add_builtin_type ("__vector unsigned __int128",
14306 unsigned_V1TI_type_node);
14307 TYPE_NAME (unsigned_V1TI_type_node) = tdecl;
14310 /* Paired and SPE builtins are only available if you build a compiler with
14311 the appropriate options, so only create those builtins with the
14312 appropriate compiler option. Create Altivec and VSX builtins on machines
14313 with at least the general purpose extensions (970 and newer) to allow the
14314 use of the target attribute. */
14315 if (TARGET_PAIRED_FLOAT)
14316 paired_init_builtins ();
14317 if (TARGET_SPE)
14318 spe_init_builtins ();
14319 if (TARGET_EXTRA_BUILTINS)
14320 altivec_init_builtins ();
14321 if (TARGET_HTM)
14322 htm_init_builtins ();
14324 if (TARGET_EXTRA_BUILTINS || TARGET_SPE || TARGET_PAIRED_FLOAT)
14325 rs6000_common_init_builtins ();
14327 ftype = builtin_function_type (DFmode, DFmode, DFmode, VOIDmode,
14328 RS6000_BUILTIN_RECIP, "__builtin_recipdiv");
14329 def_builtin ("__builtin_recipdiv", ftype, RS6000_BUILTIN_RECIP);
14331 ftype = builtin_function_type (SFmode, SFmode, SFmode, VOIDmode,
14332 RS6000_BUILTIN_RECIPF, "__builtin_recipdivf");
14333 def_builtin ("__builtin_recipdivf", ftype, RS6000_BUILTIN_RECIPF);
14335 ftype = builtin_function_type (DFmode, DFmode, VOIDmode, VOIDmode,
14336 RS6000_BUILTIN_RSQRT, "__builtin_rsqrt");
14337 def_builtin ("__builtin_rsqrt", ftype, RS6000_BUILTIN_RSQRT);
14339 ftype = builtin_function_type (SFmode, SFmode, VOIDmode, VOIDmode,
14340 RS6000_BUILTIN_RSQRTF, "__builtin_rsqrtf");
14341 def_builtin ("__builtin_rsqrtf", ftype, RS6000_BUILTIN_RSQRTF);
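/* Illustrative use of the four builtins just defined (a sketch,
   assuming the target provides the reciprocal-estimate instructions
   that back them):

     double q = __builtin_recipdiv (x, y);   approximates x / y
     float  r = __builtin_rsqrtf (xf);       approximates 1.0f / sqrtf (xf)

   Each expands to an estimate instruction refined by Newton-Raphson
   steps instead of a full-precision divide or square root.  */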
14343 mode = (TARGET_64BIT) ? DImode : SImode;
14344 ftype = builtin_function_type (mode, mode, mode, VOIDmode,
14345 POWER7_BUILTIN_BPERMD, "__builtin_bpermd");
14346 def_builtin ("__builtin_bpermd", ftype, POWER7_BUILTIN_BPERMD);
14348 ftype = build_function_type_list (unsigned_intDI_type_node,
14349 NULL_TREE);
14350 def_builtin ("__builtin_ppc_get_timebase", ftype, RS6000_BUILTIN_GET_TB);
14352 if (TARGET_64BIT)
14353 ftype = build_function_type_list (unsigned_intDI_type_node,
14354 NULL_TREE);
14355 else
14356 ftype = build_function_type_list (unsigned_intSI_type_node,
14357 NULL_TREE);
14358 def_builtin ("__builtin_ppc_mftb", ftype, RS6000_BUILTIN_MFTB);
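/* Usage sketch for the two time-base builtins above (illustrative):

     unsigned long long tb = __builtin_ppc_get_timebase ();

   __builtin_ppc_get_timebase always yields the full 64-bit time base,
   while __builtin_ppc_mftb returns a DImode value on 64-bit targets
   but only the low SImode word on 32-bit targets, matching the ftype
   selection above.  */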
14360 ftype = build_function_type_list (double_type_node, NULL_TREE);
14361 def_builtin ("__builtin_mffs", ftype, RS6000_BUILTIN_MFFS);
14363 ftype = build_function_type_list (void_type_node,
14364 intSI_type_node, double_type_node,
14365 NULL_TREE);
14366 def_builtin ("__builtin_mtfsf", ftype, RS6000_BUILTIN_MTFSF);
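/* Illustrative pairing of the two FPSCR builtins above (a sketch; the
   0xff field mask is an example value, not a requirement):

     double fpscr = __builtin_mffs ();     save the FPSCR image
     __builtin_mtfsf (0xff, fpscr);        restore all eight 4-bit fields

   consistent with the (intSI, double) signature built above.  */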
14368 #if TARGET_XCOFF
14369 /* AIX libm provides clog as __clog. */
14370 if ((tdecl = builtin_decl_explicit (BUILT_IN_CLOG)) != NULL_TREE)
14371 set_user_assembler_name (tdecl, "__clog");
14372 #endif
14374 #ifdef SUBTARGET_INIT_BUILTINS
14375 SUBTARGET_INIT_BUILTINS;
14376 #endif
14379 /* Returns the rs6000 builtin decl for CODE. */
14381 static tree
14382 rs6000_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
14384 HOST_WIDE_INT fnmask;
14386 if (code >= RS6000_BUILTIN_COUNT)
14387 return error_mark_node;
14389 fnmask = rs6000_builtin_info[code].mask;
14390 if ((fnmask & rs6000_builtin_mask) != fnmask)
14392 rs6000_invalid_builtin ((enum rs6000_builtins)code);
14393 return error_mark_node;
14396 return rs6000_builtin_decls[code];
14399 static void
14400 spe_init_builtins (void)
14402 tree puint_type_node = build_pointer_type (unsigned_type_node);
14403 tree pushort_type_node = build_pointer_type (short_unsigned_type_node);
14404 const struct builtin_description *d;
14405 size_t i;
14407 tree v2si_ftype_4_v2si
14408 = build_function_type_list (opaque_V2SI_type_node,
14409 opaque_V2SI_type_node,
14410 opaque_V2SI_type_node,
14411 opaque_V2SI_type_node,
14412 opaque_V2SI_type_node,
14413 NULL_TREE);
14415 tree v2sf_ftype_4_v2sf
14416 = build_function_type_list (opaque_V2SF_type_node,
14417 opaque_V2SF_type_node,
14418 opaque_V2SF_type_node,
14419 opaque_V2SF_type_node,
14420 opaque_V2SF_type_node,
14421 NULL_TREE);
14423 tree int_ftype_int_v2si_v2si
14424 = build_function_type_list (integer_type_node,
14425 integer_type_node,
14426 opaque_V2SI_type_node,
14427 opaque_V2SI_type_node,
14428 NULL_TREE);
14430 tree int_ftype_int_v2sf_v2sf
14431 = build_function_type_list (integer_type_node,
14432 integer_type_node,
14433 opaque_V2SF_type_node,
14434 opaque_V2SF_type_node,
14435 NULL_TREE);
14437 tree void_ftype_v2si_puint_int
14438 = build_function_type_list (void_type_node,
14439 opaque_V2SI_type_node,
14440 puint_type_node,
14441 integer_type_node,
14442 NULL_TREE);
14444 tree void_ftype_v2si_puint_char
14445 = build_function_type_list (void_type_node,
14446 opaque_V2SI_type_node,
14447 puint_type_node,
14448 char_type_node,
14449 NULL_TREE);
14451 tree void_ftype_v2si_pv2si_int
14452 = build_function_type_list (void_type_node,
14453 opaque_V2SI_type_node,
14454 opaque_p_V2SI_type_node,
14455 integer_type_node,
14456 NULL_TREE);
14458 tree void_ftype_v2si_pv2si_char
14459 = build_function_type_list (void_type_node,
14460 opaque_V2SI_type_node,
14461 opaque_p_V2SI_type_node,
14462 char_type_node,
14463 NULL_TREE);
14465 tree void_ftype_int
14466 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14468 tree int_ftype_void
14469 = build_function_type_list (integer_type_node, NULL_TREE);
14471 tree v2si_ftype_pv2si_int
14472 = build_function_type_list (opaque_V2SI_type_node,
14473 opaque_p_V2SI_type_node,
14474 integer_type_node,
14475 NULL_TREE);
14477 tree v2si_ftype_puint_int
14478 = build_function_type_list (opaque_V2SI_type_node,
14479 puint_type_node,
14480 integer_type_node,
14481 NULL_TREE);
14483 tree v2si_ftype_pushort_int
14484 = build_function_type_list (opaque_V2SI_type_node,
14485 pushort_type_node,
14486 integer_type_node,
14487 NULL_TREE);
14489 tree v2si_ftype_signed_char
14490 = build_function_type_list (opaque_V2SI_type_node,
14491 signed_char_type_node,
14492 NULL_TREE);
14494 add_builtin_type ("__ev64_opaque__", opaque_V2SI_type_node);
14496 /* Initialize irregular SPE builtins. */
14498 def_builtin ("__builtin_spe_mtspefscr", void_ftype_int, SPE_BUILTIN_MTSPEFSCR);
14499 def_builtin ("__builtin_spe_mfspefscr", int_ftype_void, SPE_BUILTIN_MFSPEFSCR);
14500 def_builtin ("__builtin_spe_evstddx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDDX);
14501 def_builtin ("__builtin_spe_evstdhx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDHX);
14502 def_builtin ("__builtin_spe_evstdwx", void_ftype_v2si_pv2si_int, SPE_BUILTIN_EVSTDWX);
14503 def_builtin ("__builtin_spe_evstwhex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHEX);
14504 def_builtin ("__builtin_spe_evstwhox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWHOX);
14505 def_builtin ("__builtin_spe_evstwwex", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWEX);
14506 def_builtin ("__builtin_spe_evstwwox", void_ftype_v2si_puint_int, SPE_BUILTIN_EVSTWWOX);
14507 def_builtin ("__builtin_spe_evstdd", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDD);
14508 def_builtin ("__builtin_spe_evstdh", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDH);
14509 def_builtin ("__builtin_spe_evstdw", void_ftype_v2si_pv2si_char, SPE_BUILTIN_EVSTDW);
14510 def_builtin ("__builtin_spe_evstwhe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHE);
14511 def_builtin ("__builtin_spe_evstwho", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWHO);
14512 def_builtin ("__builtin_spe_evstwwe", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWE);
14513 def_builtin ("__builtin_spe_evstwwo", void_ftype_v2si_puint_char, SPE_BUILTIN_EVSTWWO);
14514 def_builtin ("__builtin_spe_evsplatfi", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATFI);
14515 def_builtin ("__builtin_spe_evsplati", v2si_ftype_signed_char, SPE_BUILTIN_EVSPLATI);
14517 /* Loads. */
14518 def_builtin ("__builtin_spe_evlddx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDDX);
14519 def_builtin ("__builtin_spe_evldwx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDWX);
14520 def_builtin ("__builtin_spe_evldhx", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDHX);
14521 def_builtin ("__builtin_spe_evlwhex", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHEX);
14522 def_builtin ("__builtin_spe_evlwhoux", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOUX);
14523 def_builtin ("__builtin_spe_evlwhosx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOSX);
14524 def_builtin ("__builtin_spe_evlwwsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLATX);
14525 def_builtin ("__builtin_spe_evlwhsplatx", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLATX);
14526 def_builtin ("__builtin_spe_evlhhesplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLATX);
14527 def_builtin ("__builtin_spe_evlhhousplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLATX);
14528 def_builtin ("__builtin_spe_evlhhossplatx", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLATX);
14529 def_builtin ("__builtin_spe_evldd", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDD);
14530 def_builtin ("__builtin_spe_evldw", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDW);
14531 def_builtin ("__builtin_spe_evldh", v2si_ftype_pv2si_int, SPE_BUILTIN_EVLDH);
14532 def_builtin ("__builtin_spe_evlhhesplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHESPLAT);
14533 def_builtin ("__builtin_spe_evlhhossplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOSSPLAT);
14534 def_builtin ("__builtin_spe_evlhhousplat", v2si_ftype_pushort_int, SPE_BUILTIN_EVLHHOUSPLAT);
14535 def_builtin ("__builtin_spe_evlwhe", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHE);
14536 def_builtin ("__builtin_spe_evlwhos", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOS);
14537 def_builtin ("__builtin_spe_evlwhou", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHOU);
14538 def_builtin ("__builtin_spe_evlwhsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWHSPLAT);
14539 def_builtin ("__builtin_spe_evlwwsplat", v2si_ftype_puint_int, SPE_BUILTIN_EVLWWSPLAT);
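/* Illustrative use of one of the SPE load builtins above (a sketch;
   p is assumed to point at the opaque 64-bit type registered earlier
   as __ev64_opaque__):

     __ev64_opaque__ v = __builtin_spe_evldd (p, 8);

   matching the v2si_ftype_pv2si_int signature, whose second operand
   is a small literal byte offset.  */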
14541 /* Predicates. */
14542 d = bdesc_spe_predicates;
14543 for (i = 0; i < ARRAY_SIZE (bdesc_spe_predicates); ++i, d++)
14545 tree type;
14547 switch (insn_data[d->icode].operand[1].mode)
14549 case V2SImode:
14550 type = int_ftype_int_v2si_v2si;
14551 break;
14552 case V2SFmode:
14553 type = int_ftype_int_v2sf_v2sf;
14554 break;
14555 default:
14556 gcc_unreachable ();
14559 def_builtin (d->name, type, d->code);
14562 /* Evsel predicates. */
14563 d = bdesc_spe_evsel;
14564 for (i = 0; i < ARRAY_SIZE (bdesc_spe_evsel); ++i, d++)
14566 tree type;
14568 switch (insn_data[d->icode].operand[1].mode)
14570 case V2SImode:
14571 type = v2si_ftype_4_v2si;
14572 break;
14573 case V2SFmode:
14574 type = v2sf_ftype_4_v2sf;
14575 break;
14576 default:
14577 gcc_unreachable ();
14580 def_builtin (d->name, type, d->code);
14584 static void
14585 paired_init_builtins (void)
14587 const struct builtin_description *d;
14588 size_t i;
14590 tree int_ftype_int_v2sf_v2sf
14591 = build_function_type_list (integer_type_node,
14592 integer_type_node,
14593 V2SF_type_node,
14594 V2SF_type_node,
14595 NULL_TREE);
14596 tree pcfloat_type_node =
14597 build_pointer_type (build_qualified_type
14598 (float_type_node, TYPE_QUAL_CONST));
14600 tree v2sf_ftype_long_pcfloat = build_function_type_list (V2SF_type_node,
14601 long_integer_type_node,
14602 pcfloat_type_node,
14603 NULL_TREE);
14604 tree void_ftype_v2sf_long_pcfloat =
14605 build_function_type_list (void_type_node,
14606 V2SF_type_node,
14607 long_integer_type_node,
14608 pcfloat_type_node,
14609 NULL_TREE);
14612 def_builtin ("__builtin_paired_lx", v2sf_ftype_long_pcfloat,
14613 PAIRED_BUILTIN_LX);
14616 def_builtin ("__builtin_paired_stx", void_ftype_v2sf_long_pcfloat,
14617 PAIRED_BUILTIN_STX);
14619 /* Predicates. */
14620 d = bdesc_paired_preds;
14621 for (i = 0; i < ARRAY_SIZE (bdesc_paired_preds); ++i, d++)
14623 tree type;
14625 if (TARGET_DEBUG_BUILTIN)
14626 fprintf (stderr, "paired pred #%d, insn = %s [%d], mode = %s\n",
14627 (int)i, get_insn_name (d->icode), (int)d->icode,
14628 GET_MODE_NAME (insn_data[d->icode].operand[1].mode));
14630 switch (insn_data[d->icode].operand[1].mode)
14632 case V2SFmode:
14633 type = int_ftype_int_v2sf_v2sf;
14634 break;
14635 default:
14636 gcc_unreachable ();
14639 def_builtin (d->name, type, d->code);
14643 static void
14644 altivec_init_builtins (void)
14646 const struct builtin_description *d;
14647 size_t i;
14648 tree ftype;
14649 tree decl;
14651 tree pvoid_type_node = build_pointer_type (void_type_node);
14653 tree pcvoid_type_node
14654 = build_pointer_type (build_qualified_type (void_type_node,
14655 TYPE_QUAL_CONST));
14657 tree int_ftype_opaque
14658 = build_function_type_list (integer_type_node,
14659 opaque_V4SI_type_node, NULL_TREE);
14660 tree opaque_ftype_opaque
14661 = build_function_type_list (integer_type_node, NULL_TREE);
14662 tree opaque_ftype_opaque_int
14663 = build_function_type_list (opaque_V4SI_type_node,
14664 opaque_V4SI_type_node, integer_type_node, NULL_TREE);
14665 tree opaque_ftype_opaque_opaque_int
14666 = build_function_type_list (opaque_V4SI_type_node,
14667 opaque_V4SI_type_node, opaque_V4SI_type_node,
14668 integer_type_node, NULL_TREE);
14669 tree int_ftype_int_opaque_opaque
14670 = build_function_type_list (integer_type_node,
14671 integer_type_node, opaque_V4SI_type_node,
14672 opaque_V4SI_type_node, NULL_TREE);
14673 tree int_ftype_int_v4si_v4si
14674 = build_function_type_list (integer_type_node,
14675 integer_type_node, V4SI_type_node,
14676 V4SI_type_node, NULL_TREE);
14677 tree int_ftype_int_v2di_v2di
14678 = build_function_type_list (integer_type_node,
14679 integer_type_node, V2DI_type_node,
14680 V2DI_type_node, NULL_TREE);
14681 tree void_ftype_v4si
14682 = build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
14683 tree v8hi_ftype_void
14684 = build_function_type_list (V8HI_type_node, NULL_TREE);
14685 tree void_ftype_void
14686 = build_function_type_list (void_type_node, NULL_TREE);
14687 tree void_ftype_int
14688 = build_function_type_list (void_type_node, integer_type_node, NULL_TREE);
14690 tree opaque_ftype_long_pcvoid
14691 = build_function_type_list (opaque_V4SI_type_node,
14692 long_integer_type_node, pcvoid_type_node,
14693 NULL_TREE);
14694 tree v16qi_ftype_long_pcvoid
14695 = build_function_type_list (V16QI_type_node,
14696 long_integer_type_node, pcvoid_type_node,
14697 NULL_TREE);
14698 tree v8hi_ftype_long_pcvoid
14699 = build_function_type_list (V8HI_type_node,
14700 long_integer_type_node, pcvoid_type_node,
14701 NULL_TREE);
14702 tree v4si_ftype_long_pcvoid
14703 = build_function_type_list (V4SI_type_node,
14704 long_integer_type_node, pcvoid_type_node,
14705 NULL_TREE);
14706 tree v4sf_ftype_long_pcvoid
14707 = build_function_type_list (V4SF_type_node,
14708 long_integer_type_node, pcvoid_type_node,
14709 NULL_TREE);
14710 tree v2df_ftype_long_pcvoid
14711 = build_function_type_list (V2DF_type_node,
14712 long_integer_type_node, pcvoid_type_node,
14713 NULL_TREE);
14714 tree v2di_ftype_long_pcvoid
14715 = build_function_type_list (V2DI_type_node,
14716 long_integer_type_node, pcvoid_type_node,
14717 NULL_TREE);
14719 tree void_ftype_opaque_long_pvoid
14720 = build_function_type_list (void_type_node,
14721 opaque_V4SI_type_node, long_integer_type_node,
14722 pvoid_type_node, NULL_TREE);
14723 tree void_ftype_v4si_long_pvoid
14724 = build_function_type_list (void_type_node,
14725 V4SI_type_node, long_integer_type_node,
14726 pvoid_type_node, NULL_TREE);
14727 tree void_ftype_v16qi_long_pvoid
14728 = build_function_type_list (void_type_node,
14729 V16QI_type_node, long_integer_type_node,
14730 pvoid_type_node, NULL_TREE);
14731 tree void_ftype_v8hi_long_pvoid
14732 = build_function_type_list (void_type_node,
14733 V8HI_type_node, long_integer_type_node,
14734 pvoid_type_node, NULL_TREE);
14735 tree void_ftype_v4sf_long_pvoid
14736 = build_function_type_list (void_type_node,
14737 V4SF_type_node, long_integer_type_node,
14738 pvoid_type_node, NULL_TREE);
14739 tree void_ftype_v2df_long_pvoid
14740 = build_function_type_list (void_type_node,
14741 V2DF_type_node, long_integer_type_node,
14742 pvoid_type_node, NULL_TREE);
14743 tree void_ftype_v2di_long_pvoid
14744 = build_function_type_list (void_type_node,
14745 V2DI_type_node, long_integer_type_node,
14746 pvoid_type_node, NULL_TREE);
14747 tree int_ftype_int_v8hi_v8hi
14748 = build_function_type_list (integer_type_node,
14749 integer_type_node, V8HI_type_node,
14750 V8HI_type_node, NULL_TREE);
14751 tree int_ftype_int_v16qi_v16qi
14752 = build_function_type_list (integer_type_node,
14753 integer_type_node, V16QI_type_node,
14754 V16QI_type_node, NULL_TREE);
14755 tree int_ftype_int_v4sf_v4sf
14756 = build_function_type_list (integer_type_node,
14757 integer_type_node, V4SF_type_node,
14758 V4SF_type_node, NULL_TREE);
14759 tree int_ftype_int_v2df_v2df
14760 = build_function_type_list (integer_type_node,
14761 integer_type_node, V2DF_type_node,
14762 V2DF_type_node, NULL_TREE);
14763 tree v2di_ftype_v2di
14764 = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
14765 tree v4si_ftype_v4si
14766 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
14767 tree v8hi_ftype_v8hi
14768 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
14769 tree v16qi_ftype_v16qi
14770 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
14771 tree v4sf_ftype_v4sf
14772 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
14773 tree v2df_ftype_v2df
14774 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
14775 tree void_ftype_pcvoid_int_int
14776 = build_function_type_list (void_type_node,
14777 pcvoid_type_node, integer_type_node,
14778 integer_type_node, NULL_TREE);
14780 def_builtin ("__builtin_altivec_mtvscr", void_ftype_v4si, ALTIVEC_BUILTIN_MTVSCR);
14781 def_builtin ("__builtin_altivec_mfvscr", v8hi_ftype_void, ALTIVEC_BUILTIN_MFVSCR);
14782 def_builtin ("__builtin_altivec_dssall", void_ftype_void, ALTIVEC_BUILTIN_DSSALL);
14783 def_builtin ("__builtin_altivec_dss", void_ftype_int, ALTIVEC_BUILTIN_DSS);
14784 def_builtin ("__builtin_altivec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSL);
14785 def_builtin ("__builtin_altivec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVSR);
14786 def_builtin ("__builtin_altivec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEBX);
14787 def_builtin ("__builtin_altivec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEHX);
14788 def_builtin ("__builtin_altivec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVEWX);
14789 def_builtin ("__builtin_altivec_lvxl", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVXL);
14790 def_builtin ("__builtin_altivec_lvxl_v2df", v2df_ftype_long_pcvoid,
14791 ALTIVEC_BUILTIN_LVXL_V2DF);
14792 def_builtin ("__builtin_altivec_lvxl_v2di", v2di_ftype_long_pcvoid,
14793 ALTIVEC_BUILTIN_LVXL_V2DI);
14794 def_builtin ("__builtin_altivec_lvxl_v4sf", v4sf_ftype_long_pcvoid,
14795 ALTIVEC_BUILTIN_LVXL_V4SF);
14796 def_builtin ("__builtin_altivec_lvxl_v4si", v4si_ftype_long_pcvoid,
14797 ALTIVEC_BUILTIN_LVXL_V4SI);
14798 def_builtin ("__builtin_altivec_lvxl_v8hi", v8hi_ftype_long_pcvoid,
14799 ALTIVEC_BUILTIN_LVXL_V8HI);
14800 def_builtin ("__builtin_altivec_lvxl_v16qi", v16qi_ftype_long_pcvoid,
14801 ALTIVEC_BUILTIN_LVXL_V16QI);
14802 def_builtin ("__builtin_altivec_lvx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVX);
14803 def_builtin ("__builtin_altivec_lvx_v2df", v2df_ftype_long_pcvoid,
14804 ALTIVEC_BUILTIN_LVX_V2DF);
14805 def_builtin ("__builtin_altivec_lvx_v2di", v2di_ftype_long_pcvoid,
14806 ALTIVEC_BUILTIN_LVX_V2DI);
14807 def_builtin ("__builtin_altivec_lvx_v4sf", v4sf_ftype_long_pcvoid,
14808 ALTIVEC_BUILTIN_LVX_V4SF);
14809 def_builtin ("__builtin_altivec_lvx_v4si", v4si_ftype_long_pcvoid,
14810 ALTIVEC_BUILTIN_LVX_V4SI);
14811 def_builtin ("__builtin_altivec_lvx_v8hi", v8hi_ftype_long_pcvoid,
14812 ALTIVEC_BUILTIN_LVX_V8HI);
14813 def_builtin ("__builtin_altivec_lvx_v16qi", v16qi_ftype_long_pcvoid,
14814 ALTIVEC_BUILTIN_LVX_V16QI);
14815 def_builtin ("__builtin_altivec_stvx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVX);
14816 def_builtin ("__builtin_altivec_stvx_v2df", void_ftype_v2df_long_pvoid,
14817 ALTIVEC_BUILTIN_STVX_V2DF);
14818 def_builtin ("__builtin_altivec_stvx_v2di", void_ftype_v2di_long_pvoid,
14819 ALTIVEC_BUILTIN_STVX_V2DI);
14820 def_builtin ("__builtin_altivec_stvx_v4sf", void_ftype_v4sf_long_pvoid,
14821 ALTIVEC_BUILTIN_STVX_V4SF);
14822 def_builtin ("__builtin_altivec_stvx_v4si", void_ftype_v4si_long_pvoid,
14823 ALTIVEC_BUILTIN_STVX_V4SI);
14824 def_builtin ("__builtin_altivec_stvx_v8hi", void_ftype_v8hi_long_pvoid,
14825 ALTIVEC_BUILTIN_STVX_V8HI);
14826 def_builtin ("__builtin_altivec_stvx_v16qi", void_ftype_v16qi_long_pvoid,
14827 ALTIVEC_BUILTIN_STVX_V16QI);
14828 def_builtin ("__builtin_altivec_stvewx", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVEWX);
14829 def_builtin ("__builtin_altivec_stvxl", void_ftype_v4si_long_pvoid, ALTIVEC_BUILTIN_STVXL);
14830 def_builtin ("__builtin_altivec_stvxl_v2df", void_ftype_v2df_long_pvoid,
14831 ALTIVEC_BUILTIN_STVXL_V2DF);
14832 def_builtin ("__builtin_altivec_stvxl_v2di", void_ftype_v2di_long_pvoid,
14833 ALTIVEC_BUILTIN_STVXL_V2DI);
14834 def_builtin ("__builtin_altivec_stvxl_v4sf", void_ftype_v4sf_long_pvoid,
14835 ALTIVEC_BUILTIN_STVXL_V4SF);
14836 def_builtin ("__builtin_altivec_stvxl_v4si", void_ftype_v4si_long_pvoid,
14837 ALTIVEC_BUILTIN_STVXL_V4SI);
14838 def_builtin ("__builtin_altivec_stvxl_v8hi", void_ftype_v8hi_long_pvoid,
14839 ALTIVEC_BUILTIN_STVXL_V8HI);
14840 def_builtin ("__builtin_altivec_stvxl_v16qi", void_ftype_v16qi_long_pvoid,
14841 ALTIVEC_BUILTIN_STVXL_V16QI);
14842 def_builtin ("__builtin_altivec_stvebx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVEBX);
14843 def_builtin ("__builtin_altivec_stvehx", void_ftype_v8hi_long_pvoid, ALTIVEC_BUILTIN_STVEHX);
14844 def_builtin ("__builtin_vec_ld", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LD);
14845 def_builtin ("__builtin_vec_lde", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDE);
14846 def_builtin ("__builtin_vec_ldl", opaque_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LDL);
14847 def_builtin ("__builtin_vec_lvsl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSL);
14848 def_builtin ("__builtin_vec_lvsr", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVSR);
14849 def_builtin ("__builtin_vec_lvebx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEBX);
14850 def_builtin ("__builtin_vec_lvehx", v8hi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEHX);
14851 def_builtin ("__builtin_vec_lvewx", v4si_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVEWX);
14852 def_builtin ("__builtin_vec_st", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_ST);
14853 def_builtin ("__builtin_vec_ste", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STE);
14854 def_builtin ("__builtin_vec_stl", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STL);
14855 def_builtin ("__builtin_vec_stvewx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEWX);
14856 def_builtin ("__builtin_vec_stvebx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEBX);
14857 def_builtin ("__builtin_vec_stvehx", void_ftype_opaque_long_pvoid, ALTIVEC_BUILTIN_VEC_STVEHX);
14859 def_builtin ("__builtin_vsx_lxvd2x_v2df", v2df_ftype_long_pcvoid,
14860 VSX_BUILTIN_LXVD2X_V2DF);
14861 def_builtin ("__builtin_vsx_lxvd2x_v2di", v2di_ftype_long_pcvoid,
14862 VSX_BUILTIN_LXVD2X_V2DI);
14863 def_builtin ("__builtin_vsx_lxvw4x_v4sf", v4sf_ftype_long_pcvoid,
14864 VSX_BUILTIN_LXVW4X_V4SF);
14865 def_builtin ("__builtin_vsx_lxvw4x_v4si", v4si_ftype_long_pcvoid,
14866 VSX_BUILTIN_LXVW4X_V4SI);
14867 def_builtin ("__builtin_vsx_lxvw4x_v8hi", v8hi_ftype_long_pcvoid,
14868 VSX_BUILTIN_LXVW4X_V8HI);
14869 def_builtin ("__builtin_vsx_lxvw4x_v16qi", v16qi_ftype_long_pcvoid,
14870 VSX_BUILTIN_LXVW4X_V16QI);
14871 def_builtin ("__builtin_vsx_stxvd2x_v2df", void_ftype_v2df_long_pvoid,
14872 VSX_BUILTIN_STXVD2X_V2DF);
14873 def_builtin ("__builtin_vsx_stxvd2x_v2di", void_ftype_v2di_long_pvoid,
14874 VSX_BUILTIN_STXVD2X_V2DI);
14875 def_builtin ("__builtin_vsx_stxvw4x_v4sf", void_ftype_v4sf_long_pvoid,
14876 VSX_BUILTIN_STXVW4X_V4SF);
14877 def_builtin ("__builtin_vsx_stxvw4x_v4si", void_ftype_v4si_long_pvoid,
14878 VSX_BUILTIN_STXVW4X_V4SI);
14879 def_builtin ("__builtin_vsx_stxvw4x_v8hi", void_ftype_v8hi_long_pvoid,
14880 VSX_BUILTIN_STXVW4X_V8HI);
14881 def_builtin ("__builtin_vsx_stxvw4x_v16qi", void_ftype_v16qi_long_pvoid,
14882 VSX_BUILTIN_STXVW4X_V16QI);
14883 def_builtin ("__builtin_vec_vsx_ld", opaque_ftype_long_pcvoid,
14884 VSX_BUILTIN_VEC_LD);
14885 def_builtin ("__builtin_vec_vsx_st", void_ftype_opaque_long_pvoid,
14886 VSX_BUILTIN_VEC_ST);
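/* User code normally reaches the overloaded entry points above through
   the <altivec.h> macros, e.g. (a sketch, assuming pi is a pointer to
   int and VSX is enabled):

     vector int v = vec_ld (0, pi);      AltiVec load, 16-byte aligned
     vec_vsx_st (v, 0, pi);              VSX store, no 16-byte alignment
                                         requirement

   vec_ld resolves to __builtin_vec_ld and vec_vsx_st resolves to
   __builtin_vec_vsx_st during overload resolution.  */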
14888 def_builtin ("__builtin_vec_step", int_ftype_opaque, ALTIVEC_BUILTIN_VEC_STEP);
14889 def_builtin ("__builtin_vec_splats", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_SPLATS);
14890 def_builtin ("__builtin_vec_promote", opaque_ftype_opaque, ALTIVEC_BUILTIN_VEC_PROMOTE);
14892 def_builtin ("__builtin_vec_sld", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_SLD);
14893 def_builtin ("__builtin_vec_splat", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_SPLAT);
14894 def_builtin ("__builtin_vec_extract", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_EXTRACT);
14895 def_builtin ("__builtin_vec_insert", opaque_ftype_opaque_opaque_int, ALTIVEC_BUILTIN_VEC_INSERT);
14896 def_builtin ("__builtin_vec_vspltw", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTW);
14897 def_builtin ("__builtin_vec_vsplth", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTH);
14898 def_builtin ("__builtin_vec_vspltb", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VSPLTB);
14899 def_builtin ("__builtin_vec_ctf", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTF);
14900 def_builtin ("__builtin_vec_vcfsx", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFSX);
14901 def_builtin ("__builtin_vec_vcfux", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_VCFUX);
14902 def_builtin ("__builtin_vec_cts", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTS);
14903 def_builtin ("__builtin_vec_ctu", opaque_ftype_opaque_int, ALTIVEC_BUILTIN_VEC_CTU);
14905 /* Cell builtins. */
14906 def_builtin ("__builtin_altivec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLX);
14907 def_builtin ("__builtin_altivec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVLXL);
14908 def_builtin ("__builtin_altivec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRX);
14909 def_builtin ("__builtin_altivec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_LVRXL);
14911 def_builtin ("__builtin_vec_lvlx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLX);
14912 def_builtin ("__builtin_vec_lvlxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVLXL);
14913 def_builtin ("__builtin_vec_lvrx", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRX);
14914 def_builtin ("__builtin_vec_lvrxl", v16qi_ftype_long_pcvoid, ALTIVEC_BUILTIN_VEC_LVRXL);
14916 def_builtin ("__builtin_altivec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLX);
14917 def_builtin ("__builtin_altivec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVLXL);
14918 def_builtin ("__builtin_altivec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRX);
14919 def_builtin ("__builtin_altivec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_STVRXL);
14921 def_builtin ("__builtin_vec_stvlx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLX);
14922 def_builtin ("__builtin_vec_stvlxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVLXL);
14923 def_builtin ("__builtin_vec_stvrx", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRX);
14924 def_builtin ("__builtin_vec_stvrxl", void_ftype_v16qi_long_pvoid, ALTIVEC_BUILTIN_VEC_STVRXL);
14926 /* Add the DST variants. */
14927 d = bdesc_dst;
14928 for (i = 0; i < ARRAY_SIZE (bdesc_dst); i++, d++)
14929 def_builtin (d->name, void_ftype_pcvoid_int_int, d->code);
14931 /* Initialize the predicates. */
14932 d = bdesc_altivec_preds;
14933 for (i = 0; i < ARRAY_SIZE (bdesc_altivec_preds); i++, d++)
14935 machine_mode mode1;
14936 tree type;
14938 if (rs6000_overloaded_builtin_p (d->code))
14939 mode1 = VOIDmode;
14940 else
14941 mode1 = insn_data[d->icode].operand[1].mode;
14943 switch (mode1)
14945 case VOIDmode:
14946 type = int_ftype_int_opaque_opaque;
14947 break;
14948 case V2DImode:
14949 type = int_ftype_int_v2di_v2di;
14950 break;
14951 case V4SImode:
14952 type = int_ftype_int_v4si_v4si;
14953 break;
14954 case V8HImode:
14955 type = int_ftype_int_v8hi_v8hi;
14956 break;
14957 case V16QImode:
14958 type = int_ftype_int_v16qi_v16qi;
14959 break;
14960 case V4SFmode:
14961 type = int_ftype_int_v4sf_v4sf;
14962 break;
14963 case V2DFmode:
14964 type = int_ftype_int_v2df_v2df;
14965 break;
14966 default:
14967 gcc_unreachable ();
14970 def_builtin (d->name, type, d->code);
14973 /* Initialize the abs* operators. */
14974 d = bdesc_abs;
14975 for (i = 0; i < ARRAY_SIZE (bdesc_abs); i++, d++)
14977 machine_mode mode0;
14978 tree type;
14980 mode0 = insn_data[d->icode].operand[0].mode;
14982 switch (mode0)
14984 case V2DImode:
14985 type = v2di_ftype_v2di;
14986 break;
14987 case V4SImode:
14988 type = v4si_ftype_v4si;
14989 break;
14990 case V8HImode:
14991 type = v8hi_ftype_v8hi;
14992 break;
14993 case V16QImode:
14994 type = v16qi_ftype_v16qi;
14995 break;
14996 case V4SFmode:
14997 type = v4sf_ftype_v4sf;
14998 break;
14999 case V2DFmode:
15000 type = v2df_ftype_v2df;
15001 break;
15002 default:
15003 gcc_unreachable ();
15006 def_builtin (d->name, type, d->code);
15009 /* Initialize target builtin that implements
15010 targetm.vectorize.builtin_mask_for_load. */
15012 decl = add_builtin_function ("__builtin_altivec_mask_for_load",
15013 v16qi_ftype_long_pcvoid,
15014 ALTIVEC_BUILTIN_MASK_FOR_LOAD,
15015 BUILT_IN_MD, NULL, NULL_TREE);
15016 TREE_READONLY (decl) = 1;
15017 /* Record the decl. Will be used by rs6000_builtin_mask_for_load. */
15018 altivec_builtin_mask_for_load = decl;
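/* Sketch of how the vectorizer uses this decl (illustrative): for a
   misaligned access it emits

     mask = __builtin_altivec_mask_for_load (addr);

   and realigns a pair of aligned loads with a permute driven by that
   mask; marking the decl TREE_READONLY lets such calls be CSEd or
   hoisted out of loops.  */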
15020 /* Access to the vec_init patterns. */
15021 ftype = build_function_type_list (V4SI_type_node, integer_type_node,
15022 integer_type_node, integer_type_node,
15023 integer_type_node, NULL_TREE);
15024 def_builtin ("__builtin_vec_init_v4si", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SI);
15026 ftype = build_function_type_list (V8HI_type_node, short_integer_type_node,
15027 short_integer_type_node,
15028 short_integer_type_node,
15029 short_integer_type_node,
15030 short_integer_type_node,
15031 short_integer_type_node,
15032 short_integer_type_node,
15033 short_integer_type_node, NULL_TREE);
15034 def_builtin ("__builtin_vec_init_v8hi", ftype, ALTIVEC_BUILTIN_VEC_INIT_V8HI);
15036 ftype = build_function_type_list (V16QI_type_node, char_type_node,
15037 char_type_node, char_type_node,
15038 char_type_node, char_type_node,
15039 char_type_node, char_type_node,
15040 char_type_node, char_type_node,
15041 char_type_node, char_type_node,
15042 char_type_node, char_type_node,
15043 char_type_node, char_type_node,
15044 char_type_node, NULL_TREE);
15045 def_builtin ("__builtin_vec_init_v16qi", ftype,
15046 ALTIVEC_BUILTIN_VEC_INIT_V16QI);
15048 ftype = build_function_type_list (V4SF_type_node, float_type_node,
15049 float_type_node, float_type_node,
15050 float_type_node, NULL_TREE);
15051 def_builtin ("__builtin_vec_init_v4sf", ftype, ALTIVEC_BUILTIN_VEC_INIT_V4SF);
15053 /* VSX builtins. */
15054 ftype = build_function_type_list (V2DF_type_node, double_type_node,
15055 double_type_node, NULL_TREE);
15056 def_builtin ("__builtin_vec_init_v2df", ftype, VSX_BUILTIN_VEC_INIT_V2DF);
15058 ftype = build_function_type_list (V2DI_type_node, intDI_type_node,
15059 intDI_type_node, NULL_TREE);
15060 def_builtin ("__builtin_vec_init_v2di", ftype, VSX_BUILTIN_VEC_INIT_V2DI);
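/* Illustrative use of the vec_init entry points above (a sketch):

     vector int v = __builtin_vec_init_v4si (a, b, c, d);

   Each of these funnels its scalar operands into the vec_init
   expander for the corresponding vector mode.  */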
15062 /* Access to the vec_set patterns. */
15063 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
15064 intSI_type_node,
15065 integer_type_node, NULL_TREE);
15066 def_builtin ("__builtin_vec_set_v4si", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SI);
15068 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
15069 intHI_type_node,
15070 integer_type_node, NULL_TREE);
15071 def_builtin ("__builtin_vec_set_v8hi", ftype, ALTIVEC_BUILTIN_VEC_SET_V8HI);
15073 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
15074 intQI_type_node,
15075 integer_type_node, NULL_TREE);
15076 def_builtin ("__builtin_vec_set_v16qi", ftype, ALTIVEC_BUILTIN_VEC_SET_V16QI);
15078 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
15079 float_type_node,
15080 integer_type_node, NULL_TREE);
15081 def_builtin ("__builtin_vec_set_v4sf", ftype, ALTIVEC_BUILTIN_VEC_SET_V4SF);
15083 ftype = build_function_type_list (V2DF_type_node, V2DF_type_node,
15084 double_type_node,
15085 integer_type_node, NULL_TREE);
15086 def_builtin ("__builtin_vec_set_v2df", ftype, VSX_BUILTIN_VEC_SET_V2DF);
15088 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
15089 intDI_type_node,
15090 integer_type_node, NULL_TREE);
15091 def_builtin ("__builtin_vec_set_v2di", ftype, VSX_BUILTIN_VEC_SET_V2DI);
15093 /* Access to the vec_extract patterns. */
15094 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
15095 integer_type_node, NULL_TREE);
15096 def_builtin ("__builtin_vec_ext_v4si", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SI);
15098 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
15099 integer_type_node, NULL_TREE);
15100 def_builtin ("__builtin_vec_ext_v8hi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V8HI);
15102 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
15103 integer_type_node, NULL_TREE);
15104 def_builtin ("__builtin_vec_ext_v16qi", ftype, ALTIVEC_BUILTIN_VEC_EXT_V16QI);
15106 ftype = build_function_type_list (float_type_node, V4SF_type_node,
15107 integer_type_node, NULL_TREE);
15108 def_builtin ("__builtin_vec_ext_v4sf", ftype, ALTIVEC_BUILTIN_VEC_EXT_V4SF);
15110 ftype = build_function_type_list (double_type_node, V2DF_type_node,
15111 integer_type_node, NULL_TREE);
15112 def_builtin ("__builtin_vec_ext_v2df", ftype, VSX_BUILTIN_VEC_EXT_V2DF);
15114 ftype = build_function_type_list (intDI_type_node, V2DI_type_node,
15115 integer_type_node, NULL_TREE);
15116 def_builtin ("__builtin_vec_ext_v2di", ftype, VSX_BUILTIN_VEC_EXT_V2DI);
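/* Matching sketches for the vec_set and vec_ext entry points above
   (illustrative):

     v = __builtin_vec_set_v4si (v, x, 2);      replace element 2
     int e = __builtin_vec_ext_v4si (v, 2);     read element 2

   consistent with the (vector, scalar, index) and (vector, index)
   signatures built above.  */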
15119 if (V1TI_type_node)
15121 tree v1ti_ftype_long_pcvoid
15122 = build_function_type_list (V1TI_type_node,
15123 long_integer_type_node, pcvoid_type_node,
15124 NULL_TREE);
15125 tree void_ftype_v1ti_long_pvoid
15126 = build_function_type_list (void_type_node,
15127 V1TI_type_node, long_integer_type_node,
15128 pvoid_type_node, NULL_TREE);
15129 def_builtin ("__builtin_vsx_lxvd2x_v1ti", v1ti_ftype_long_pcvoid,
15130 VSX_BUILTIN_LXVD2X_V1TI);
15131 def_builtin ("__builtin_vsx_stxvd2x_v1ti", void_ftype_v1ti_long_pvoid,
15132 VSX_BUILTIN_STXVD2X_V1TI);
15133 ftype = build_function_type_list (V1TI_type_node, intTI_type_node,
15134 NULL_TREE);
15135 def_builtin ("__builtin_vec_init_v1ti", ftype, VSX_BUILTIN_VEC_INIT_V1TI);
15136 ftype = build_function_type_list (V1TI_type_node, V1TI_type_node,
15137 intTI_type_node,
15138 integer_type_node, NULL_TREE);
15139 def_builtin ("__builtin_vec_set_v1ti", ftype, VSX_BUILTIN_VEC_SET_V1TI);
15140 ftype = build_function_type_list (intTI_type_node, V1TI_type_node,
15141 integer_type_node, NULL_TREE);
15142 def_builtin ("__builtin_vec_ext_v1ti", ftype, VSX_BUILTIN_VEC_EXT_V1TI);
15147 static void
15148 htm_init_builtins (void)
15150 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15151 const struct builtin_description *d;
15152 size_t i;
15154 d = bdesc_htm;
15155 for (i = 0; i < ARRAY_SIZE (bdesc_htm); i++, d++)
15157 tree op[MAX_HTM_OPERANDS], type;
15158 HOST_WIDE_INT mask = d->mask;
15159 unsigned attr = rs6000_builtin_info[d->code].attr;
15160 bool void_func = (attr & RS6000_BTC_VOID);
15161 int attr_args = (attr & RS6000_BTC_TYPE_MASK);
15162 int nopnds = 0;
15163 tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node
15164 : unsigned_type_node;
15166 if ((mask & builtin_mask) != mask)
15168 if (TARGET_DEBUG_BUILTIN)
15169 fprintf (stderr, "htm_builtin, skip %s\n", d->name);
15170 continue;
15173 if (d->name == 0)
15175 if (TARGET_DEBUG_BUILTIN)
15176 fprintf (stderr, "htm_builtin, bdesc_htm[%ld] no name\n",
15177 (long unsigned) i);
15178 continue;
15181 op[nopnds++] = (void_func) ? void_type_node : argtype;
15183 if (attr_args == RS6000_BTC_UNARY)
15184 op[nopnds++] = argtype;
15185 else if (attr_args == RS6000_BTC_BINARY)
15187 op[nopnds++] = argtype;
15188 op[nopnds++] = argtype;
15190 else if (attr_args == RS6000_BTC_TERNARY)
15192 op[nopnds++] = argtype;
15193 op[nopnds++] = argtype;
15194 op[nopnds++] = argtype;
15197 switch (nopnds)
15199 case 1:
15200 type = build_function_type_list (op[0], NULL_TREE);
15201 break;
15202 case 2:
15203 type = build_function_type_list (op[0], op[1], NULL_TREE);
15204 break;
15205 case 3:
15206 type = build_function_type_list (op[0], op[1], op[2], NULL_TREE);
15207 break;
15208 case 4:
15209 type = build_function_type_list (op[0], op[1], op[2], op[3],
15210 NULL_TREE);
15211 break;
15212 default:
15213 gcc_unreachable ();
15216 def_builtin (d->name, type, d->code);
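/* End-user view of the HTM builtins defined by the loop above (a
   sketch, assuming -mhtm):

     if (__builtin_tbegin (0))
       {
         ... transactional code ...
         __builtin_tend (0);
       }

   Each signature is derived mechanically from the RS6000_BTC_VOID,
   RS6000_BTC_SPR and arity attribute bits, as the nopnds logic above
   shows.  */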
15220 /* Hash function for builtin functions with up to 3 arguments and a return
15221 type. */
15222 hashval_t
15223 builtin_hasher::hash (builtin_hash_struct *bh)
15225 unsigned ret = 0;
15226 int i;
15228 for (i = 0; i < 4; i++)
15230 ret = (ret * (unsigned)MAX_MACHINE_MODE) + ((unsigned)bh->mode[i]);
15231 ret = (ret * 2) + bh->uns_p[i];
15234 return ret;
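/* The loop above is a mixed-radix encoding: each (mode, uns_p) pair
   occupies its own digit of size MAX_MACHINE_MODE * 2, so distinct
   signatures tend to hash differently; the definitive check is the
   equality test below.  */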
15237 /* Compare builtin hash entries H1 and H2 for equivalence. */
15238 bool
15239 builtin_hasher::equal (builtin_hash_struct *p1, builtin_hash_struct *p2)
15241 return ((p1->mode[0] == p2->mode[0])
15242 && (p1->mode[1] == p2->mode[1])
15243 && (p1->mode[2] == p2->mode[2])
15244 && (p1->mode[3] == p2->mode[3])
15245 && (p1->uns_p[0] == p2->uns_p[0])
15246 && (p1->uns_p[1] == p2->uns_p[1])
15247 && (p1->uns_p[2] == p2->uns_p[2])
15248 && (p1->uns_p[3] == p2->uns_p[3]));
15251 /* Map types for builtin functions with an explicit return type and up to 3
15252 arguments.  Functions with fewer than 3 arguments use VOIDmode as the mode
15253 of the unused arguments.  */
15254 static tree
15255 builtin_function_type (machine_mode mode_ret, machine_mode mode_arg0,
15256 machine_mode mode_arg1, machine_mode mode_arg2,
15257 enum rs6000_builtins builtin, const char *name)
15259 struct builtin_hash_struct h;
15260 struct builtin_hash_struct *h2;
15261 int num_args = 3;
15262 int i;
15263 tree ret_type = NULL_TREE;
15264 tree arg_type[3] = { NULL_TREE, NULL_TREE, NULL_TREE };
15266 /* Create builtin_hash_table. */
15267 if (builtin_hash_table == NULL)
15268 builtin_hash_table = hash_table<builtin_hasher>::create_ggc (1500);
15270 h.type = NULL_TREE;
15271 h.mode[0] = mode_ret;
15272 h.mode[1] = mode_arg0;
15273 h.mode[2] = mode_arg1;
15274 h.mode[3] = mode_arg2;
15275 h.uns_p[0] = 0;
15276 h.uns_p[1] = 0;
15277 h.uns_p[2] = 0;
15278 h.uns_p[3] = 0;
15280 /* If the builtin is a type that produces unsigned results or takes unsigned
15281 arguments, and it is returned as a decl for the vectorizer (such as
15282 widening multiplies, permute), make sure the arguments and return value
15283 are type correct. */
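/* Example of the effect (a sketch): for a builtin in the "unsigned 2
   argument" group below, setting uns_p[0..2] makes the mode-to-type
   lookup later in this function choose, e.g.,
   unsigned_V16QI_type_node instead of V16QI_type_node for a V16QImode
   operand, so the vectorizer sees correctly-signed trees.  */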
15284 switch (builtin)
15286 /* unsigned 1 argument functions. */
15287 case CRYPTO_BUILTIN_VSBOX:
15288 case P8V_BUILTIN_VGBBD:
15289 case MISC_BUILTIN_CDTBCD:
15290 case MISC_BUILTIN_CBCDTD:
15291 h.uns_p[0] = 1;
15292 h.uns_p[1] = 1;
15293 break;
15295 /* unsigned 2 argument functions. */
15296 case ALTIVEC_BUILTIN_VMULEUB_UNS:
15297 case ALTIVEC_BUILTIN_VMULEUH_UNS:
15298 case ALTIVEC_BUILTIN_VMULOUB_UNS:
15299 case ALTIVEC_BUILTIN_VMULOUH_UNS:
15300 case CRYPTO_BUILTIN_VCIPHER:
15301 case CRYPTO_BUILTIN_VCIPHERLAST:
15302 case CRYPTO_BUILTIN_VNCIPHER:
15303 case CRYPTO_BUILTIN_VNCIPHERLAST:
15304 case CRYPTO_BUILTIN_VPMSUMB:
15305 case CRYPTO_BUILTIN_VPMSUMH:
15306 case CRYPTO_BUILTIN_VPMSUMW:
15307 case CRYPTO_BUILTIN_VPMSUMD:
15308 case CRYPTO_BUILTIN_VPMSUM:
15309 case MISC_BUILTIN_ADDG6S:
15310 case MISC_BUILTIN_DIVWEU:
15311 case MISC_BUILTIN_DIVWEUO:
15312 case MISC_BUILTIN_DIVDEU:
15313 case MISC_BUILTIN_DIVDEUO:
15314 h.uns_p[0] = 1;
15315 h.uns_p[1] = 1;
15316 h.uns_p[2] = 1;
15317 break;
15319 /* unsigned 3 argument functions. */
15320 case ALTIVEC_BUILTIN_VPERM_16QI_UNS:
15321 case ALTIVEC_BUILTIN_VPERM_8HI_UNS:
15322 case ALTIVEC_BUILTIN_VPERM_4SI_UNS:
15323 case ALTIVEC_BUILTIN_VPERM_2DI_UNS:
15324 case ALTIVEC_BUILTIN_VSEL_16QI_UNS:
15325 case ALTIVEC_BUILTIN_VSEL_8HI_UNS:
15326 case ALTIVEC_BUILTIN_VSEL_4SI_UNS:
15327 case ALTIVEC_BUILTIN_VSEL_2DI_UNS:
15328 case VSX_BUILTIN_VPERM_16QI_UNS:
15329 case VSX_BUILTIN_VPERM_8HI_UNS:
15330 case VSX_BUILTIN_VPERM_4SI_UNS:
15331 case VSX_BUILTIN_VPERM_2DI_UNS:
15332 case VSX_BUILTIN_XXSEL_16QI_UNS:
15333 case VSX_BUILTIN_XXSEL_8HI_UNS:
15334 case VSX_BUILTIN_XXSEL_4SI_UNS:
15335 case VSX_BUILTIN_XXSEL_2DI_UNS:
15336 case CRYPTO_BUILTIN_VPERMXOR:
15337 case CRYPTO_BUILTIN_VPERMXOR_V2DI:
15338 case CRYPTO_BUILTIN_VPERMXOR_V4SI:
15339 case CRYPTO_BUILTIN_VPERMXOR_V8HI:
15340 case CRYPTO_BUILTIN_VPERMXOR_V16QI:
15341 case CRYPTO_BUILTIN_VSHASIGMAW:
15342 case CRYPTO_BUILTIN_VSHASIGMAD:
15343 case CRYPTO_BUILTIN_VSHASIGMA:
15344 h.uns_p[0] = 1;
15345 h.uns_p[1] = 1;
15346 h.uns_p[2] = 1;
15347 h.uns_p[3] = 1;
15348 break;
15350 /* signed permute functions with unsigned char mask. */
15351 case ALTIVEC_BUILTIN_VPERM_16QI:
15352 case ALTIVEC_BUILTIN_VPERM_8HI:
15353 case ALTIVEC_BUILTIN_VPERM_4SI:
15354 case ALTIVEC_BUILTIN_VPERM_4SF:
15355 case ALTIVEC_BUILTIN_VPERM_2DI:
15356 case ALTIVEC_BUILTIN_VPERM_2DF:
15357 case VSX_BUILTIN_VPERM_16QI:
15358 case VSX_BUILTIN_VPERM_8HI:
15359 case VSX_BUILTIN_VPERM_4SI:
15360 case VSX_BUILTIN_VPERM_4SF:
15361 case VSX_BUILTIN_VPERM_2DI:
15362 case VSX_BUILTIN_VPERM_2DF:
15363 h.uns_p[3] = 1;
15364 break;
15366 /* unsigned args, signed return. */
15367 case VSX_BUILTIN_XVCVUXDDP_UNS:
15368 case ALTIVEC_BUILTIN_UNSFLOAT_V4SI_V4SF:
15369 h.uns_p[1] = 1;
15370 break;
15372 /* signed args, unsigned return. */
15373 case VSX_BUILTIN_XVCVDPUXDS_UNS:
15374 case ALTIVEC_BUILTIN_FIXUNS_V4SF_V4SI:
15375 case MISC_BUILTIN_UNPACK_TD:
15376 case MISC_BUILTIN_UNPACK_V1TI:
15377 h.uns_p[0] = 1;
15378 break;
15380 /* unsigned arguments for 128-bit pack instructions. */
15381 case MISC_BUILTIN_PACK_TD:
15382 case MISC_BUILTIN_PACK_V1TI:
15383 h.uns_p[1] = 1;
15384 h.uns_p[2] = 1;
15385 break;
15387 default:
15388 break;
15391 /* Figure out how many args are present. */
15392 while (num_args > 0 && h.mode[num_args] == VOIDmode)
15393 num_args--;
15395 if (num_args == 0)
15396 fatal_error ("internal error: builtin function %s had no type", name);
15398 ret_type = builtin_mode_to_type[h.mode[0]][h.uns_p[0]];
15399 if (!ret_type && h.uns_p[0])
15400 ret_type = builtin_mode_to_type[h.mode[0]][0];
15402 if (!ret_type)
15403 fatal_error ("internal error: builtin function %s had an unexpected "
15404 "return type %s", name, GET_MODE_NAME (h.mode[0]));
15406 for (i = 0; i < (int) ARRAY_SIZE (arg_type); i++)
15407 arg_type[i] = NULL_TREE;
15409 for (i = 0; i < num_args; i++)
15411 int m = (int) h.mode[i+1];
15412 int uns_p = h.uns_p[i+1];
15414 arg_type[i] = builtin_mode_to_type[m][uns_p];
15415 if (!arg_type[i] && uns_p)
15416 arg_type[i] = builtin_mode_to_type[m][0];
15418 if (!arg_type[i])
15419 fatal_error ("internal error: builtin function %s, argument %d "
15420 "had unexpected argument type %s", name, i,
15421 GET_MODE_NAME (m));
15424 builtin_hash_struct **found = builtin_hash_table->find_slot (&h, INSERT);
15425 if (*found == NULL)
15427 h2 = ggc_alloc<builtin_hash_struct> ();
15428 *h2 = h;
15429 *found = h2;
15431 h2->type = build_function_type_list (ret_type, arg_type[0], arg_type[1],
15432 arg_type[2], NULL_TREE);
15435 return (*found)->type;
15438 static void
15439 rs6000_common_init_builtins (void)
15441 const struct builtin_description *d;
15442 size_t i;
15444 tree opaque_ftype_opaque = NULL_TREE;
15445 tree opaque_ftype_opaque_opaque = NULL_TREE;
15446 tree opaque_ftype_opaque_opaque_opaque = NULL_TREE;
15447 tree v2si_ftype_qi = NULL_TREE;
15448 tree v2si_ftype_v2si_qi = NULL_TREE;
15449 tree v2si_ftype_int_qi = NULL_TREE;
15450 HOST_WIDE_INT builtin_mask = rs6000_builtin_mask;
15452 if (!TARGET_PAIRED_FLOAT)
15454 builtin_mode_to_type[V2SImode][0] = opaque_V2SI_type_node;
15455 builtin_mode_to_type[V2SFmode][0] = opaque_V2SF_type_node;
15458 /* Paired and SPE builtins are only available if you build a compiler with
15459 the appropriate options, so only create those builtins with the
15460 appropriate compiler option. Create Altivec and VSX builtins on machines
15461 with at least the general purpose extensions (970 and newer) to allow the
15462 use of the target attribute.  */
15464 if (TARGET_EXTRA_BUILTINS)
15465 builtin_mask |= RS6000_BTM_COMMON;
15467 /* Add the ternary operators. */
15468 d = bdesc_3arg;
15469 for (i = 0; i < ARRAY_SIZE (bdesc_3arg); i++, d++)
15471 tree type;
15472 HOST_WIDE_INT mask = d->mask;
15474 if ((mask & builtin_mask) != mask)
15476 if (TARGET_DEBUG_BUILTIN)
15477 fprintf (stderr, "rs6000_builtin, skip ternary %s\n", d->name);
15478 continue;
15481 if (rs6000_overloaded_builtin_p (d->code))
15483 if (! (type = opaque_ftype_opaque_opaque_opaque))
15484 type = opaque_ftype_opaque_opaque_opaque
15485 = build_function_type_list (opaque_V4SI_type_node,
15486 opaque_V4SI_type_node,
15487 opaque_V4SI_type_node,
15488 opaque_V4SI_type_node,
15489 NULL_TREE);
15491 else
15493 enum insn_code icode = d->icode;
15494 if (d->name == 0)
15496 if (TARGET_DEBUG_BUILTIN)
15497 fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
15498 (long unsigned)i);
15500 continue;
15503 if (icode == CODE_FOR_nothing)
15505 if (TARGET_DEBUG_BUILTIN)
15506 fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
15507 d->name);
15509 continue;
15512 type = builtin_function_type (insn_data[icode].operand[0].mode,
15513 insn_data[icode].operand[1].mode,
15514 insn_data[icode].operand[2].mode,
15515 insn_data[icode].operand[3].mode,
15516 d->code, d->name);
15519 def_builtin (d->name, type, d->code);
15522 /* Add the binary operators. */
15523 d = bdesc_2arg;
15524 for (i = 0; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
15526 machine_mode mode0, mode1, mode2;
15527 tree type;
15528 HOST_WIDE_INT mask = d->mask;
15530 if ((mask & builtin_mask) != mask)
15532 if (TARGET_DEBUG_BUILTIN)
15533 fprintf (stderr, "rs6000_builtin, skip binary %s\n", d->name);
15534 continue;
15537 if (rs6000_overloaded_builtin_p (d->code))
15539 if (! (type = opaque_ftype_opaque_opaque))
15540 type = opaque_ftype_opaque_opaque
15541 = build_function_type_list (opaque_V4SI_type_node,
15542 opaque_V4SI_type_node,
15543 opaque_V4SI_type_node,
15544 NULL_TREE);
15546 else
15548 enum insn_code icode = d->icode;
15549 if (d->name == 0)
15551 if (TARGET_DEBUG_BUILTIN)
15552 fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
15553 (long unsigned)i);
15555 continue;
15558 if (icode == CODE_FOR_nothing)
15560 if (TARGET_DEBUG_BUILTIN)
15561 fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
15562 d->name);
15564 continue;
15567 mode0 = insn_data[icode].operand[0].mode;
15568 mode1 = insn_data[icode].operand[1].mode;
15569 mode2 = insn_data[icode].operand[2].mode;
15571 if (mode0 == V2SImode && mode1 == V2SImode && mode2 == QImode)
15573 if (! (type = v2si_ftype_v2si_qi))
15574 type = v2si_ftype_v2si_qi
15575 = build_function_type_list (opaque_V2SI_type_node,
15576 opaque_V2SI_type_node,
15577 char_type_node,
15578 NULL_TREE);
15581 else if (mode0 == V2SImode && GET_MODE_CLASS (mode1) == MODE_INT
15582 && mode2 == QImode)
15584 if (! (type = v2si_ftype_int_qi))
15585 type = v2si_ftype_int_qi
15586 = build_function_type_list (opaque_V2SI_type_node,
15587 integer_type_node,
15588 char_type_node,
15589 NULL_TREE);
15592 else
15593 type = builtin_function_type (mode0, mode1, mode2, VOIDmode,
15594 d->code, d->name);
15597 def_builtin (d->name, type, d->code);
15600 /* Add the simple unary operators. */
15601 d = bdesc_1arg;
15602 for (i = 0; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
15604 machine_mode mode0, mode1;
15605 tree type;
15606 HOST_WIDE_INT mask = d->mask;
15608 if ((mask & builtin_mask) != mask)
15610 if (TARGET_DEBUG_BUILTIN)
15611 fprintf (stderr, "rs6000_builtin, skip unary %s\n", d->name);
15612 continue;
15615 if (rs6000_overloaded_builtin_p (d->code))
15617 if (! (type = opaque_ftype_opaque))
15618 type = opaque_ftype_opaque
15619 = build_function_type_list (opaque_V4SI_type_node,
15620 opaque_V4SI_type_node,
15621 NULL_TREE);
15623 else
15625 enum insn_code icode = d->icode;
15626 if (d->name == 0)
15628 if (TARGET_DEBUG_BUILTIN)
15629 fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
15630 (long unsigned)i);
15632 continue;
15635 if (icode == CODE_FOR_nothing)
15637 if (TARGET_DEBUG_BUILTIN)
15638 fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
15639 d->name);
15641 continue;
15644 mode0 = insn_data[icode].operand[0].mode;
15645 mode1 = insn_data[icode].operand[1].mode;
15647 if (mode0 == V2SImode && mode1 == QImode)
15649 if (! (type = v2si_ftype_qi))
15650 type = v2si_ftype_qi
15651 = build_function_type_list (opaque_V2SI_type_node,
15652 char_type_node,
15653 NULL_TREE);
15656 else
15657 type = builtin_function_type (mode0, mode1, VOIDmode, VOIDmode,
15658 d->code, d->name);
15661 def_builtin (d->name, type, d->code);
15665 static void
15666 rs6000_init_libfuncs (void)
15668 if (!TARGET_IEEEQUAD)
15669 /* AIX/Darwin/64-bit Linux quad floating point routines. */
15670 if (!TARGET_XL_COMPAT)
15672 set_optab_libfunc (add_optab, TFmode, "__gcc_qadd");
15673 set_optab_libfunc (sub_optab, TFmode, "__gcc_qsub");
15674 set_optab_libfunc (smul_optab, TFmode, "__gcc_qmul");
15675 set_optab_libfunc (sdiv_optab, TFmode, "__gcc_qdiv");
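/* Effect of the four registrations above (illustrative): with the
   default IBM double-double long double format, a source-level long
   double multiply compiles to a call to __gcc_qmul rather than to
   inline code.  */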
15677 if (!(TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)))
15679 set_optab_libfunc (neg_optab, TFmode, "__gcc_qneg");
15680 set_optab_libfunc (eq_optab, TFmode, "__gcc_qeq");
15681 set_optab_libfunc (ne_optab, TFmode, "__gcc_qne");
15682 set_optab_libfunc (gt_optab, TFmode, "__gcc_qgt");
15683 set_optab_libfunc (ge_optab, TFmode, "__gcc_qge");
15684 set_optab_libfunc (lt_optab, TFmode, "__gcc_qlt");
15685 set_optab_libfunc (le_optab, TFmode, "__gcc_qle");
15687 set_conv_libfunc (sext_optab, TFmode, SFmode, "__gcc_stoq");
15688 set_conv_libfunc (sext_optab, TFmode, DFmode, "__gcc_dtoq");
15689 set_conv_libfunc (trunc_optab, SFmode, TFmode, "__gcc_qtos");
15690 set_conv_libfunc (trunc_optab, DFmode, TFmode, "__gcc_qtod");
15691 set_conv_libfunc (sfix_optab, SImode, TFmode, "__gcc_qtoi");
15692 set_conv_libfunc (ufix_optab, SImode, TFmode, "__gcc_qtou");
15693 set_conv_libfunc (sfloat_optab, TFmode, SImode, "__gcc_itoq");
15694 set_conv_libfunc (ufloat_optab, TFmode, SImode, "__gcc_utoq");
15697 if (!(TARGET_HARD_FLOAT && TARGET_FPRS))
15698 set_optab_libfunc (unord_optab, TFmode, "__gcc_qunord");
15700 else
15702 set_optab_libfunc (add_optab, TFmode, "_xlqadd");
15703 set_optab_libfunc (sub_optab, TFmode, "_xlqsub");
15704 set_optab_libfunc (smul_optab, TFmode, "_xlqmul");
15705 set_optab_libfunc (sdiv_optab, TFmode, "_xlqdiv");
15707 else
15709 /* 32-bit SVR4 quad floating point routines. */
15711 set_optab_libfunc (add_optab, TFmode, "_q_add");
15712 set_optab_libfunc (sub_optab, TFmode, "_q_sub");
15713 set_optab_libfunc (neg_optab, TFmode, "_q_neg");
15714 set_optab_libfunc (smul_optab, TFmode, "_q_mul");
15715 set_optab_libfunc (sdiv_optab, TFmode, "_q_div");
15716 if (TARGET_PPC_GPOPT)
15717 set_optab_libfunc (sqrt_optab, TFmode, "_q_sqrt");
15719 set_optab_libfunc (eq_optab, TFmode, "_q_feq");
15720 set_optab_libfunc (ne_optab, TFmode, "_q_fne");
15721 set_optab_libfunc (gt_optab, TFmode, "_q_fgt");
15722 set_optab_libfunc (ge_optab, TFmode, "_q_fge");
15723 set_optab_libfunc (lt_optab, TFmode, "_q_flt");
15724 set_optab_libfunc (le_optab, TFmode, "_q_fle");
15726 set_conv_libfunc (sext_optab, TFmode, SFmode, "_q_stoq");
15727 set_conv_libfunc (sext_optab, TFmode, DFmode, "_q_dtoq");
15728 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_q_qtos");
15729 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_q_qtod");
15730 set_conv_libfunc (sfix_optab, SImode, TFmode, "_q_qtoi");
15731 set_conv_libfunc (ufix_optab, SImode, TFmode, "_q_qtou");
15732 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_q_itoq");
15733 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_q_utoq");
15738 /* Expand a block clear operation, and return 1 if successful. Return 0
15739 if we should let the compiler generate normal code.
15741 operands[0] is the destination
15742 operands[1] is the length
15743 operands[3] is the alignment (operands[2], the store value, is
   not used here; the clear always stores zero) */
15745 int
15746 expand_block_clear (rtx operands[])
15748 rtx orig_dest = operands[0];
15749 rtx bytes_rtx = operands[1];
15750 rtx align_rtx = operands[3];
15751 bool constp = (GET_CODE (bytes_rtx) == CONST_INT);
15752 HOST_WIDE_INT align;
15753 HOST_WIDE_INT bytes;
15754 int offset;
15755 int clear_bytes;
15756 int clear_step;
15758 /* If this is not a fixed size clear, just call memset */
15759 if (! constp)
15760 return 0;
15762 /* This must be a fixed size alignment */
15763 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15764 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15766 /* Anything to clear? */
15767 bytes = INTVAL (bytes_rtx);
15768 if (bytes <= 0)
15769 return 1;
15771 /* Use the builtin memset after a point, to avoid huge code bloat.
15772 When optimize_size, avoid any significant code bloat; calling
15773 memset is about 4 instructions, so allow for one instruction to
15774 load zero and three to do clearing. */
15775 if (TARGET_ALTIVEC && align >= 128)
15776 clear_step = 16;
15777 else if (TARGET_POWERPC64 && (align >= 64 || !STRICT_ALIGNMENT))
15778 clear_step = 8;
15779 else if (TARGET_SPE && align >= 64)
15780 clear_step = 8;
15781 else
15782 clear_step = 4;
15784 if (optimize_size && bytes > 3 * clear_step)
15785 return 0;
15786 if (! optimize_size && bytes > 8 * clear_step)
15787 return 0;
15789 for (offset = 0; bytes > 0; offset += clear_bytes, bytes -= clear_bytes)
15791 machine_mode mode = BLKmode;
15792 rtx dest;
15794 if (bytes >= 16 && TARGET_ALTIVEC && align >= 128)
15796 clear_bytes = 16;
15797 mode = V4SImode;
15799 else if (bytes >= 8 && TARGET_SPE && align >= 64)
15801 clear_bytes = 8;
15802 mode = V2SImode;
15804 else if (bytes >= 8 && TARGET_POWERPC64
15805 && (align >= 64 || !STRICT_ALIGNMENT))
15807 clear_bytes = 8;
15808 mode = DImode;
15809 if (offset == 0 && align < 64)
15811 rtx addr;
15813 /* If the address form is reg+offset with offset not a
15814 multiple of four, reload into reg indirect form here
15815 rather than waiting for reload. This way we get one
15816 reload, not one per store. */
15817 addr = XEXP (orig_dest, 0);
15818 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15819 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15820 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15822 addr = copy_addr_to_reg (addr);
15823 orig_dest = replace_equiv_address (orig_dest, addr);
15827 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15828 { /* clear 4 bytes */
15829 clear_bytes = 4;
15830 mode = SImode;
15832 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
15833 { /* clear 2 bytes */
15834 clear_bytes = 2;
15835 mode = HImode;
15837 else /* clear 1 byte at a time */
15839 clear_bytes = 1;
15840 mode = QImode;
15843 dest = adjust_address (orig_dest, mode, offset);
15845 emit_move_insn (dest, CONST0_RTX (mode));
15848 return 1;
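/* Worked example for the loop above (a sketch, assuming a 64-bit
   AltiVec target and not optimizing for size): clearing 24 bytes at
   128-bit alignment selects clear_step = 16, then emits one V4SImode
   store for bytes 0-15 followed by one DImode store for bytes
   16-23.  */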
15852 /* Expand a block move operation, and return 1 if successful. Return 0
15853 if we should let the compiler generate normal code.
15855 operands[0] is the destination
15856 operands[1] is the source
15857 operands[2] is the length
15858 operands[3] is the alignment */
15860 #define MAX_MOVE_REG 4
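/* Loads are queued and their matching stores are issued in batches of
   up to MAX_MOVE_REG, so several loads can be in flight before the
   first store; see the stores[] flush inside the loop below.  */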
15862 int
15863 expand_block_move (rtx operands[])
15865 rtx orig_dest = operands[0];
15866 rtx orig_src = operands[1];
15867 rtx bytes_rtx = operands[2];
15868 rtx align_rtx = operands[3];
15869 int constp = (GET_CODE (bytes_rtx) == CONST_INT);
15870 int align;
15871 int bytes;
15872 int offset;
15873 int move_bytes;
15874 rtx stores[MAX_MOVE_REG];
15875 int num_reg = 0;
15877 /* If this is not a fixed size move, just call memcpy */
15878 if (! constp)
15879 return 0;
15881 /* This must be a fixed size alignment */
15882 gcc_assert (GET_CODE (align_rtx) == CONST_INT);
15883 align = INTVAL (align_rtx) * BITS_PER_UNIT;
15885 /* Anything to move? */
15886 bytes = INTVAL (bytes_rtx);
15887 if (bytes <= 0)
15888 return 1;
15890 if (bytes > rs6000_block_move_inline_limit)
15891 return 0;
15893 for (offset = 0; bytes > 0; offset += move_bytes, bytes -= move_bytes)
15895 union {
15896 rtx (*movmemsi) (rtx, rtx, rtx, rtx);
15897 rtx (*mov) (rtx, rtx);
15898 } gen_func;
15899 machine_mode mode = BLKmode;
15900 rtx src, dest;
15902 /* Altivec first, since it will be faster than a string move
15903 when it applies, and usually not significantly larger. */
15904 if (TARGET_ALTIVEC && bytes >= 16 && align >= 128)
15906 move_bytes = 16;
15907 mode = V4SImode;
15908 gen_func.mov = gen_movv4si;
15910 else if (TARGET_SPE && bytes >= 8 && align >= 64)
15912 move_bytes = 8;
15913 mode = V2SImode;
15914 gen_func.mov = gen_movv2si;
15916 else if (TARGET_STRING
15917 && bytes > 24 /* move up to 32 bytes at a time */
15918 && ! fixed_regs[5]
15919 && ! fixed_regs[6]
15920 && ! fixed_regs[7]
15921 && ! fixed_regs[8]
15922 && ! fixed_regs[9]
15923 && ! fixed_regs[10]
15924 && ! fixed_regs[11]
15925 && ! fixed_regs[12])
15927 move_bytes = (bytes > 32) ? 32 : bytes;
15928 gen_func.movmemsi = gen_movmemsi_8reg;
15930 else if (TARGET_STRING
15931 && bytes > 16 /* move up to 24 bytes at a time */
15932 && ! fixed_regs[5]
15933 && ! fixed_regs[6]
15934 && ! fixed_regs[7]
15935 && ! fixed_regs[8]
15936 && ! fixed_regs[9]
15937 && ! fixed_regs[10])
15939 move_bytes = (bytes > 24) ? 24 : bytes;
15940 gen_func.movmemsi = gen_movmemsi_6reg;
15942 else if (TARGET_STRING
15943 && bytes > 8 /* move up to 16 bytes at a time */
15944 && ! fixed_regs[5]
15945 && ! fixed_regs[6]
15946 && ! fixed_regs[7]
15947 && ! fixed_regs[8])
15949 move_bytes = (bytes > 16) ? 16 : bytes;
15950 gen_func.movmemsi = gen_movmemsi_4reg;
15952 else if (bytes >= 8 && TARGET_POWERPC64
15953 && (align >= 64 || !STRICT_ALIGNMENT))
15955 move_bytes = 8;
15956 mode = DImode;
15957 gen_func.mov = gen_movdi;
15958 if (offset == 0 && align < 64)
15960 rtx addr;
15962 /* If the address form is reg+offset with offset not a
15963 multiple of four, reload into reg indirect form here
15964 rather than waiting for reload. This way we get one
15965 reload, not one per load and/or store. */
15966 addr = XEXP (orig_dest, 0);
15967 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15968 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15969 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15971 addr = copy_addr_to_reg (addr);
15972 orig_dest = replace_equiv_address (orig_dest, addr);
15974 addr = XEXP (orig_src, 0);
15975 if ((GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM)
15976 && GET_CODE (XEXP (addr, 1)) == CONST_INT
15977 && (INTVAL (XEXP (addr, 1)) & 3) != 0)
15979 addr = copy_addr_to_reg (addr);
15980 orig_src = replace_equiv_address (orig_src, addr);
15984 else if (TARGET_STRING && bytes > 4 && !TARGET_POWERPC64)
15985 { /* move up to 8 bytes at a time */
15986 move_bytes = (bytes > 8) ? 8 : bytes;
15987 gen_func.movmemsi = gen_movmemsi_2reg;
15989 else if (bytes >= 4 && (align >= 32 || !STRICT_ALIGNMENT))
15990 { /* move 4 bytes */
15991 move_bytes = 4;
15992 mode = SImode;
15993 gen_func.mov = gen_movsi;
15995 else if (bytes >= 2 && (align >= 16 || !STRICT_ALIGNMENT))
15996 { /* move 2 bytes */
15997 move_bytes = 2;
15998 mode = HImode;
15999 gen_func.mov = gen_movhi;
16001 else if (TARGET_STRING && bytes > 1)
16002 { /* move up to 4 bytes at a time */
16003 move_bytes = (bytes > 4) ? 4 : bytes;
16004 gen_func.movmemsi = gen_movmemsi_1reg;
16006 else /* move 1 byte at a time */
16008 move_bytes = 1;
16009 mode = QImode;
16010 gen_func.mov = gen_movqi;
16013 src = adjust_address (orig_src, mode, offset);
16014 dest = adjust_address (orig_dest, mode, offset);
16016 if (mode != BLKmode)
16018 rtx tmp_reg = gen_reg_rtx (mode);
16020 emit_insn ((*gen_func.mov) (tmp_reg, src));
16021 stores[num_reg++] = (*gen_func.mov) (dest, tmp_reg);
16024 if (mode == BLKmode || num_reg >= MAX_MOVE_REG || bytes == move_bytes)
16026 int i;
16027 for (i = 0; i < num_reg; i++)
16028 emit_insn (stores[i]);
16029 num_reg = 0;
16032 if (mode == BLKmode)
16034 /* Move the address into scratch registers. The movmemsi
16035 patterns require zero offset. */
16036 if (!REG_P (XEXP (src, 0)))
16038 rtx src_reg = copy_addr_to_reg (XEXP (src, 0));
16039 src = replace_equiv_address (src, src_reg);
16041 set_mem_size (src, move_bytes);
16043 if (!REG_P (XEXP (dest, 0)))
16045 rtx dest_reg = copy_addr_to_reg (XEXP (dest, 0));
16046 dest = replace_equiv_address (dest, dest_reg);
16048 set_mem_size (dest, move_bytes);
16050 emit_insn ((*gen_func.movmemsi) (dest, src,
16051 GEN_INT (move_bytes & 31),
16052 align_rtx));
16056 return 1;
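/* For illustration (register numbers are arbitrary): a 16-byte copy with
   32-bit alignment on a 32-bit target without TARGET_STRING expands as
   four SImode load/store pairs, with the stores queued in STORES[] and
   flushed only once MAX_MOVE_REG loads are in flight:

	lwz r10,0(src)   lwz r11,4(src)   lwz r12,8(src)   lwz r0,12(src)
	stw r10,0(dst)   stw r11,4(dst)   stw r12,8(dst)   stw r0,12(dst)

   Batching the stores this way keeps the loads from being interleaved
   with, and stalled behind, the stores.  */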
16060 /* Return a string to perform a load_multiple operation.
16061 operands[0] is the vector.
16062 operands[1] is the source address.
16063 operands[2] is the first destination register. */
16065 const char *
16066 rs6000_output_load_multiple (rtx operands[3])
16068 /* We have to handle the case where the pseudo used to contain the address
16069 is assigned to one of the output registers. */
16070 int i, j;
16071 int words = XVECLEN (operands[0], 0);
16072 rtx xop[10];
16074 if (XVECLEN (operands[0], 0) == 1)
16075 return "lwz %2,0(%1)";
16077 for (i = 0; i < words; i++)
16078 if (refers_to_regno_p (REGNO (operands[2]) + i,
16079 REGNO (operands[2]) + i + 1, operands[1], 0))
16081 if (i == words-1)
16083 xop[0] = GEN_INT (4 * (words-1));
16084 xop[1] = operands[1];
16085 xop[2] = operands[2];
16086 output_asm_insn ("lswi %2,%1,%0\n\tlwz %1,%0(%1)", xop);
16087 return "";
16089 else if (i == 0)
16091 xop[0] = GEN_INT (4 * (words-1));
16092 xop[1] = operands[1];
16093 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
16094 output_asm_insn ("addi %1,%1,4\n\tlswi %2,%1,%0\n\tlwz %1,-4(%1)", xop);
16095 return "";
16097 else
16099 for (j = 0; j < words; j++)
16100 if (j != i)
16102 xop[0] = GEN_INT (j * 4);
16103 xop[1] = operands[1];
16104 xop[2] = gen_rtx_REG (SImode, REGNO (operands[2]) + j);
16105 output_asm_insn ("lwz %2,%0(%1)", xop);
16107 xop[0] = GEN_INT (i * 4);
16108 xop[1] = operands[1];
16109 output_asm_insn ("lwz %1,%0(%1)", xop);
16110 return "";
16114 return "lswi %2,%1,%N0";
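/* For example (a sketch; register numbers are arbitrary): a 3-word
   load_multiple whose address register r9 is also the first output
   register takes the i == 0 path above and emits

	addi r9,r9,4	-- step past word 0
	lswi r10,r9,8	-- words 1 and 2 into r10,r11
	lwz  r9,-4(r9)	-- finally clobber r9 with word 0

   so the address register is consumed before it is overwritten.  */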
16118 /* A validation routine: say whether CODE, a condition code, and MODE
16119 match. The other alternatives either don't make sense or should
16120 never be generated. */
16122 void
16123 validate_condition_mode (enum rtx_code code, machine_mode mode)
16125 gcc_assert ((GET_RTX_CLASS (code) == RTX_COMPARE
16126 || GET_RTX_CLASS (code) == RTX_COMM_COMPARE)
16127 && GET_MODE_CLASS (mode) == MODE_CC);
16129 /* These don't make sense. */
16130 gcc_assert ((code != GT && code != LT && code != GE && code != LE)
16131 || mode != CCUNSmode);
16133 gcc_assert ((code != GTU && code != LTU && code != GEU && code != LEU)
16134 || mode == CCUNSmode);
16136 gcc_assert (mode == CCFPmode
16137 || (code != ORDERED && code != UNORDERED
16138 && code != UNEQ && code != LTGT
16139 && code != UNGT && code != UNLT
16140 && code != UNGE && code != UNLE));
16142 /* These should never be generated except for
16143 flag_finite_math_only. */
16144 gcc_assert (mode != CCFPmode
16145 || flag_finite_math_only
16146 || (code != LE && code != GE
16147 && code != UNEQ && code != LTGT
16148 && code != UNGT && code != UNLT));
16150 /* These are invalid; the information is not there. */
16151 gcc_assert (mode != CCEQmode || code == EQ || code == NE);
16155 /* Return 1 if ANDOP is a mask that has no bits set outside the mask
16156 required to convert the result of a rotate insn into a shift
16157 left insn of SHIFTOP bits. Both are known to be SImode CONST_INTs. */
16160 includes_lshift_p (rtx shiftop, rtx andop)
16162 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16164 shift_mask <<= INTVAL (shiftop);
16166 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
16169 /* Similar, but for right shift. */
16172 includes_rshift_p (rtx shiftop, rtx andop)
16174 unsigned HOST_WIDE_INT shift_mask = ~(unsigned HOST_WIDE_INT) 0;
16176 shift_mask >>= INTVAL (shiftop);
16178 return (INTVAL (andop) & 0xffffffff & ~shift_mask) == 0;
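/* Worked example for the two tests above (values illustrative): with
   SHIFTOP = 4, shift_mask is 0xfffffff0 in the left-shift case, so
   ANDOP = 0xffffff00 qualifies (no bits set in the low four positions)
   while ANDOP = 0xff does not (0xff & ~0xfffffff0 = 0xf).  A rotate
   left by 4 followed by an AND with such a mask is therefore
   equivalent to a plain shift left by 4.  */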
16181 /* Return 1 if ANDOP is a mask suitable for use with an rldic insn
16182 to perform a left shift. It must have exactly SHIFTOP least
16183 significant 0's, then one or more 1's, then zero or more 0's. */
16186 includes_rldic_lshift_p (rtx shiftop, rtx andop)
16188 if (GET_CODE (andop) == CONST_INT)
16190 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16192 c = INTVAL (andop);
16193 if (c == 0 || c == HOST_WIDE_INT_M1U)
16194 return 0;
16196 shift_mask = HOST_WIDE_INT_M1U;
16197 shift_mask <<= INTVAL (shiftop);
16199 /* Find the least significant one bit. */
16200 lsb = c & -c;
16202 /* It must coincide with the LSB of the shift mask. */
16203 if (-lsb != shift_mask)
16204 return 0;
16206 /* Invert to look for the next transition (if any). */
16207 c = ~c;
16209 /* Remove the low group of ones (originally low group of zeros). */
16210 c &= -lsb;
16212 /* Again find the lsb, and check we have all 1's above. */
16213 lsb = c & -c;
16214 return c == -lsb;
16216 else
16217 return 0;
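/* A small standalone illustration of the mask-shape test above: the
   `c & -c' idiom isolates the least significant set bit, so the
   function accepts exactly masks of the form 0...01...10...0 whose low
   zero run is SHIFTOP bits wide.  For SHIFTOP = 8:

	c = 0x0000ff00:  lsb = 0x100 and -lsb == shift_mask; inverting
			 and clearing the run of ones leaves
			 0xff...ff0000, which equals -0x10000 -> accepted.
	c = 0x0000ff01:  lsb = 0x1, and -lsb != (~0 << 8) -> rejected.  */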
16220 /* Return 1 if ANDOP is a mask suitable for use with an rldicr insn
16221 to perform a left shift. It must have SHIFTOP or more least
16222 significant 0's, with the remainder of the word 1's. */
16225 includes_rldicr_lshift_p (rtx shiftop, rtx andop)
16227 if (GET_CODE (andop) == CONST_INT)
16229 unsigned HOST_WIDE_INT c, lsb, shift_mask;
16231 shift_mask = HOST_WIDE_INT_M1U;
16232 shift_mask <<= INTVAL (shiftop);
16233 c = INTVAL (andop);
16235 /* Find the least significant one bit. */
16236 lsb = c & -c;
16238 /* It must be covered by the shift mask.
16239 This test also rejects c == 0. */
16240 if ((lsb & shift_mask) == 0)
16241 return 0;
16243 /* Check we have all 1's above the transition, and reject all 1's. */
16244 return c == -lsb && lsb != 1;
16246 else
16247 return 0;
16250 /* Return 1 if the operands will generate valid arguments for an rlwimi
16251 instruction doing an insert with right shift in 64-bit mode. The mask may
16252 not start on the first bit or stop on the last bit because the wrap-around
16253 effects of the instruction do not correspond to the semantics of the RTL insn. */
16256 insvdi_rshift_rlwimi_p (rtx sizeop, rtx startop, rtx shiftop)
16258 if (INTVAL (startop) > 32
16259 && INTVAL (startop) < 64
16260 && INTVAL (sizeop) > 1
16261 && INTVAL (sizeop) + INTVAL (startop) < 64
16262 && INTVAL (shiftop) > 0
16263 && INTVAL (sizeop) + INTVAL (shiftop) < 32
16264 && (64 - (INTVAL (shiftop) & 63)) >= INTVAL (sizeop))
16265 return 1;
16267 return 0;
16270 /* Return 1 if REGNO (reg1) == REGNO (reg2) - 1, making them candidates
16271 for lfq and stfq insns iff the registers are hard registers. */
16274 registers_ok_for_quad_peep (rtx reg1, rtx reg2)
16276 /* We might have been passed a SUBREG. */
16277 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
16278 return 0;
16280 /* We might have been passed non-floating-point registers. */
16281 if (!FP_REGNO_P (REGNO (reg1))
16282 || !FP_REGNO_P (REGNO (reg2)))
16283 return 0;
16285 return (REGNO (reg1) == REGNO (reg2) - 1);
16288 /* Return 1 if addr1 and addr2 are suitable for lfq or stfq insn.
16289 addr1 and addr2 must be in consecutive memory locations
16290 (addr2 == addr1 + 8). */
16293 mems_ok_for_quad_peep (rtx mem1, rtx mem2)
16295 rtx addr1, addr2;
16296 unsigned int reg1, reg2;
16297 int offset1, offset2;
16299 /* The mems cannot be volatile. */
16300 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
16301 return 0;
16303 addr1 = XEXP (mem1, 0);
16304 addr2 = XEXP (mem2, 0);
16306 /* Extract an offset (if used) from the first addr. */
16307 if (GET_CODE (addr1) == PLUS)
16309 /* If not a REG, return zero. */
16310 if (GET_CODE (XEXP (addr1, 0)) != REG)
16311 return 0;
16312 else
16314 reg1 = REGNO (XEXP (addr1, 0));
16315 /* The offset must be constant! */
16316 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
16317 return 0;
16318 offset1 = INTVAL (XEXP (addr1, 1));
16321 else if (GET_CODE (addr1) != REG)
16322 return 0;
16323 else
16325 reg1 = REGNO (addr1);
16326 /* This was a simple (mem (reg)) expression. Offset is 0. */
16327 offset1 = 0;
16330 /* And now for the second addr. */
16331 if (GET_CODE (addr2) == PLUS)
16333 /* If not a REG, return zero. */
16334 if (GET_CODE (XEXP (addr2, 0)) != REG)
16335 return 0;
16336 else
16338 reg2 = REGNO (XEXP (addr2, 0));
16339 /* The offset must be constant. */
16340 if (GET_CODE (XEXP (addr2, 1)) != CONST_INT)
16341 return 0;
16342 offset2 = INTVAL (XEXP (addr2, 1));
16345 else if (GET_CODE (addr2) != REG)
16346 return 0;
16347 else
16349 reg2 = REGNO (addr2);
16350 /* This was a simple (mem (reg)) expression. Offset is 0. */
16351 offset2 = 0;
16354 /* Both of these must have the same base register. */
16355 if (reg1 != reg2)
16356 return 0;
16358 /* The offset for the second addr must be 8 more than the first addr. */
16359 if (offset2 != offset1 + 8)
16360 return 0;
16362 /* All the tests passed. addr1 and addr2 are valid for lfq or stfq
16363 instructions. */
16364 return 1;
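/* For example (a sketch; registers are illustrative): the pair

	lfd f6,8(r3)
	lfd f7,16(r3)

   satisfies both predicates above (consecutive hard FPRs, the same base
   register, and the second offset exactly 8 more than the first), so
   the peephole may combine it into a single POWER2 `lfq f6,8(r3)'.  */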
16369 rs6000_secondary_memory_needed_rtx (machine_mode mode)
16371 static bool eliminated = false;
16372 rtx ret;
16374 if (mode != SDmode || TARGET_NO_SDMODE_STACK)
16375 ret = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
16376 else
16378 rtx mem = cfun->machine->sdmode_stack_slot;
16379 gcc_assert (mem != NULL_RTX);
16381 if (!eliminated)
16383 mem = eliminate_regs (mem, VOIDmode, NULL_RTX);
16384 cfun->machine->sdmode_stack_slot = mem;
16385 eliminated = true;
16387 ret = mem;
16390 if (TARGET_DEBUG_ADDR)
16392 fprintf (stderr, "\nrs6000_secondary_memory_needed_rtx, mode %s, rtx:\n",
16393 GET_MODE_NAME (mode));
16394 if (!ret)
16395 fprintf (stderr, "\tNULL_RTX\n");
16396 else
16397 debug_rtx (ret);
16400 return ret;
16403 /* Return the mode to be used for memory when a secondary memory
16404 location is needed. For SDmode values we need to use DDmode, in
16405 all other cases we can use the same mode. */
16406 machine_mode
16407 rs6000_secondary_memory_needed_mode (machine_mode mode)
16409 if (lra_in_progress && mode == SDmode)
16410 return DDmode;
16411 return mode;
16414 static tree
16415 rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
16417 /* Don't walk into types. */
16418 if (*tp == NULL_TREE || *tp == error_mark_node || TYPE_P (*tp))
16420 *walk_subtrees = 0;
16421 return NULL_TREE;
16424 switch (TREE_CODE (*tp))
16426 case VAR_DECL:
16427 case PARM_DECL:
16428 case FIELD_DECL:
16429 case RESULT_DECL:
16430 case SSA_NAME:
16431 case REAL_CST:
16432 case MEM_REF:
16433 case VIEW_CONVERT_EXPR:
16434 if (TYPE_MODE (TREE_TYPE (*tp)) == SDmode)
16435 return *tp;
16436 break;
16437 default:
16438 break;
16441 return NULL_TREE;
16444 /* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
16445 on traditional floating point registers, and the VMRGOW/VMRGEW instructions
16446 only work on the traditional altivec registers, note if an altivec register
16447 was chosen. */
16449 static enum rs6000_reg_type
16450 register_to_reg_type (rtx reg, bool *is_altivec)
16452 HOST_WIDE_INT regno;
16453 enum reg_class rclass;
16455 if (GET_CODE (reg) == SUBREG)
16456 reg = SUBREG_REG (reg);
16458 if (!REG_P (reg))
16459 return NO_REG_TYPE;
16461 regno = REGNO (reg);
16462 if (regno >= FIRST_PSEUDO_REGISTER)
16464 if (!lra_in_progress && !reload_in_progress && !reload_completed)
16465 return PSEUDO_REG_TYPE;
16467 regno = true_regnum (reg);
16468 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
16469 return PSEUDO_REG_TYPE;
16472 gcc_assert (regno >= 0);
16474 if (is_altivec && ALTIVEC_REGNO_P (regno))
16475 *is_altivec = true;
16477 rclass = rs6000_regno_regclass[regno];
16478 return reg_class_to_reg_type[(int)rclass];
16481 /* Helper function to return the cost of adding a TOC entry address. */
16483 static inline int
16484 rs6000_secondary_reload_toc_costs (addr_mask_type addr_mask)
16486 int ret;
16488 if (TARGET_CMODEL != CMODEL_SMALL)
16489 ret = ((addr_mask & RELOAD_REG_OFFSET) == 0) ? 1 : 2;
16491 else
16492 ret = (TARGET_MINIMAL_TOC) ? 6 : 3;
16494 return ret;
16497 /* Helper function for rs6000_secondary_reload to determine whether the memory
16498 address (ADDR) with a given register class (RCLASS) and machine mode (MODE)
16499 needs reloading. Return negative if the memory is not handled by the memory
16500 helper functions and a different reload method should be tried, 0 if no
16501 additional instructions are needed, and positive to give the extra cost for the
16502 memory. */
16504 static int
16505 rs6000_secondary_reload_memory (rtx addr,
16506 enum reg_class rclass,
16507 machine_mode mode)
16509 int extra_cost = 0;
16510 rtx reg, and_arg, plus_arg0, plus_arg1;
16511 addr_mask_type addr_mask;
16512 const char *type = NULL;
16513 const char *fail_msg = NULL;
16515 if (GPR_REG_CLASS_P (rclass))
16516 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
16518 else if (rclass == FLOAT_REGS)
16519 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
16521 else if (rclass == ALTIVEC_REGS)
16522 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
16524 /* For the combined VSX_REGS, turn off Altivec AND -16. */
16525 else if (rclass == VSX_REGS)
16526 addr_mask = (reg_addr[mode].addr_mask[RELOAD_REG_VMX]
16527 & ~RELOAD_REG_AND_M16);
16529 else
16531 if (TARGET_DEBUG_ADDR)
16532 fprintf (stderr,
16533 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16534 "class is not GPR, FPR, VMX\n",
16535 GET_MODE_NAME (mode), reg_class_names[rclass]);
16537 return -1;
16540 /* If the register isn't valid in this register class, just return now. */
16541 if ((addr_mask & RELOAD_REG_VALID) == 0)
16543 if (TARGET_DEBUG_ADDR)
16544 fprintf (stderr,
16545 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16546 "not valid in class\n",
16547 GET_MODE_NAME (mode), reg_class_names[rclass]);
16549 return -1;
16552 switch (GET_CODE (addr))
16554 /* Does the register class support auto update forms for this mode? We
16555 don't need a scratch register, since the powerpc only supports
16556 PRE_INC, PRE_DEC, and PRE_MODIFY. */
16557 case PRE_INC:
16558 case PRE_DEC:
16559 reg = XEXP (addr, 0);
16560 if (!base_reg_operand (reg, GET_MODE (reg)))
16562 fail_msg = "no base register #1";
16563 extra_cost = -1;
16566 else if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
16568 extra_cost = 1;
16569 type = "update";
16571 break;
16573 case PRE_MODIFY:
16574 reg = XEXP (addr, 0);
16575 plus_arg1 = XEXP (addr, 1);
16576 if (!base_reg_operand (reg, GET_MODE (reg))
16577 || GET_CODE (plus_arg1) != PLUS
16578 || !rtx_equal_p (reg, XEXP (plus_arg1, 0)))
16580 fail_msg = "bad PRE_MODIFY";
16581 extra_cost = -1;
16584 else if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
16586 extra_cost = 1;
16587 type = "update";
16589 break;
16591 /* Do we need to simulate AND -16 to clear the bottom address bits used
16592 in VMX load/stores? Only allow the AND for vector sizes. */
16593 case AND:
16594 and_arg = XEXP (addr, 0);
16595 if (GET_MODE_SIZE (mode) != 16
16596 || GET_CODE (XEXP (addr, 1)) != CONST_INT
16597 || INTVAL (XEXP (addr, 1)) != -16)
16599 fail_msg = "bad Altivec AND #1";
16600 extra_cost = -1;
16603 if (rclass != ALTIVEC_REGS)
16605 if (legitimate_indirect_address_p (and_arg, false))
16606 extra_cost = 1;
16608 else if (legitimate_indexed_address_p (and_arg, false))
16609 extra_cost = 2;
16611 else
16613 fail_msg = "bad Altivec AND #2";
16614 extra_cost = -1;
16617 type = "and";
16619 break;
16621 /* If this is an indirect address, make sure it is a base register. */
16622 case REG:
16623 case SUBREG:
16624 if (!legitimate_indirect_address_p (addr, false))
16626 extra_cost = 1;
16627 type = "move";
16629 break;
16631 /* If this is an indexed address, make sure the register class can handle
16632 indexed addresses for this mode. */
16633 case PLUS:
16634 plus_arg0 = XEXP (addr, 0);
16635 plus_arg1 = XEXP (addr, 1);
16637 /* (plus (plus (reg) (constant)) (constant)) is generated during
16638 push_reload processing, so handle it now. */
16639 if (GET_CODE (plus_arg0) == PLUS && CONST_INT_P (plus_arg1))
16641 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16643 extra_cost = 1;
16644 type = "offset";
16648 else if (!base_reg_operand (plus_arg0, GET_MODE (plus_arg0)))
16650 fail_msg = "no base register #2";
16651 extra_cost = -1;
16654 else if (int_reg_operand (plus_arg1, GET_MODE (plus_arg1)))
16656 if ((addr_mask & RELOAD_REG_INDEXED) == 0
16657 || !legitimate_indexed_address_p (addr, false))
16659 extra_cost = 1;
16660 type = "indexed";
16664 /* Make sure the register class can handle offset addresses. */
16665 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
16667 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16669 extra_cost = 1;
16670 type = "offset";
16674 else
16676 fail_msg = "bad PLUS";
16677 extra_cost = -1;
16680 break;
16682 case LO_SUM:
16683 if (!legitimate_lo_sum_address_p (mode, addr, false))
16685 fail_msg = "bad LO_SUM";
16686 extra_cost = -1;
16689 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16691 extra_cost = 1;
16692 type = "lo_sum";
16694 break;
16696 /* Static addresses need to create a TOC entry. */
16697 case CONST:
16698 case SYMBOL_REF:
16699 case LABEL_REF:
16700 type = "address";
16701 extra_cost = rs6000_secondary_reload_toc_costs (addr_mask);
16702 break;
16704 /* TOC references look like offsettable memory. */
16705 case UNSPEC:
16706 if (TARGET_CMODEL == CMODEL_SMALL || XINT (addr, 1) != UNSPEC_TOCREL)
16708 fail_msg = "bad UNSPEC";
16709 extra_cost = -1;
16712 else if ((addr_mask & RELOAD_REG_OFFSET) == 0)
16714 extra_cost = 1;
16715 type = "toc reference";
16717 break;
16719 default:
16721 fail_msg = "bad address";
16722 extra_cost = -1;
16726 if (TARGET_DEBUG_ADDR /* && extra_cost != 0 */)
16728 if (extra_cost < 0)
16729 fprintf (stderr,
16730 "rs6000_secondary_reload_memory error: mode = %s, "
16731 "class = %s, addr_mask = '%s', %s\n",
16732 GET_MODE_NAME (mode),
16733 reg_class_names[rclass],
16734 rs6000_debug_addr_mask (addr_mask, false),
16735 (fail_msg != NULL) ? fail_msg : "<bad address>");
16737 else
16738 fprintf (stderr,
16739 "rs6000_secondary_reload_memory: mode = %s, class = %s, "
16740 "addr_mask = '%s', extra cost = %d, %s\n",
16741 GET_MODE_NAME (mode),
16742 reg_class_names[rclass],
16743 rs6000_debug_addr_mask (addr_mask, false),
16744 extra_cost,
16745 (type) ? type : "<none>");
16747 debug_rtx (addr);
16750 return extra_cost;
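/* For illustration, two typical outcomes of the walk above (assuming
   the usual addr_mask settings): a V4SImode access through
   (and (reg) (const_int -16)) reloaded for ALTIVEC_REGS costs 0 extra,
   since lvx/stvx mask the low address bits themselves; the same AND
   address reloaded for another class costs 1 when the inner address is
   a plain base register (one instruction to do the AND into the
   scratch), or 2 when it is indexed.  An offset address for a class
   whose addr_mask lacks RELOAD_REG_OFFSET likewise costs 1 ("offset").  */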
16753 /* Helper function for rs6000_secondary_reload to return true if a move to a
16754 different register class is really a simple move. */
16756 static bool
16757 rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
16758 enum rs6000_reg_type from_type,
16759 machine_mode mode)
16761 int size;
16763 /* Add support for various direct moves available. In this function, we only
16764 look at cases where we don't need any extra registers, and one or more
16765 simple move insns are issued. At present, 32-bit integers are not allowed
16766 in FPR/VSX registers. Single precision binary floating point is not a simple
16767 move because we need to convert to the single precision memory layout.
16768 The 4-byte SDmode can be moved. */
16769 size = GET_MODE_SIZE (mode);
16770 if (TARGET_DIRECT_MOVE
16771 && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
16772 && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16773 || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
16774 return true;
16776 else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
16777 && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
16778 || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16779 return true;
16781 else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
16782 && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
16783 || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
16784 return true;
16786 return false;
16789 /* Power8 helper function for rs6000_secondary_reload; handle all of the
16790 special direct moves that involve allocating an extra register. Return
16791 true if there is a suitable helper insn, filling in SRI with its insn
16792 code and extra cost, and false if not. */
16794 static bool
16795 rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
16796 enum rs6000_reg_type from_type,
16797 machine_mode mode,
16798 secondary_reload_info *sri,
16799 bool altivec_p)
16801 bool ret = false;
16802 enum insn_code icode = CODE_FOR_nothing;
16803 int cost = 0;
16804 int size = GET_MODE_SIZE (mode);
16806 if (TARGET_POWERPC64)
16808 if (size == 16)
16810 /* Handle moving 128-bit values from GPRs to VSX registers on
16811 power8 when running in 64-bit mode using XXPERMDI to glue the two
16812 64-bit values back together. */
16813 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16815 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16816 icode = reg_addr[mode].reload_vsx_gpr;
16819 /* Handle moving 128-bit values from VSX registers to GPRs on
16820 power8 when running in 64-bit mode using XXPERMDI to get access to the
16821 bottom 64-bit value. */
16822 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16824 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16825 icode = reg_addr[mode].reload_gpr_vsx;
16829 else if (mode == SFmode)
16831 if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16833 cost = 3; /* xscvdpspn, mfvsrd, and. */
16834 icode = reg_addr[mode].reload_gpr_vsx;
16837 else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16839 cost = 2; /* mtvsrz, xscvspdpn. */
16840 icode = reg_addr[mode].reload_vsx_gpr;
16845 if (TARGET_POWERPC64 && size == 16)
16847 /* Handle moving 128-bit values from GPRs to VSX registers on
16848 power8 when running in 64-bit mode using XXPERMDI to glue the two
16849 64-bit values back together. */
16850 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
16852 cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
16853 icode = reg_addr[mode].reload_vsx_gpr;
16856 /* Handle moving 128-bit values from VSX registers to GPRs on
16857 power8 when running in 64-bit mode using XXPERMDI to get access to the
16858 bottom 64-bit value. */
16859 else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
16861 cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
16862 icode = reg_addr[mode].reload_gpr_vsx;
16866 else if (!TARGET_POWERPC64 && size == 8)
16868 /* Handle moving 64-bit values from GPRs to floating point registers on
16869 power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
16870 values back together. Altivec register classes must be handled
16871 specially since a different instruction is used, and the secondary
16872 reload support requires a single instruction class in the scratch
16873 register constraint. However, right now TFmode is not allowed in
16874 Altivec registers, so the pattern will never match. */
16875 if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
16877 cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
16878 icode = reg_addr[mode].reload_fpr_gpr;
16882 if (icode != CODE_FOR_nothing)
16884 ret = true;
16885 if (sri)
16887 sri->icode = icode;
16888 sri->extra_cost = cost;
16892 return ret;
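/* For illustration (a sketch; register numbers arbitrary, assuming a
   64-bit power8): the size == 16 GPR -> VSX case above corresponds to a
   three-instruction helper sequence such as

	mtvsrd vs0,r3
	mtvsrd vs1,r4
	xxpermdi vs34,vs0,vs1,0

   matching the "2 mtvsrd's, 1 xxpermdi" cost of 3 recorded in SRI.  */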
16895 /* Return whether a move between two register classes can be done either
16896 directly (simple move) or via a pattern that uses a single extra temporary
16897 (using power8's direct move in this case). */
16899 static bool
16900 rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
16901 enum rs6000_reg_type from_type,
16902 machine_mode mode,
16903 secondary_reload_info *sri,
16904 bool altivec_p)
16906 /* Fall back to load/store reloads if either type is not a register. */
16907 if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
16908 return false;
16910 /* If we haven't allocated registers yet, assume the move can be done for the
16911 standard register types. */
16912 if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
16913 || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
16914 || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
16915 return true;
16917 /* A move within the same set of registers is a simple move for non-specialized
16918 registers. */
16919 if (to_type == from_type && IS_STD_REG_TYPE (to_type))
16920 return true;
16922 /* Check whether a simple move can be done directly. */
16923 if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
16925 if (sri)
16927 sri->icode = CODE_FOR_nothing;
16928 sri->extra_cost = 0;
16930 return true;
16933 /* Now check if we can do it in a few steps. */
16934 return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
16935 altivec_p);
16938 /* Inform reload about cases where moving X with a mode MODE to a register in
16939 RCLASS requires an extra scratch or immediate register. Return the class
16940 needed for the immediate register.
16942 For VSX and Altivec, we may need a register to convert sp+offset into
16943 reg+sp.
16945 For misaligned 64-bit gpr loads and stores we need a register to
16946 convert an offset address to indirect. */
16948 static reg_class_t
16949 rs6000_secondary_reload (bool in_p,
16950 rtx x,
16951 reg_class_t rclass_i,
16952 machine_mode mode,
16953 secondary_reload_info *sri)
16955 enum reg_class rclass = (enum reg_class) rclass_i;
16956 reg_class_t ret = ALL_REGS;
16957 enum insn_code icode;
16958 bool default_p = false;
16959 bool done_p = false;
16961 /* Allow subreg of memory before/during reload. */
16962 bool memory_p = (MEM_P (x)
16963 || (!reload_completed && GET_CODE (x) == SUBREG
16964 && MEM_P (SUBREG_REG (x))));
16966 sri->icode = CODE_FOR_nothing;
16967 sri->extra_cost = 0;
16968 icode = ((in_p)
16969 ? reg_addr[mode].reload_load
16970 : reg_addr[mode].reload_store);
16972 if (REG_P (x) || register_operand (x, mode))
16974 enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
16975 bool altivec_p = (rclass == ALTIVEC_REGS);
16976 enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
16978 if (!in_p)
16980 enum rs6000_reg_type exchange = to_type;
16981 to_type = from_type;
16982 from_type = exchange;
16985 /* Can we do a direct move of some sort? */
16986 if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
16987 altivec_p))
16989 icode = (enum insn_code)sri->icode;
16990 default_p = false;
16991 done_p = true;
16992 ret = NO_REGS;
16996 /* Make sure 0.0 is not reloaded or forced into memory. */
16997 if (x == CONST0_RTX (mode) && VSX_REG_CLASS_P (rclass))
16999 ret = NO_REGS;
17000 default_p = false;
17001 done_p = true;
17004 /* If this is a scalar floating point value and we want to load it into the
17005 traditional Altivec registers, move it through a traditional floating
17006 point register. Also make sure that non-zero constants use a FPR. */
17007 if (!done_p && reg_addr[mode].scalar_in_vmx_p
17008 && (rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17009 && (memory_p || (GET_CODE (x) == CONST_DOUBLE)))
17011 ret = FLOAT_REGS;
17012 default_p = false;
17013 done_p = true;
17016 /* Handle reload of load/stores if we have reload helper functions. */
17017 if (!done_p && icode != CODE_FOR_nothing && memory_p)
17019 int extra_cost = rs6000_secondary_reload_memory (XEXP (x, 0), rclass,
17020 mode);
17022 if (extra_cost >= 0)
17024 done_p = true;
17025 ret = NO_REGS;
17026 if (extra_cost > 0)
17028 sri->extra_cost = extra_cost;
17029 sri->icode = icode;
17034 /* Handle unaligned loads and stores of integer registers. */
17035 if (!done_p && TARGET_POWERPC64
17036 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17037 && memory_p
17038 && GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
17040 rtx addr = XEXP (x, 0);
17041 rtx off = address_offset (addr);
17043 if (off != NULL_RTX)
17045 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17046 unsigned HOST_WIDE_INT offset = INTVAL (off);
17048 /* We need a secondary reload when our legitimate_address_p
17049 says the address is good (as otherwise the entire address
17050 will be reloaded), and the offset is not a multiple of
17051 four or we have an address wrap. Address wrap will only
17052 occur for LO_SUMs since legitimate_offset_address_p
17053 rejects addresses for 16-byte mems that will wrap. */
17054 if (GET_CODE (addr) == LO_SUM
17055 ? (1 /* legitimate_address_p allows any offset for lo_sum */
17056 && ((offset & 3) != 0
17057 || ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra))
17058 : (offset + 0x8000 < 0x10000 - extra /* legitimate_address_p */
17059 && (offset & 3) != 0))
17061 if (in_p)
17062 sri->icode = CODE_FOR_reload_di_load;
17063 else
17064 sri->icode = CODE_FOR_reload_di_store;
17065 sri->extra_cost = 2;
17066 ret = NO_REGS;
17067 done_p = true;
17069 else
17070 default_p = true;
17072 else
17073 default_p = true;
17076 if (!done_p && !TARGET_POWERPC64
17077 && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
17078 && memory_p
17079 && GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
17081 rtx addr = XEXP (x, 0);
17082 rtx off = address_offset (addr);
17084 if (off != NULL_RTX)
17086 unsigned int extra = GET_MODE_SIZE (GET_MODE (x)) - UNITS_PER_WORD;
17087 unsigned HOST_WIDE_INT offset = INTVAL (off);
17089 /* We need a secondary reload when our legitimate_address_p
17090 says the address is good (as otherwise the entire address
17091 will be reloaded), and we have a wrap.
17093 legitimate_lo_sum_address_p allows LO_SUM addresses to
17094 have any offset so test for wrap in the low 16 bits.
17096 legitimate_offset_address_p checks for the range
17097 [-0x8000,0x7fff] for mode size of 8 and [-0x8000,0x7ff7]
17098 for mode size of 16. We wrap at [0x7ffc,0x7fff] and
17099 [0x7ff4,0x7fff] respectively, so test for the
17100 intersection of these ranges, [0x7ffc,0x7fff] and
17101 [0x7ff4,0x7ff7] respectively.
17103 Note that the address we see here may have been
17104 manipulated by legitimize_reload_address. */
17105 if (GET_CODE (addr) == LO_SUM
17106 ? ((offset & 0xffff) ^ 0x8000) >= 0x10000 - extra
17107 : offset - (0x8000 - extra) < UNITS_PER_WORD)
17109 if (in_p)
17110 sri->icode = CODE_FOR_reload_si_load;
17111 else
17112 sri->icode = CODE_FOR_reload_si_store;
17113 sri->extra_cost = 2;
17114 ret = NO_REGS;
17115 done_p = true;
17117 else
17118 default_p = true;
17120 else
17121 default_p = true;
17124 if (!done_p)
17125 default_p = true;
17127 if (default_p)
17128 ret = default_secondary_reload (in_p, x, rclass, mode, sri);
17130 gcc_assert (ret != ALL_REGS);
17132 if (TARGET_DEBUG_ADDR)
17134 fprintf (stderr,
17135 "\nrs6000_secondary_reload, return %s, in_p = %s, rclass = %s, "
17136 "mode = %s",
17137 reg_class_names[ret],
17138 in_p ? "true" : "false",
17139 reg_class_names[rclass],
17140 GET_MODE_NAME (mode));
17142 if (reload_completed)
17143 fputs (", after reload", stderr);
17145 if (!done_p)
17146 fputs (", done_p not set", stderr);
17148 if (default_p)
17149 fputs (", default secondary reload", stderr);
17151 if (sri->icode != CODE_FOR_nothing)
17152 fprintf (stderr, ", reload func = %s, extra cost = %d",
17153 insn_data[sri->icode].name, sri->extra_cost);
17155 fputs ("\n", stderr);
17156 debug_rtx (x);
17159 return ret;
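/* A worked instance of the misaligned/wrapping offset tests above
   (illustrative values): for a DImode gpr access on a 64-bit target,
   extra is 0, so offset 6 passes `offset + 0x8000 < 0x10000' but fails
   the multiple-of-four test (ld/std are DS-form insns) and gets
   CODE_FOR_reload_di_load/store.  For an 8-byte value split across two
   32-bit gprs, extra is 4; offset 0x7ffe satisfies
   `offset - (0x8000 - extra) < UNITS_PER_WORD' (0x7ffe - 0x7ffc = 2),
   i.e. the second lwz/stw at 0x8002 would wrap the 16-bit displacement,
   so CODE_FOR_reload_si_load/store is used instead.  */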
17162 /* Better tracing for rs6000_secondary_reload_inner. */
17164 static void
17165 rs6000_secondary_reload_trace (int line, rtx reg, rtx mem, rtx scratch,
17166 bool store_p)
17168 rtx set, clobber;
17170 gcc_assert (reg != NULL_RTX && mem != NULL_RTX && scratch != NULL_RTX);
17172 fprintf (stderr, "rs6000_secondary_reload_inner:%d, type = %s\n", line,
17173 store_p ? "store" : "load");
17175 if (store_p)
17176 set = gen_rtx_SET (VOIDmode, mem, reg);
17177 else
17178 set = gen_rtx_SET (VOIDmode, reg, mem);
17180 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
17181 debug_rtx (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
17184 static void rs6000_secondary_reload_fail (int, rtx, rtx, rtx, bool)
17185 ATTRIBUTE_NORETURN;
17187 static void
17188 rs6000_secondary_reload_fail (int line, rtx reg, rtx mem, rtx scratch,
17189 bool store_p)
17191 rs6000_secondary_reload_trace (line, reg, mem, scratch, store_p);
17192 gcc_unreachable ();
17195 /* Fixup reload addresses for values in GPR, FPR, and VMX registers that have
17196 reload helper functions. These were identified in
17197 rs6000_secondary_reload_memory, and if reload decided to use the secondary
17198 reload, it calls the insns:
17199 reload_<RELOAD:mode>_<P:mptrsize>_store
17200 reload_<RELOAD:mode>_<P:mptrsize>_load
17202 which in turn calls this function, to do whatever is necessary to create
17203 valid addresses. */
17205 void
17206 rs6000_secondary_reload_inner (rtx reg, rtx mem, rtx scratch, bool store_p)
17208 int regno = true_regnum (reg);
17209 machine_mode mode = GET_MODE (reg);
17210 addr_mask_type addr_mask;
17211 rtx addr;
17212 rtx new_addr;
17213 rtx op_reg, op0, op1;
17214 rtx and_op;
17215 rtx cc_clobber;
17216 rtvec rv;
17218 if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER || !MEM_P (mem)
17219 || !base_reg_operand (scratch, GET_MODE (scratch)))
17220 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17222 if (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO))
17223 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_GPR];
17225 else if (IN_RANGE (regno, FIRST_FPR_REGNO, LAST_FPR_REGNO))
17226 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_FPR];
17228 else if (IN_RANGE (regno, FIRST_ALTIVEC_REGNO, LAST_ALTIVEC_REGNO))
17229 addr_mask = reg_addr[mode].addr_mask[RELOAD_REG_VMX];
17231 else
17232 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17234 /* Make sure the mode is valid in this register class. */
17235 if ((addr_mask & RELOAD_REG_VALID) == 0)
17236 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17238 if (TARGET_DEBUG_ADDR)
17239 rs6000_secondary_reload_trace (__LINE__, reg, mem, scratch, store_p);
17241 new_addr = addr = XEXP (mem, 0);
17242 switch (GET_CODE (addr))
17244 /* Does the register class support auto update forms for this mode? If
17245 not, do the update now. We don't need a scratch register, since the
17246 powerpc only supports PRE_INC, PRE_DEC, and PRE_MODIFY. */
17247 case PRE_INC:
17248 case PRE_DEC:
17249 op_reg = XEXP (addr, 0);
17250 if (!base_reg_operand (op_reg, Pmode))
17251 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17253 if ((addr_mask & RELOAD_REG_PRE_INCDEC) == 0)
17255 emit_insn (gen_add2_insn (op_reg, GEN_INT (GET_MODE_SIZE (mode))));
17256 new_addr = op_reg;
17258 break;
17260 case PRE_MODIFY:
17261 op0 = XEXP (addr, 0);
17262 op1 = XEXP (addr, 1);
17263 if (!base_reg_operand (op0, Pmode)
17264 || GET_CODE (op1) != PLUS
17265 || !rtx_equal_p (op0, XEXP (op1, 0)))
17266 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17268 if ((addr_mask & RELOAD_REG_PRE_MODIFY) == 0)
17270 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
17271 new_addr = reg;
17273 break;
17275 /* Do we need to simulate AND -16 to clear the bottom address bits used
17276 in VMX load/stores? */
17277 case AND:
17278 op0 = XEXP (addr, 0);
17279 op1 = XEXP (addr, 1);
17280 if ((addr_mask & RELOAD_REG_AND_M16) == 0)
17282 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
17283 op_reg = op0;
17285 else if (GET_CODE (op1) == PLUS)
17287 emit_insn (gen_rtx_SET (VOIDmode, scratch, op1));
17288 op_reg = scratch;
17291 else
17292 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17294 and_op = gen_rtx_AND (GET_MODE (scratch), op_reg, op1);
17295 cc_clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (CCmode));
17296 rv = gen_rtvec (2, gen_rtx_SET (VOIDmode, scratch, and_op), cc_clobber);
17297 emit_insn (gen_rtx_PARALLEL (VOIDmode, rv));
17298 new_addr = scratch;
17300 break;
17302 /* If this is an indirect address, make sure it is a base register. */
17303 case REG:
17304 case SUBREG:
17305 if (!base_reg_operand (addr, GET_MODE (addr)))
17307 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17308 new_addr = scratch;
17310 break;
17312 /* If this is an indexed address, make sure the register class can handle
17313 indexed addresses for this mode. */
17314 case PLUS:
17315 op0 = XEXP (addr, 0);
17316 op1 = XEXP (addr, 1);
17317 if (!base_reg_operand (op0, Pmode))
17318 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17320 else if (int_reg_operand (op1, Pmode))
17322 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17324 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17325 new_addr = scratch;
17329 /* Make sure the register class can handle offset addresses. */
17330 else if (rs6000_legitimate_offset_address_p (mode, addr, false, true))
17332 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17334 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17335 new_addr = scratch;
17339 else
17340 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17342 break;
17344 case LO_SUM:
17345 op0 = XEXP (addr, 0);
17346 op1 = XEXP (addr, 1);
17347 if (!base_reg_operand (op0, Pmode))
17348 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17350 else if (int_reg_operand (op1, Pmode))
17352 if ((addr_mask & RELOAD_REG_INDEXED) == 0)
17354 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17355 new_addr = scratch;
17359 /* Make sure the register class can handle offset addresses. */
17360 else if (legitimate_lo_sum_address_p (mode, addr, false))
17362 if ((addr_mask & RELOAD_REG_OFFSET) == 0)
17364 emit_insn (gen_rtx_SET (VOIDmode, scratch, addr));
17365 new_addr = scratch;
17369 else
17370 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17372 break;
17374 case SYMBOL_REF:
17375 case CONST:
17376 case LABEL_REF:
17377 if (TARGET_TOC)
17378 emit_insn (gen_rtx_SET (VOIDmode, scratch,
17379 create_TOC_reference (addr, scratch)));
17380 else
17381 rs6000_emit_move (scratch, addr, Pmode);
17383 new_addr = scratch;
17384 break;
17386 default:
17387 rs6000_secondary_reload_fail (__LINE__, reg, mem, scratch, store_p);
17390 /* Adjust the address if it changed. */
17391 if (addr != new_addr)
17393 mem = replace_equiv_address_nv (mem, new_addr);
17394 if (TARGET_DEBUG_ADDR)
17395 fprintf (stderr, "\nrs6000_secondary_reload_inner, mem adjusted.\n");
17398 /* Now create the move. */
17399 if (store_p)
17400 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17401 else
17402 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17404 return;
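/* For illustration (a sketch; registers arbitrary): when the register
   class cannot use the Altivec AND -16 address form directly, the AND
   case above materializes the masking into the scratch register,
   turning

	(mem (and (reg r9) (const_int -16)))

   into roughly

	rldicr r0,r9,0,59	-- r0 = r9 & -16; the AND pattern
				-- carries the CC clobber seen above
	lvx/stvx via (mem (reg r0))

   before the final move is created.  */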
17407 /* Convert reloads involving 64-bit gprs and misaligned offset
17408 addressing, or multiple 32-bit gprs and offsets that are too large,
17409 to use indirect addressing. */
17411 void
17412 rs6000_secondary_reload_gpr (rtx reg, rtx mem, rtx scratch, bool store_p)
17414 int regno = true_regnum (reg);
17415 enum reg_class rclass;
17416 rtx addr;
17417 rtx scratch_or_premodify = scratch;
17419 if (TARGET_DEBUG_ADDR)
17421 fprintf (stderr, "\nrs6000_secondary_reload_gpr, type = %s\n",
17422 store_p ? "store" : "load");
17423 fprintf (stderr, "reg:\n");
17424 debug_rtx (reg);
17425 fprintf (stderr, "mem:\n");
17426 debug_rtx (mem);
17427 fprintf (stderr, "scratch:\n");
17428 debug_rtx (scratch);
17431 gcc_assert (regno >= 0 && regno < FIRST_PSEUDO_REGISTER);
17432 gcc_assert (GET_CODE (mem) == MEM);
17433 rclass = REGNO_REG_CLASS (regno);
17434 gcc_assert (rclass == GENERAL_REGS || rclass == BASE_REGS);
17435 addr = XEXP (mem, 0);
17437 if (GET_CODE (addr) == PRE_MODIFY)
17439 scratch_or_premodify = XEXP (addr, 0);
17440 gcc_assert (REG_P (scratch_or_premodify));
17441 addr = XEXP (addr, 1);
17443 gcc_assert (GET_CODE (addr) == PLUS || GET_CODE (addr) == LO_SUM);
17445 rs6000_emit_move (scratch_or_premodify, addr, Pmode);
17447 mem = replace_equiv_address_nv (mem, scratch_or_premodify);
17449 /* Now create the move. */
17450 if (store_p)
17451 emit_insn (gen_rtx_SET (VOIDmode, mem, reg));
17452 else
17453 emit_insn (gen_rtx_SET (VOIDmode, reg, mem));
17455 return;
17458 /* Allocate a 64-bit stack slot to be used for copying SDmode values through if
17459 this function has any SDmode references. If we are on a power7 or later, we
17460 don't need the 64-bit stack slot since the LFIWZX and STFIWX instructions
17461 can load/store the value. */
17463 static void
17464 rs6000_alloc_sdmode_stack_slot (void)
17466 tree t;
17467 basic_block bb;
17468 gimple_stmt_iterator gsi;
17470 gcc_assert (cfun->machine->sdmode_stack_slot == NULL_RTX);
17471 /* We use a different approach for dealing with the secondary
17472 memory in LRA. */
17473 if (ira_use_lra_p)
17474 return;
17476 if (TARGET_NO_SDMODE_STACK)
17477 return;
17479 FOR_EACH_BB_FN (bb, cfun)
17480 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
17482 tree ret = walk_gimple_op (gsi_stmt (gsi), rs6000_check_sdmode, NULL);
17483 if (ret)
17485 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17486 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17487 SDmode, 0);
17488 return;
17492 /* Check for any SDmode parameters of the function. */
17493 for (t = DECL_ARGUMENTS (cfun->decl); t; t = DECL_CHAIN (t))
17495 if (TREE_TYPE (t) == error_mark_node)
17496 continue;
17498 if (TYPE_MODE (TREE_TYPE (t)) == SDmode
17499 || TYPE_MODE (DECL_ARG_TYPE (t)) == SDmode)
17501 rtx stack = assign_stack_local (DDmode, GET_MODE_SIZE (DDmode), 0);
17502 cfun->machine->sdmode_stack_slot = adjust_address_nv (stack,
17503 SDmode, 0);
17504 return;
17509 static void
17510 rs6000_instantiate_decls (void)
17512 if (cfun->machine->sdmode_stack_slot != NULL_RTX)
17513 instantiate_decl_rtl (cfun->machine->sdmode_stack_slot);
17516 /* Given an rtx X being reloaded into a reg required to be
17517 in class CLASS, return the class of reg to actually use.
17518 In general this is just CLASS; but on some machines
17519 in some cases it is preferable to use a more restrictive class.
17521 On the RS/6000, we have to return NO_REGS when we want to reload a
17522 floating-point CONST_DOUBLE to force it to be copied to memory.
17524 We also don't want to reload integer values into floating-point
17525 registers if we can at all help it. In fact, this can
17526 cause reload to die, if it tries to generate a reload of CTR
17527 into a FP register and discovers it doesn't have the memory location
17528 required.
17530 ??? Would it be a good idea to have reload do the converse, that is
17531 try to reload floating modes into FP registers if possible?
17534 static enum reg_class
17535 rs6000_preferred_reload_class (rtx x, enum reg_class rclass)
17537 machine_mode mode = GET_MODE (x);
17538 bool is_constant = CONSTANT_P (x);
17540 /* Do VSX tests before handling traditional floating point registers. */
17541 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17543 if (is_constant)
17545 /* Zero is always allowed in all VSX registers. */
17546 if (x == CONST0_RTX (mode))
17547 return rclass;
17549 /* If this is a vector constant that can be formed with a few Altivec
17550 instructions, we want altivec registers. */
17551 if (GET_CODE (x) == CONST_VECTOR && easy_vector_constant (x, mode))
17552 return ALTIVEC_REGS;
17554 /* Force constant to memory. */
17555 return NO_REGS;
17558 /* If this is a scalar floating point value, prefer the traditional
17559 floating point registers so that we can use D-form (register+offset)
17560 addressing. */
17561 if (GET_MODE_SIZE (mode) < 16)
17562 return FLOAT_REGS;
17564 /* Prefer the Altivec registers if Altivec is handling the vector
17565 operations (i.e. V16QI, V8HI, and V4SI), or if we prefer Altivec
17566 loads. */
17567 if (VECTOR_UNIT_ALTIVEC_P (mode) || VECTOR_MEM_ALTIVEC_P (mode)
17568 || mode == V1TImode)
17569 return ALTIVEC_REGS;
17571 return rclass;
17574 if (is_constant || GET_CODE (x) == PLUS)
17576 if (reg_class_subset_p (GENERAL_REGS, rclass))
17577 return GENERAL_REGS;
17578 if (reg_class_subset_p (BASE_REGS, rclass))
17579 return BASE_REGS;
17580 return NO_REGS;
17583 if (GET_MODE_CLASS (mode) == MODE_INT && rclass == NON_SPECIAL_REGS)
17584 return GENERAL_REGS;
17586 return rclass;
17589 /* Debug version of rs6000_preferred_reload_class. */
17590 static enum reg_class
17591 rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
17593 enum reg_class ret = rs6000_preferred_reload_class (x, rclass);
17595 fprintf (stderr,
17596 "\nrs6000_preferred_reload_class, return %s, rclass = %s, "
17597 "mode = %s, x:\n",
17598 reg_class_names[ret], reg_class_names[rclass],
17599 GET_MODE_NAME (GET_MODE (x)));
17600 debug_rtx (x);
17602 return ret;
17605 /* If we are copying between FP or AltiVec registers and anything else, we need
17606 a memory location. The exception is when we are targeting ppc64 and the
17607 direct fpr/gpr move instructions are available. Also, under VSX, you
17608 can copy vector registers from the FP register set to the Altivec register
17609 set and vice versa. */
17611 static bool
17612 rs6000_secondary_memory_needed (enum reg_class from_class,
17613 enum reg_class to_class,
17614 machine_mode mode)
17616 enum rs6000_reg_type from_type, to_type;
17617 bool altivec_p = ((from_class == ALTIVEC_REGS)
17618 || (to_class == ALTIVEC_REGS));
17620 /* If a simple/direct move is available, we don't need secondary memory */
17621 from_type = reg_class_to_reg_type[(int)from_class];
17622 to_type = reg_class_to_reg_type[(int)to_class];
17624 if (rs6000_secondary_reload_move (to_type, from_type, mode,
17625 (secondary_reload_info *)0, altivec_p))
17626 return false;
17628 /* If we have a floating point or vector register class, we need to use
17629 memory to transfer the data. */
17630 if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
17631 return true;
17633 return false;
17636 /* Debug version of rs6000_secondary_memory_needed. */
17637 static bool
17638 rs6000_debug_secondary_memory_needed (enum reg_class from_class,
17639 enum reg_class to_class,
17640 machine_mode mode)
17642 bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
17644 fprintf (stderr,
17645 "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
17646 "to_class = %s, mode = %s\n",
17647 ret ? "true" : "false",
17648 reg_class_names[from_class],
17649 reg_class_names[to_class],
17650 GET_MODE_NAME (mode));
17652 return ret;
17655 /* Return the register class of a scratch register needed to copy IN into
17656 or out of a register in RCLASS in MODE. If it can be done directly,
17657 NO_REGS is returned. */
17659 static enum reg_class
17660 rs6000_secondary_reload_class (enum reg_class rclass, machine_mode mode,
17661 rtx in)
17663 int regno;
17665 if (TARGET_ELF || (DEFAULT_ABI == ABI_DARWIN
17666 #if TARGET_MACHO
17667 && MACHOPIC_INDIRECT
17668 #endif
17671 /* We cannot copy a symbolic operand directly into anything
17672 other than BASE_REGS for TARGET_ELF. So indicate that a
17673 register from BASE_REGS is needed as an intermediate
17674 register.
17676 On Darwin, pic addresses require a load from memory, which
17677 needs a base register. */
17678 if (rclass != BASE_REGS
17679 && (GET_CODE (in) == SYMBOL_REF
17680 || GET_CODE (in) == HIGH
17681 || GET_CODE (in) == LABEL_REF
17682 || GET_CODE (in) == CONST))
17683 return BASE_REGS;
17686 if (GET_CODE (in) == REG)
17688 regno = REGNO (in);
17689 if (regno >= FIRST_PSEUDO_REGISTER)
17691 regno = true_regnum (in);
17692 if (regno >= FIRST_PSEUDO_REGISTER)
17693 regno = -1;
17696 else if (GET_CODE (in) == SUBREG)
17698 regno = true_regnum (in);
17699 if (regno >= FIRST_PSEUDO_REGISTER)
17700 regno = -1;
17702 else
17703 regno = -1;
17705 /* If we have VSX register moves, prefer moving scalar values between
17706 Altivec registers and GPR by going via an FPR (and then via memory)
17707 instead of reloading the secondary memory address for Altivec moves. */
17708 if (TARGET_VSX
17709 && GET_MODE_SIZE (mode) < 16
17710 && (((rclass == GENERAL_REGS || rclass == BASE_REGS)
17711 && (regno >= 0 && ALTIVEC_REGNO_P (regno)))
17712 || ((rclass == VSX_REGS || rclass == ALTIVEC_REGS)
17713 && (regno >= 0 && INT_REGNO_P (regno)))))
17714 return FLOAT_REGS;
17716 /* We can place anything into GENERAL_REGS and can put GENERAL_REGS
17717 into anything. */
17718 if (rclass == GENERAL_REGS || rclass == BASE_REGS
17719 || (regno >= 0 && INT_REGNO_P (regno)))
17720 return NO_REGS;
17722 /* Constants, memory, and VSX registers can go into VSX registers (both the
17723 traditional floating point and the altivec registers). */
17724 if (rclass == VSX_REGS
17725 && (regno == -1 || VSX_REGNO_P (regno)))
17726 return NO_REGS;
17728 /* Constants, memory, and FP registers can go into FP registers. */
17729 if ((regno == -1 || FP_REGNO_P (regno))
17730 && (rclass == FLOAT_REGS || rclass == NON_SPECIAL_REGS))
17731 return (mode != SDmode || lra_in_progress) ? NO_REGS : GENERAL_REGS;
17733 /* Memory and AltiVec registers can go into AltiVec registers. */
17734 if ((regno == -1 || ALTIVEC_REGNO_P (regno))
17735 && rclass == ALTIVEC_REGS)
17736 return NO_REGS;
17738 /* We can copy among the CR registers. */
17739 if ((rclass == CR_REGS || rclass == CR0_REGS)
17740 && regno >= 0 && CR_REGNO_P (regno))
17741 return NO_REGS;
17743 /* Otherwise, we need GENERAL_REGS. */
17744 return GENERAL_REGS;
17747 /* Debug version of rs6000_secondary_reload_class. */
17748 static enum reg_class
17749 rs6000_debug_secondary_reload_class (enum reg_class rclass,
17750 machine_mode mode, rtx in)
17752 enum reg_class ret = rs6000_secondary_reload_class (rclass, mode, in);
17753 fprintf (stderr,
17754 "\nrs6000_secondary_reload_class, return %s, rclass = %s, "
17755 "mode = %s, input rtx:\n",
17756 reg_class_names[ret], reg_class_names[rclass],
17757 GET_MODE_NAME (mode));
17758 debug_rtx (in);
17760 return ret;
17763 /* Return nonzero if for CLASS a mode change from FROM to TO is invalid. */
17765 static bool
17766 rs6000_cannot_change_mode_class (machine_mode from,
17767 machine_mode to,
17768 enum reg_class rclass)
17770 unsigned from_size = GET_MODE_SIZE (from);
17771 unsigned to_size = GET_MODE_SIZE (to);
17773 if (from_size != to_size)
17775 enum reg_class xclass = (TARGET_VSX) ? VSX_REGS : FLOAT_REGS;
17777 if (reg_classes_intersect_p (xclass, rclass))
17779 unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
17780 unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
17782 /* Don't allow 64-bit types to overlap with 128-bit types that take a
17783 single register under VSX because the scalar part of the register
17784 is in the upper 64-bits, and not the lower 64-bits. Types like
17785 TFmode/TDmode that take 2 scalar register can overlap. 128-bit
17786 IEEE floating point can't overlap, and neither can small
17787 values. */
17789 if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
17790 return true;
17792 /* TDmode in floating-mode registers must always go into a register
17793 pair with the most significant word in the even-numbered register
17794 to match ISA requirements. In little-endian mode, this does not
17795 match subreg numbering, so we cannot allow subregs. */
17796 if (!BYTES_BIG_ENDIAN && (to == TDmode || from == TDmode))
17797 return true;
17799 if (from_size < 8 || to_size < 8)
17800 return true;
17802 if (from_size == 8 && (8 * to_nregs) != to_size)
17803 return true;
17805 if (to_size == 8 && (8 * from_nregs) != from_size)
17806 return true;
17808 return false;
17810 else
17811 return false;
17814 if (TARGET_E500_DOUBLE
17815 && ((((to) == DFmode) + ((from) == DFmode)) == 1
17816 || (((to) == TFmode) + ((from) == TFmode)) == 1
17817 || (((to) == DDmode) + ((from) == DDmode)) == 1
17818 || (((to) == TDmode) + ((from) == TDmode)) == 1
17819 || (((to) == DImode) + ((from) == DImode)) == 1))
17820 return true;
17822 /* Since the VSX register set includes traditional floating point registers
17823 and altivec registers, just check for the size being different instead of
17824 trying to check whether the modes are vector modes. Otherwise it won't
17825 allow, say, DF and DI to change classes. For types like TFmode and TDmode
17826 that take 2 64-bit registers, rather than a single 128-bit register, don't
17827 allow subregs of those types to other 128 bit types. */
17828 if (TARGET_VSX && VSX_REG_CLASS_P (rclass))
17830 unsigned num_regs = (from_size + 15) / 16;
17831 if (hard_regno_nregs[FIRST_FPR_REGNO][to] > num_regs
17832 || hard_regno_nregs[FIRST_FPR_REGNO][from] > num_regs)
17833 return true;
17835 return (from_size != 8 && from_size != 16);
17838 if (TARGET_ALTIVEC && rclass == ALTIVEC_REGS
17839 && (ALTIVEC_VECTOR_MODE (from) + ALTIVEC_VECTOR_MODE (to)) == 1)
17840 return true;
17842 if (TARGET_SPE && (SPE_VECTOR_MODE (from) + SPE_VECTOR_MODE (to)) == 1
17843 && reg_classes_intersect_p (GENERAL_REGS, rclass))
17844 return true;
17846 return false;
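/* For example (illustrative): a (subreg:SI (reg:DF ...)) in a VSX or
   FLOAT_REGS class is rejected by the from_size < 8 || to_size < 8
   test above, since a narrow integer view of a scalar held in a
   floating point register does not access the bytes a subreg promises;
   likewise on little-endian any TDmode subreg is refused, because the
   even/odd register-pair layout does not follow subreg numbering.  */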
17849 /* Debug version of rs6000_cannot_change_mode_class. */
17850 static bool
17851 rs6000_debug_cannot_change_mode_class (machine_mode from,
17852 machine_mode to,
17853 enum reg_class rclass)
17855 bool ret = rs6000_cannot_change_mode_class (from, to, rclass);
17857 fprintf (stderr,
17858 "rs6000_cannot_change_mode_class, return %s, from = %s, "
17859 "to = %s, rclass = %s\n",
17860 ret ? "true" : "false",
17861 GET_MODE_NAME (from), GET_MODE_NAME (to),
17862 reg_class_names[rclass]);
17864 return ret;
17867 /* Return a string to do a move operation of 128 bits of data. */
17869 const char *
17870 rs6000_output_move_128bit (rtx operands[])
17872 rtx dest = operands[0];
17873 rtx src = operands[1];
17874 machine_mode mode = GET_MODE (dest);
17875 int dest_regno;
17876 int src_regno;
17877 bool dest_gpr_p, dest_fp_p, dest_vmx_p, dest_vsx_p;
17878 bool src_gpr_p, src_fp_p, src_vmx_p, src_vsx_p;
17880 if (REG_P (dest))
17882 dest_regno = REGNO (dest);
17883 dest_gpr_p = INT_REGNO_P (dest_regno);
17884 dest_fp_p = FP_REGNO_P (dest_regno);
17885 dest_vmx_p = ALTIVEC_REGNO_P (dest_regno);
17886 dest_vsx_p = dest_fp_p | dest_vmx_p;
17888 else
17890 dest_regno = -1;
17891 dest_gpr_p = dest_fp_p = dest_vmx_p = dest_vsx_p = false;
17894 if (REG_P (src))
17896 src_regno = REGNO (src);
17897 src_gpr_p = INT_REGNO_P (src_regno);
17898 src_fp_p = FP_REGNO_P (src_regno);
17899 src_vmx_p = ALTIVEC_REGNO_P (src_regno);
17900 src_vsx_p = src_fp_p | src_vmx_p;
17902 else
17904 src_regno = -1;
17905 src_gpr_p = src_fp_p = src_vmx_p = src_vsx_p = false;
17908 /* Register moves. */
17909 if (dest_regno >= 0 && src_regno >= 0)
17911 if (dest_gpr_p)
17913 if (src_gpr_p)
17914 return "#";
17916 else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
17917 return "#";
17920 else if (TARGET_VSX && dest_vsx_p)
17922 if (src_vsx_p)
17923 return "xxlor %x0,%x1,%x1";
17925 else if (TARGET_DIRECT_MOVE && src_gpr_p)
17926 return "#";
17929 else if (TARGET_ALTIVEC && dest_vmx_p && src_vmx_p)
17930 return "vor %0,%1,%1";
17932 else if (dest_fp_p && src_fp_p)
17933 return "#";
17936 /* Loads. */
17937 else if (dest_regno >= 0 && MEM_P (src))
17939 if (dest_gpr_p)
17941 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
17942 return "lq %0,%1";
17943 else
17944 return "#";
17947 else if (TARGET_ALTIVEC && dest_vmx_p
17948 && altivec_indexed_or_indirect_operand (src, mode))
17949 return "lvx %0,%y1";
17951 else if (TARGET_VSX && dest_vsx_p)
17953 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
17954 return "lxvw4x %x0,%y1";
17955 else
17956 return "lxvd2x %x0,%y1";
17959 else if (TARGET_ALTIVEC && dest_vmx_p)
17960 return "lvx %0,%y1";
17962 else if (dest_fp_p)
17963 return "#";
17966 /* Stores. */
17967 else if (src_regno >= 0 && MEM_P (dest))
17969 if (src_gpr_p)
17971 if (TARGET_QUAD_MEMORY && quad_load_store_p (dest, src))
17972 return "stq %1,%0";
17973 else
17974 return "#";
17977 else if (TARGET_ALTIVEC && src_vmx_p
17978 && altivec_indexed_or_indirect_operand (src, mode))
17979 return "stvx %1,%y0";
17981 else if (TARGET_VSX && src_vsx_p)
17983 if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
17984 return "stxvw4x %x1,%y0";
17985 else
17986 return "stxvd2x %x1,%y0";
17989 else if (TARGET_ALTIVEC && src_vmx_p)
17990 return "stvx %1,%y0";
17992 else if (src_fp_p)
17993 return "#";
17996 /* Constants. */
17997 else if (dest_regno >= 0
17998 && (GET_CODE (src) == CONST_INT
17999 || GET_CODE (src) == CONST_WIDE_INT
18000 || GET_CODE (src) == CONST_DOUBLE
18001 || GET_CODE (src) == CONST_VECTOR))
18003 if (dest_gpr_p)
18004 return "#";
18006 else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
18007 return "xxlxor %x0,%x0,%x0";
18009 else if (TARGET_ALTIVEC && dest_vmx_p)
18010 return output_vec_const_move (operands);
18013 if (TARGET_DEBUG_ADDR)
18015 fprintf (stderr, "\n===== Bad 128 bit move:\n");
18016 debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
18019 gcc_unreachable ();
18022 /* Validate a 128-bit move. */
18023 bool
18024 rs6000_move_128bit_ok_p (rtx operands[])
18026 machine_mode mode = GET_MODE (operands[0]);
18027 return (gpc_reg_operand (operands[0], mode)
18028 || gpc_reg_operand (operands[1], mode));
18031 /* Return true if a 128-bit move needs to be split. */
18032 bool
18033 rs6000_split_128bit_ok_p (rtx operands[])
18035 if (!reload_completed)
18036 return false;
18038 if (!gpr_or_gpr_p (operands[0], operands[1]))
18039 return false;
18041 if (quad_load_store_p (operands[0], operands[1]))
18042 return false;
18044 return true;
18048 /* Given a comparison operation, return the bit number in CCR to test. We
18049 know this is a valid comparison.
18051 SCC_P is 1 if this is for an scc. That means that %D will have been
18052 used instead of %C, so the bits will be in different places.
18054 Return -1 if OP isn't a valid comparison for some reason. */
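/* Illustrative example: for a GT comparison held in CR field N, base_bit
   is 4 * N and the result is base_bit + 1, the GT position within the
   field's LT/GT/EQ/SO quadruplet; an scc GE test instead returns
   base_bit + 3, the bit a prior cror will have set.  */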
18057 ccr_bit (rtx op, int scc_p)
18059 enum rtx_code code = GET_CODE (op);
18060 machine_mode cc_mode;
18061 int cc_regnum;
18062 int base_bit;
18063 rtx reg;
18065 if (!COMPARISON_P (op))
18066 return -1;
18068 reg = XEXP (op, 0);
18070 gcc_assert (GET_CODE (reg) == REG && CR_REGNO_P (REGNO (reg)));
18072 cc_mode = GET_MODE (reg);
18073 cc_regnum = REGNO (reg);
18074 base_bit = 4 * (cc_regnum - CR0_REGNO);
18076 validate_condition_mode (code, cc_mode);
18078 /* When generating a sCOND operation, only positive conditions are
18079 allowed. */
18080 gcc_assert (!scc_p
18081 || code == EQ || code == GT || code == LT || code == UNORDERED
18082 || code == GTU || code == LTU);
18084 switch (code)
18086 case NE:
18087 return scc_p ? base_bit + 3 : base_bit + 2;
18088 case EQ:
18089 return base_bit + 2;
18090 case GT: case GTU: case UNLE:
18091 return base_bit + 1;
18092 case LT: case LTU: case UNGE:
18093 return base_bit;
18094 case ORDERED: case UNORDERED:
18095 return base_bit + 3;
18097 case GE: case GEU:
18098 /* If scc, we will have done a cror to put the bit in the
18099 unordered position. So test that bit. For integer, this is ! LT
18100 unless this is an scc insn. */
18101 return scc_p ? base_bit + 3 : base_bit;
18103 case LE: case LEU:
18104 return scc_p ? base_bit + 3 : base_bit + 1;
18106 default:
18107 gcc_unreachable ();
18111 /* Return the GOT register. */
18114 rs6000_got_register (rtx value ATTRIBUTE_UNUSED)
18116 /* The second flow pass currently (June 1999) can't update
18117 regs_ever_live without disturbing other parts of the compiler, so
18118 update it here to make the prolog/epilogue code happy. */
18119 if (!can_create_pseudo_p ()
18120 && !df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
18121 df_set_regs_ever_live (RS6000_PIC_OFFSET_TABLE_REGNUM, true);
18123 crtl->uses_pic_offset_table = 1;
18125 return pic_offset_table_rtx;
18128 static rs6000_stack_t stack_info;
18130 /* Function to init struct machine_function.
18131 This will be called, via a pointer variable,
18132 from push_function_context. */
18134 static struct machine_function *
18135 rs6000_init_machine_status (void)
18137 stack_info.reload_completed = 0;
18138 return ggc_cleared_alloc<machine_function> ();
18141 #define INT_P(X) (GET_CODE (X) == CONST_INT && GET_MODE (X) == VOIDmode)
18144 extract_MB (rtx op)
18146 int i;
18147 unsigned long val = INTVAL (op);
18149 /* If the high bit is zero, the value is the first 1 bit we find
18150 from the left. */
18151 if ((val & 0x80000000) == 0)
18153 gcc_assert (val & 0xffffffff);
18155 i = 1;
18156 while (((val <<= 1) & 0x80000000) == 0)
18157 ++i;
18158 return i;
18161 /* If the high bit is set and the low bit is not, or the mask is all
18162 1's, the value is zero. */
18163 if ((val & 1) == 0 || (val & 0xffffffff) == 0xffffffff)
18164 return 0;
18166 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18167 from the right. */
18168 i = 31;
18169 while (((val >>= 1) & 1) != 0)
18170 --i;
18172 return i;
18176 extract_ME (rtx op)
18178 int i;
18179 unsigned long val = INTVAL (op);
18181 /* If the low bit is zero, the value is the first 1 bit we find from
18182 the right. */
18183 if ((val & 1) == 0)
18185 gcc_assert (val & 0xffffffff);
18187 i = 30;
18188 while (((val >>= 1) & 1) == 0)
18189 --i;
18191 return i;
18194 /* If the low bit is set and the high bit is not, or the mask is all
18195 1's, the value is 31. */
18196 if ((val & 0x80000000) == 0 || (val & 0xffffffff) == 0xffffffff)
18197 return 31;
18199 /* Otherwise we have a wrap-around mask. Look for the first 0 bit
18200 from the left. */
18201 i = 0;
18202 while (((val <<= 1) & 0x80000000) != 0)
18203 ++i;
18205 return i;
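/* Worked example (illustrative): for the contiguous mask 0x00ffff00,
   extract_MB returns 8 and extract_ME returns 23, the IBM bit numbers
   (bit 0 = most significant) of the first and last 1 bits, as used by
   rlwinm.  */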
18208 /* Write out a function code label. */
18210 void
18211 rs6000_output_function_entry (FILE *file, const char *fname)
18213 if (fname[0] != '.')
18215 switch (DEFAULT_ABI)
18217 default:
18218 gcc_unreachable ();
18220 case ABI_AIX:
18221 if (DOT_SYMBOLS)
18222 putc ('.', file);
18223 else
18224 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "L.");
18225 break;
18227 case ABI_ELFv2:
18228 case ABI_V4:
18229 case ABI_DARWIN:
18230 break;
18234 RS6000_OUTPUT_BASENAME (file, fname);
18237 /* Print an operand. Recognize special options, documented below. */
18239 #if TARGET_ELF
18240 #define SMALL_DATA_RELOC ((rs6000_sdata == SDATA_EABI) ? "sda21" : "sdarel")
18241 #define SMALL_DATA_REG ((rs6000_sdata == SDATA_EABI) ? 0 : 13)
18242 #else
18243 #define SMALL_DATA_RELOC "sda21"
18244 #define SMALL_DATA_REG 0
18245 #endif
18247 void
18248 print_operand (FILE *file, rtx x, int code)
18250 int i;
18251 unsigned HOST_WIDE_INT uval;
18253 switch (code)
18255 /* %a is output_address. */
18257 case 'b':
18258 /* If constant, low-order 16 bits of constant, unsigned.
18259 Otherwise, write normally. */
18260 if (INT_P (x))
18261 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
18262 else
18263 print_operand (file, x, 0);
18264 return;
18266 case 'B':
18267 /* If the low-order bit is zero, write 'r'; otherwise, write 'l'
18268 for 64-bit mask direction. */
18269 putc (((INTVAL (x) & 1) == 0 ? 'r' : 'l'), file);
18270 return;
18272 /* %c is output_addr_const if a CONSTANT_ADDRESS_P, otherwise
18273 output_operand. */
18275 case 'D':
18276 /* Like 'J' but get to the GT bit only. */
18277 gcc_assert (REG_P (x));
18279 /* Bit 1 is GT bit. */
18280 i = 4 * (REGNO (x) - CR0_REGNO) + 1;
18282 /* Add one for shift count in rlinm for scc. */
18283 fprintf (file, "%d", i + 1);
18284 return;
18286 case 'e':
18287 /* If the low 16 bits are 0, but some other bit is set, write 's'. */
18288 if (! INT_P (x))
18290 output_operand_lossage ("invalid %%e value");
18291 return;
18294 uval = INTVAL (x);
18295 if ((uval & 0xffff) == 0 && uval != 0)
18296 putc ('s', file);
18297 return;
18299 case 'E':
18300 /* X is a CR register. Print the number of the EQ bit of the CR. */
18301 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18302 output_operand_lossage ("invalid %%E value");
18303 else
18304 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO) + 2);
18305 return;
18307 case 'f':
18308 /* X is a CR register. Print the shift count needed to move it
18309 to the high-order four bits. */
18310 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18311 output_operand_lossage ("invalid %%f value");
18312 else
18313 fprintf (file, "%d", 4 * (REGNO (x) - CR0_REGNO));
18314 return;
18316 case 'F':
18317 /* Similar, but print the count for the rotate in the opposite
18318 direction. */
18319 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18320 output_operand_lossage ("invalid %%F value");
18321 else
18322 fprintf (file, "%d", 32 - 4 * (REGNO (x) - CR0_REGNO));
18323 return;
18325 case 'G':
18326 /* X is a constant integer. If it is negative, print "m",
18327 otherwise print "z". This is to make an aze or ame insn. */
18328 if (GET_CODE (x) != CONST_INT)
18329 output_operand_lossage ("invalid %%G value");
18330 else if (INTVAL (x) >= 0)
18331 putc ('z', file);
18332 else
18333 putc ('m', file);
18334 return;
18336 case 'h':
18337 /* If constant, output low-order five bits. Otherwise, write
18338 normally. */
18339 if (INT_P (x))
18340 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 31);
18341 else
18342 print_operand (file, x, 0);
18343 return;
18345 case 'H':
18346 /* If constant, output low-order six bits. Otherwise, write
18347 normally. */
18348 if (INT_P (x))
18349 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 63);
18350 else
18351 print_operand (file, x, 0);
18352 return;
18354 case 'I':
18355 /* Print `i' if this is a constant, else nothing. */
18356 if (INT_P (x))
18357 putc ('i', file);
18358 return;
18360 case 'j':
18361 /* Write the bit number in CCR for jump. */
18362 i = ccr_bit (x, 0);
18363 if (i == -1)
18364 output_operand_lossage ("invalid %%j code");
18365 else
18366 fprintf (file, "%d", i);
18367 return;
18369 case 'J':
18370 /* Similar, but add one for shift count in rlinm for scc and pass
18371 scc flag to `ccr_bit'. */
18372 i = ccr_bit (x, 1);
18373 if (i == -1)
18374 output_operand_lossage ("invalid %%J code");
18375 else
18376 /* If we want bit 31, write a shift count of zero, not 32. */
18377 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18378 return;
18380 case 'k':
18381 /* X must be a constant. Write the 1's complement of the
18382 constant. */
18383 if (! INT_P (x))
18384 output_operand_lossage ("invalid %%k value");
18385 else
18386 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ~ INTVAL (x));
18387 return;
18389 case 'K':
18390 /* X must be a symbolic constant on ELF. Write an
18391 expression suitable for an 'addi' that adds in the low 16
18392 bits of the MEM. */
18393 if (GET_CODE (x) == CONST)
18395 if (GET_CODE (XEXP (x, 0)) != PLUS
18396 || (GET_CODE (XEXP (XEXP (x, 0), 0)) != SYMBOL_REF
18397 && GET_CODE (XEXP (XEXP (x, 0), 0)) != LABEL_REF)
18398 || GET_CODE (XEXP (XEXP (x, 0), 1)) != CONST_INT)
18399 output_operand_lossage ("invalid %%K value");
18401 print_operand_address (file, x);
18402 fputs ("@l", file);
18403 return;
18405 /* %l is output_asm_label. */
18407 case 'L':
18408 /* Write second word of DImode or DFmode reference. Works on register
18409 or non-indexed memory only. */
18410 if (REG_P (x))
18411 fputs (reg_names[REGNO (x) + 1], file);
18412 else if (MEM_P (x))
18414 /* Handle possible auto-increment. Since it is pre-increment and
18415 we have already done it, we can just use an offset of one word. */
18416 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18417 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18418 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18419 UNITS_PER_WORD));
18420 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18421 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0),
18422 UNITS_PER_WORD));
18423 else
18424 output_address (XEXP (adjust_address_nv (x, SImode,
18425 UNITS_PER_WORD),
18426 0));
18428 if (small_data_operand (x, GET_MODE (x)))
18429 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18430 reg_names[SMALL_DATA_REG]);
18432 return;
18434 case 'm':
18435 /* MB value for a mask operand. */
18436 if (! mask_operand (x, SImode))
18437 output_operand_lossage ("invalid %%m value");
18439 fprintf (file, "%d", extract_MB (x));
18440 return;
18442 case 'M':
18443 /* ME value for a mask operand. */
18444 if (! mask_operand (x, SImode))
18445 output_operand_lossage ("invalid %%M value");
18447 fprintf (file, "%d", extract_ME (x));
18448 return;
18450 /* %n outputs the negative of its operand. */
18452 case 'N':
18453 /* Write the number of elements in the vector times 4. */
18454 if (GET_CODE (x) != PARALLEL)
18455 output_operand_lossage ("invalid %%N value");
18456 else
18457 fprintf (file, "%d", XVECLEN (x, 0) * 4);
18458 return;
18460 case 'O':
18461 /* Similar, but subtract 1 first. */
18462 if (GET_CODE (x) != PARALLEL)
18463 output_operand_lossage ("invalid %%O value");
18464 else
18465 fprintf (file, "%d", (XVECLEN (x, 0) - 1) * 4);
18466 return;
18468 case 'p':
18469 /* X is a CONST_INT that is a power of two. Output the logarithm. */
18470 if (! INT_P (x)
18471 || INTVAL (x) < 0
18472 || (i = exact_log2 (INTVAL (x))) < 0)
18473 output_operand_lossage ("invalid %%p value");
18474 else
18475 fprintf (file, "%d", i);
18476 return;
18478 case 'P':
18479 /* The operand must be an indirect memory reference. The result
18480 is the register name. */
18481 if (GET_CODE (x) != MEM || GET_CODE (XEXP (x, 0)) != REG
18482 || REGNO (XEXP (x, 0)) >= 32)
18483 output_operand_lossage ("invalid %%P value");
18484 else
18485 fputs (reg_names[REGNO (XEXP (x, 0))], file);
18486 return;
18488 case 'q':
18489 /* This outputs the logical code corresponding to a boolean
18490 expression. The expression may have one or both operands
18491 negated (if one, only the first one). For condition register
18492 logical operations, it will also treat the negated
18493 CR codes as NOTs, but not handle NOTs of them. */
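/* Illustrative: (and (not a) b) prints "andc", (ior (not a) (not b))
   prints "nand", and (xor a b) prints "xor", per the table below.  */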
18495 const char *const *t = 0;
18496 const char *s;
18497 enum rtx_code code = GET_CODE (x);
18498 static const char * const tbl[3][3] = {
18499 { "and", "andc", "nor" },
18500 { "or", "orc", "nand" },
18501 { "xor", "eqv", "xor" } };
18503 if (code == AND)
18504 t = tbl[0];
18505 else if (code == IOR)
18506 t = tbl[1];
18507 else if (code == XOR)
18508 t = tbl[2];
18509 else
18510 output_operand_lossage ("invalid %%q value");
18512 if (GET_CODE (XEXP (x, 0)) != NOT)
18513 s = t[0];
18514 else
18516 if (GET_CODE (XEXP (x, 1)) == NOT)
18517 s = t[2];
18518 else
18519 s = t[1];
18522 fputs (s, file);
18524 return;
18526 case 'Q':
18527 if (! TARGET_MFCRF)
18528 return;
18529 fputc (',', file);
18530 /* FALLTHRU */
18532 case 'R':
18533 /* X is a CR register. Print the mask for `mtcrf'. */
18534 if (GET_CODE (x) != REG || ! CR_REGNO_P (REGNO (x)))
18535 output_operand_lossage ("invalid %%R value");
18536 else
18537 fprintf (file, "%d", 128 >> (REGNO (x) - CR0_REGNO));
18538 return;
18540 case 's':
18541 /* Low 5 bits of 32 - value. */
18542 if (! INT_P (x))
18543 output_operand_lossage ("invalid %%s value");
18544 else
18545 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (32 - INTVAL (x)) & 31);
18546 return;
18548 case 'S':
18549 /* PowerPC64 mask position. All 0's is excluded.
18550 CONST_INT 32-bit mask is considered sign-extended so any
18551 transition must occur within the CONST_INT, not on the boundary. */
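/* Worked examples (illustrative): the clear-left mask 0xffffffff (low
   bit set) prints 32, an MB value for rldicl; the clear-right mask
   0xffffffff00000000 prints 31, an ME value for rldicr.  */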
18552 if (! mask64_operand (x, DImode))
18553 output_operand_lossage ("invalid %%S value");
18555 uval = INTVAL (x);
18557 if (uval & 1) /* Clear Left */
18559 #if HOST_BITS_PER_WIDE_INT > 64
18560 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18561 #endif
18562 i = 64;
18564 else /* Clear Right */
18566 uval = ~uval;
18567 #if HOST_BITS_PER_WIDE_INT > 64
18568 uval &= ((unsigned HOST_WIDE_INT) 1 << 64) - 1;
18569 #endif
18570 i = 63;
18572 while (uval != 0)
18573 --i, uval >>= 1;
18574 gcc_assert (i >= 0);
18575 fprintf (file, "%d", i);
18576 return;
18578 case 't':
18579 /* Like 'J' but get to the OVERFLOW/UNORDERED bit. */
18580 gcc_assert (REG_P (x) && GET_MODE (x) == CCmode);
18582 /* Bit 3 is OV bit. */
18583 i = 4 * (REGNO (x) - CR0_REGNO) + 3;
18585 /* If we want bit 31, write a shift count of zero, not 32. */
18586 fprintf (file, "%d", i == 31 ? 0 : i + 1);
18587 return;
18589 case 'T':
18590 /* Print the symbolic name of a branch target register. */
18591 if (GET_CODE (x) != REG || (REGNO (x) != LR_REGNO
18592 && REGNO (x) != CTR_REGNO))
18593 output_operand_lossage ("invalid %%T value");
18594 else if (REGNO (x) == LR_REGNO)
18595 fputs ("lr", file);
18596 else
18597 fputs ("ctr", file);
18598 return;
18600 case 'u':
18601 /* High-order or low-order 16 bits of constant, whichever is non-zero,
18602 for use in unsigned operand. */
18603 if (! INT_P (x))
18605 output_operand_lossage ("invalid %%u value");
18606 return;
18609 uval = INTVAL (x);
18610 if ((uval & 0xffff) == 0)
18611 uval >>= 16;
18613 fprintf (file, HOST_WIDE_INT_PRINT_HEX, uval & 0xffff);
18614 return;
18616 case 'v':
18617 /* High-order 16 bits of constant for use in signed operand. */
18618 if (! INT_P (x))
18619 output_operand_lossage ("invalid %%v value");
18620 else
18621 fprintf (file, HOST_WIDE_INT_PRINT_HEX,
18622 (INTVAL (x) >> 16) & 0xffff);
18623 return;
18625 case 'U':
18626 /* Print `u' if this has an auto-increment or auto-decrement. */
18627 if (MEM_P (x)
18628 && (GET_CODE (XEXP (x, 0)) == PRE_INC
18629 || GET_CODE (XEXP (x, 0)) == PRE_DEC
18630 || GET_CODE (XEXP (x, 0)) == PRE_MODIFY))
18631 putc ('u', file);
18632 return;
18634 case 'V':
18635 /* Print the trap code for this operand. */
18636 switch (GET_CODE (x))
18638 case EQ:
18639 fputs ("eq", file); /* 4 */
18640 break;
18641 case NE:
18642 fputs ("ne", file); /* 24 */
18643 break;
18644 case LT:
18645 fputs ("lt", file); /* 16 */
18646 break;
18647 case LE:
18648 fputs ("le", file); /* 20 */
18649 break;
18650 case GT:
18651 fputs ("gt", file); /* 8 */
18652 break;
18653 case GE:
18654 fputs ("ge", file); /* 12 */
18655 break;
18656 case LTU:
18657 fputs ("llt", file); /* 2 */
18658 break;
18659 case LEU:
18660 fputs ("lle", file); /* 6 */
18661 break;
18662 case GTU:
18663 fputs ("lgt", file); /* 1 */
18664 break;
18665 case GEU:
18666 fputs ("lge", file); /* 5 */
18667 break;
18668 default:
18669 gcc_unreachable ();
18671 break;
18673 case 'w':
18674 /* If constant, low-order 16 bits of constant, signed. Otherwise, write
18675 normally. */
18676 if (INT_P (x))
18677 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
18678 ((INTVAL (x) & 0xffff) ^ 0x8000) - 0x8000);
18679 else
18680 print_operand (file, x, 0);
18681 return;
18683 case 'W':
18684 /* MB value for a PowerPC64 rldic operand. */
18685 i = clz_hwi (INTVAL (x));
18687 fprintf (file, "%d", i);
18688 return;
18690 case 'x':
18691 /* X is a FPR or Altivec register used in a VSX context. */
18692 if (GET_CODE (x) != REG || !VSX_REGNO_P (REGNO (x)))
18693 output_operand_lossage ("invalid %%x value");
18694 else
18696 int reg = REGNO (x);
18697 int vsx_reg = (FP_REGNO_P (reg)
18698 ? reg - 32
18699 : reg - FIRST_ALTIVEC_REGNO + 32);
18701 #ifdef TARGET_REGNAMES
18702 if (TARGET_REGNAMES)
18703 fprintf (file, "%%vs%d", vsx_reg);
18704 else
18705 #endif
18706 fprintf (file, "%d", vsx_reg);
18708 return;
18710 case 'X':
18711 if (MEM_P (x)
18712 && (legitimate_indexed_address_p (XEXP (x, 0), 0)
18713 || (GET_CODE (XEXP (x, 0)) == PRE_MODIFY
18714 && legitimate_indexed_address_p (XEXP (XEXP (x, 0), 1), 0))))
18715 putc ('x', file);
18716 return;
18718 case 'Y':
18719 /* Like 'L', for third word of TImode/PTImode. */
18720 if (REG_P (x))
18721 fputs (reg_names[REGNO (x) + 2], file);
18722 else if (MEM_P (x))
18724 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18725 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18726 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18727 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18728 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 8));
18729 else
18730 output_address (XEXP (adjust_address_nv (x, SImode, 8), 0));
18731 if (small_data_operand (x, GET_MODE (x)))
18732 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18733 reg_names[SMALL_DATA_REG]);
18735 return;
18737 case 'z':
18738 /* X is a SYMBOL_REF. Write out the name preceded by a
18739 period and without any trailing data in brackets. Used for function
18740 names. If we are configured for System V (or the embedded ABI) on
18741 the PowerPC, do not emit the period, since those systems do not use
18742 TOCs and the like. */
18743 gcc_assert (GET_CODE (x) == SYMBOL_REF);
18745 /* For macho, check to see if we need a stub. */
18746 if (TARGET_MACHO)
18748 const char *name = XSTR (x, 0);
18749 #if TARGET_MACHO
18750 if (darwin_emit_branch_islands
18751 && MACHOPIC_INDIRECT
18752 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
18753 name = machopic_indirection_name (x, /*stub_p=*/true);
18754 #endif
18755 assemble_name (file, name);
18757 else if (!DOT_SYMBOLS)
18758 assemble_name (file, XSTR (x, 0));
18759 else
18760 rs6000_output_function_entry (file, XSTR (x, 0));
18761 return;
18763 case 'Z':
18764 /* Like 'L', for last word of TImode/PTImode. */
18765 if (REG_P (x))
18766 fputs (reg_names[REGNO (x) + 3], file);
18767 else if (MEM_P (x))
18769 if (GET_CODE (XEXP (x, 0)) == PRE_INC
18770 || GET_CODE (XEXP (x, 0)) == PRE_DEC)
18771 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18772 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18773 output_address (plus_constant (Pmode, XEXP (XEXP (x, 0), 0), 12));
18774 else
18775 output_address (XEXP (adjust_address_nv (x, SImode, 12), 0));
18776 if (small_data_operand (x, GET_MODE (x)))
18777 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18778 reg_names[SMALL_DATA_REG]);
18780 return;
18782 /* Print AltiVec or SPE memory operand. */
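/* Illustrative: an Altivec operand (mem (plus (reg 9) (reg 10))) prints
   as "9,10" for lvx/stvx, while an SPE 64-bit operand such as
   (mem (plus (reg 9) (const_int 8))) prints as "8(9)".  */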
18783 case 'y':
18785 rtx tmp;
18787 gcc_assert (MEM_P (x));
18789 tmp = XEXP (x, 0);
18791 /* Ugly hack because %y is overloaded. */
18792 if ((TARGET_SPE || TARGET_E500_DOUBLE)
18793 && (GET_MODE_SIZE (GET_MODE (x)) == 8
18794 || GET_MODE (x) == TFmode
18795 || GET_MODE (x) == TImode
18796 || GET_MODE (x) == PTImode))
18798 /* Handle [reg]. */
18799 if (REG_P (tmp))
18801 fprintf (file, "0(%s)", reg_names[REGNO (tmp)]);
18802 break;
18804 /* Handle [reg+UIMM]. */
18805 else if (GET_CODE (tmp) == PLUS
18806 && GET_CODE (XEXP (tmp, 1)) == CONST_INT)
18808 int offset;
18810 gcc_assert (REG_P (XEXP (tmp, 0)));
18812 offset = INTVAL (XEXP (tmp, 1));
18813 fprintf (file, "%d(%s)", offset, reg_names[REGNO (XEXP (tmp, 0))]);
18814 break;
18817 /* Fall through. Must be [reg+reg]. */
18819 if (VECTOR_MEM_ALTIVEC_P (GET_MODE (x))
18820 && GET_CODE (tmp) == AND
18821 && GET_CODE (XEXP (tmp, 1)) == CONST_INT
18822 && INTVAL (XEXP (tmp, 1)) == -16)
18823 tmp = XEXP (tmp, 0);
18824 else if (VECTOR_MEM_VSX_P (GET_MODE (x))
18825 && GET_CODE (tmp) == PRE_MODIFY)
18826 tmp = XEXP (tmp, 1);
18827 if (REG_P (tmp))
18828 fprintf (file, "0,%s", reg_names[REGNO (tmp)]);
18829 else
18831 if (GET_CODE (tmp) != PLUS
18832 || !REG_P (XEXP (tmp, 0))
18833 || !REG_P (XEXP (tmp, 1)))
18835 output_operand_lossage ("invalid %%y value, try using the 'Z' constraint");
18836 break;
18839 if (REGNO (XEXP (tmp, 0)) == 0)
18840 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 1)) ],
18841 reg_names[ REGNO (XEXP (tmp, 0)) ]);
18842 else
18843 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (tmp, 0)) ],
18844 reg_names[ REGNO (XEXP (tmp, 1)) ]);
18846 break;
18849 case 0:
18850 if (REG_P (x))
18851 fprintf (file, "%s", reg_names[REGNO (x)]);
18852 else if (MEM_P (x))
18854 /* We need to handle PRE_INC and PRE_DEC here, since we need to
18855 know the width from the mode. */
18856 if (GET_CODE (XEXP (x, 0)) == PRE_INC)
18857 fprintf (file, "%d(%s)", GET_MODE_SIZE (GET_MODE (x)),
18858 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18859 else if (GET_CODE (XEXP (x, 0)) == PRE_DEC)
18860 fprintf (file, "%d(%s)", - GET_MODE_SIZE (GET_MODE (x)),
18861 reg_names[REGNO (XEXP (XEXP (x, 0), 0))]);
18862 else if (GET_CODE (XEXP (x, 0)) == PRE_MODIFY)
18863 output_address (XEXP (XEXP (x, 0), 1));
18864 else
18865 output_address (XEXP (x, 0));
18867 else
18869 if (toc_relative_expr_p (x, false))
18870 /* This hack along with a corresponding hack in
18871 rs6000_output_addr_const_extra arranges to output addends
18872 where the assembler expects to find them. eg.
18873 (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 4)
18874 without this hack would be output as "x@toc+4". We
18875 want "x+4@toc". */
18876 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18877 else
18878 output_addr_const (file, x);
18880 return;
18882 case '&':
18883 if (const char *name = get_some_local_dynamic_name ())
18884 assemble_name (file, name);
18885 else
18886 output_operand_lossage ("'%%&' used without any "
18887 "local dynamic TLS references");
18888 return;
18890 default:
18891 output_operand_lossage ("invalid %%xn code");
18895 /* Print the address of an operand. */
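/* Illustrative: (reg 5) prints as "0(5)", (plus (reg 9) (const_int 16))
   as "16(9)", and (plus (reg 9) (reg 10)) as "9,10"; under -mregnames
   symbolic register names are printed instead.  */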
18897 void
18898 print_operand_address (FILE *file, rtx x)
18900 if (REG_P (x))
18901 fprintf (file, "0(%s)", reg_names[ REGNO (x) ]);
18902 else if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == CONST
18903 || GET_CODE (x) == LABEL_REF)
18905 output_addr_const (file, x);
18906 if (small_data_operand (x, GET_MODE (x)))
18907 fprintf (file, "@%s(%s)", SMALL_DATA_RELOC,
18908 reg_names[SMALL_DATA_REG]);
18909 else
18910 gcc_assert (!TARGET_TOC);
18912 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
18913 && REG_P (XEXP (x, 1)))
18915 if (REGNO (XEXP (x, 0)) == 0)
18916 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 1)) ],
18917 reg_names[ REGNO (XEXP (x, 0)) ]);
18918 else
18919 fprintf (file, "%s,%s", reg_names[ REGNO (XEXP (x, 0)) ],
18920 reg_names[ REGNO (XEXP (x, 1)) ]);
18922 else if (GET_CODE (x) == PLUS && REG_P (XEXP (x, 0))
18923 && GET_CODE (XEXP (x, 1)) == CONST_INT)
18924 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
18925 INTVAL (XEXP (x, 1)), reg_names[ REGNO (XEXP (x, 0)) ]);
18926 #if TARGET_MACHO
18927 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
18928 && CONSTANT_P (XEXP (x, 1)))
18930 fprintf (file, "lo16(");
18931 output_addr_const (file, XEXP (x, 1));
18932 fprintf (file, ")(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
18934 #endif
18935 #if TARGET_ELF
18936 else if (GET_CODE (x) == LO_SUM && REG_P (XEXP (x, 0))
18937 && CONSTANT_P (XEXP (x, 1)))
18939 output_addr_const (file, XEXP (x, 1));
18940 fprintf (file, "@l(%s)", reg_names[ REGNO (XEXP (x, 0)) ]);
18942 #endif
18943 else if (toc_relative_expr_p (x, false))
18945 /* This hack along with a corresponding hack in
18946 rs6000_output_addr_const_extra arranges to output addends
18947 where the assembler expects to find them. eg.
18948 (lo_sum (reg 9)
18949 . (plus (unspec [(symbol_ref ("x")) (reg 2)] tocrel) 8))
18950 without this hack would be output as "x@toc+8@l(9)". We
18951 want "x+8@toc@l(9)". */
18952 output_addr_const (file, CONST_CAST_RTX (tocrel_base));
18953 if (GET_CODE (x) == LO_SUM)
18954 fprintf (file, "@l(%s)", reg_names[REGNO (XEXP (x, 0))]);
18955 else
18956 fprintf (file, "(%s)", reg_names[REGNO (XVECEXP (tocrel_base, 0, 1))]);
18958 else
18959 gcc_unreachable ();
18962 /* Implement TARGET_OUTPUT_ADDR_CONST_EXTRA. */
18964 static bool
18965 rs6000_output_addr_const_extra (FILE *file, rtx x)
18967 if (GET_CODE (x) == UNSPEC)
18968 switch (XINT (x, 1))
18970 case UNSPEC_TOCREL:
18971 gcc_checking_assert (GET_CODE (XVECEXP (x, 0, 0)) == SYMBOL_REF
18972 && REG_P (XVECEXP (x, 0, 1))
18973 && REGNO (XVECEXP (x, 0, 1)) == TOC_REGISTER);
18974 output_addr_const (file, XVECEXP (x, 0, 0));
18975 if (x == tocrel_base && tocrel_offset != const0_rtx)
18977 if (INTVAL (tocrel_offset) >= 0)
18978 fprintf (file, "+");
18979 output_addr_const (file, CONST_CAST_RTX (tocrel_offset));
18981 if (!TARGET_AIX || (TARGET_ELF && TARGET_MINIMAL_TOC))
18983 putc ('-', file);
18984 assemble_name (file, toc_label_name);
18986 else if (TARGET_ELF)
18987 fputs ("@toc", file);
18988 return true;
18990 #if TARGET_MACHO
18991 case UNSPEC_MACHOPIC_OFFSET:
18992 output_addr_const (file, XVECEXP (x, 0, 0));
18993 putc ('-', file);
18994 machopic_output_function_base_name (file);
18995 return true;
18996 #endif
18998 return false;
19001 /* Target hook for assembling integer objects. The PowerPC version has
19002 to handle fixup entries for relocatable code if RELOCATABLE_NEEDS_FIXUP
19003 is defined. It also needs to handle DI-mode objects on 64-bit
19004 targets. */
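/* Illustrative -mrelocatable output for one fixup entry (label and
   symbol names are hypothetical):
       .LCP0:
               .long (sym)@fixup
               .section ".fixup","aw"
               .align 2
               .long .LCP0
               .previous  */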
19006 static bool
19007 rs6000_assemble_integer (rtx x, unsigned int size, int aligned_p)
19009 #ifdef RELOCATABLE_NEEDS_FIXUP
19010 /* Special handling for SI values. */
19011 if (RELOCATABLE_NEEDS_FIXUP && size == 4 && aligned_p)
19013 static int recurse = 0;
19015 /* For -mrelocatable, we mark all addresses that need to be fixed up in
19016 the .fixup section. Since the TOC section is already relocated, we
19017 don't need to mark it here. We used to skip the text section, but it
19018 should never be valid for relocated addresses to be placed in the text
19019 section. */
19020 if (TARGET_RELOCATABLE
19021 && in_section != toc_section
19022 && !recurse
19023 && !CONST_SCALAR_INT_P (x)
19024 && CONSTANT_P (x))
19026 char buf[256];
19028 recurse = 1;
19029 ASM_GENERATE_INTERNAL_LABEL (buf, "LCP", fixuplabelno);
19030 fixuplabelno++;
19031 ASM_OUTPUT_LABEL (asm_out_file, buf);
19032 fprintf (asm_out_file, "\t.long\t(");
19033 output_addr_const (asm_out_file, x);
19034 fprintf (asm_out_file, ")@fixup\n");
19035 fprintf (asm_out_file, "\t.section\t\".fixup\",\"aw\"\n");
19036 ASM_OUTPUT_ALIGN (asm_out_file, 2);
19037 fprintf (asm_out_file, "\t.long\t");
19038 assemble_name (asm_out_file, buf);
19039 fprintf (asm_out_file, "\n\t.previous\n");
19040 recurse = 0;
19041 return true;
19043 /* Remove initial .'s to turn a -mcall-aixdesc function
19044 address into the address of the descriptor, not the function
19045 itself. */
19046 else if (GET_CODE (x) == SYMBOL_REF
19047 && XSTR (x, 0)[0] == '.'
19048 && DEFAULT_ABI == ABI_AIX)
19050 const char *name = XSTR (x, 0);
19051 while (*name == '.')
19052 name++;
19054 fprintf (asm_out_file, "\t.long\t%s\n", name);
19055 return true;
19058 #endif /* RELOCATABLE_NEEDS_FIXUP */
19059 return default_assemble_integer (x, size, aligned_p);
19062 #if defined (HAVE_GAS_HIDDEN) && !TARGET_MACHO
19063 /* Emit an assembler directive to set symbol visibility for DECL to
19064 VISIBILITY_TYPE. */
19066 static void
19067 rs6000_assemble_visibility (tree decl, int vis)
19069 if (TARGET_XCOFF)
19070 return;
19072 /* Functions need to have their entry point symbol visibility set as
19073 well as their descriptor symbol visibility. */
19074 if (DEFAULT_ABI == ABI_AIX
19075 && DOT_SYMBOLS
19076 && TREE_CODE (decl) == FUNCTION_DECL)
19078 static const char * const visibility_types[] = {
19079 NULL, "internal", "hidden", "protected"
19082 const char *name, *type;
19084 name = ((* targetm.strip_name_encoding)
19085 (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl))));
19086 type = visibility_types[vis];
19088 fprintf (asm_out_file, "\t.%s\t%s\n", type, name);
19089 fprintf (asm_out_file, "\t.%s\t.%s\n", type, name);
19091 else
19092 default_assemble_visibility (decl, vis);
19094 #endif
19096 enum rtx_code
19097 rs6000_reverse_condition (machine_mode mode, enum rtx_code code)
19099 /* Reversal of FP compares needs care -- an ordered compare
19100 becomes an unordered compare and vice versa. */
19101 if (mode == CCFPmode
19102 && (!flag_finite_math_only
19103 || code == UNLT || code == UNLE || code == UNGT || code == UNGE
19104 || code == UNEQ || code == LTGT))
19105 return reverse_condition_maybe_unordered (code);
19106 else
19107 return reverse_condition (code);
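/* Illustrative: under IEEE NaN semantics a CCFPmode GE reverses to
   UNLT rather than LT, since "not (a >= b)" must also hold when the
   operands are unordered; with -ffinite-math-only the plain reversal
   suffices.  */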
19110 /* Generate a compare for CODE. Return a brand-new rtx that
19111 represents the result of the compare. */
19113 static rtx
19114 rs6000_generate_compare (rtx cmp, machine_mode mode)
19116 machine_mode comp_mode;
19117 rtx compare_result;
19118 enum rtx_code code = GET_CODE (cmp);
19119 rtx op0 = XEXP (cmp, 0);
19120 rtx op1 = XEXP (cmp, 1);
19122 if (FLOAT_MODE_P (mode))
19123 comp_mode = CCFPmode;
19124 else if (code == GTU || code == LTU
19125 || code == GEU || code == LEU)
19126 comp_mode = CCUNSmode;
19127 else if ((code == EQ || code == NE)
19128 && unsigned_reg_p (op0)
19129 && (unsigned_reg_p (op1)
19130 || (CONST_INT_P (op1) && INTVAL (op1) != 0)))
19131 /* These are unsigned values; perhaps there will be a later
19132 ordering compare that can be shared with this one. */
19133 comp_mode = CCUNSmode;
19134 else
19135 comp_mode = CCmode;
19137 /* If we have an unsigned compare, make sure we don't have a signed value as
19138 an immediate. */
19139 if (comp_mode == CCUNSmode && GET_CODE (op1) == CONST_INT
19140 && INTVAL (op1) < 0)
19142 op0 = copy_rtx_if_shared (op0);
19143 op1 = force_reg (GET_MODE (op0), op1);
19144 cmp = gen_rtx_fmt_ee (code, GET_MODE (cmp), op0, op1);
19147 /* First, the compare. */
19148 compare_result = gen_reg_rtx (comp_mode);
19150 /* E500 FP compare instructions on the GPRs. Yuck! */
19151 if ((!TARGET_FPRS && TARGET_HARD_FLOAT)
19152 && FLOAT_MODE_P (mode))
19154 rtx cmp, or_result, compare_result2;
19155 machine_mode op_mode = GET_MODE (op0);
19156 bool reverse_p;
19158 if (op_mode == VOIDmode)
19159 op_mode = GET_MODE (op1);
19161 /* First reverse the condition codes that aren't directly supported. */
19162 switch (code)
19164 case NE:
19165 case UNLT:
19166 case UNLE:
19167 case UNGT:
19168 case UNGE:
19169 code = reverse_condition_maybe_unordered (code);
19170 reverse_p = true;
19171 break;
19173 case EQ:
19174 case LT:
19175 case LE:
19176 case GT:
19177 case GE:
19178 reverse_p = false;
19179 break;
19181 default:
19182 gcc_unreachable ();
19185 /* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
19186 This explains the following mess. */
19188 switch (code)
19190 case EQ:
19191 switch (op_mode)
19193 case SFmode:
19194 cmp = (flag_finite_math_only && !flag_trapping_math)
19195 ? gen_tstsfeq_gpr (compare_result, op0, op1)
19196 : gen_cmpsfeq_gpr (compare_result, op0, op1);
19197 break;
19199 case DFmode:
19200 cmp = (flag_finite_math_only && !flag_trapping_math)
19201 ? gen_tstdfeq_gpr (compare_result, op0, op1)
19202 : gen_cmpdfeq_gpr (compare_result, op0, op1);
19203 break;
19205 case TFmode:
19206 cmp = (flag_finite_math_only && !flag_trapping_math)
19207 ? gen_tsttfeq_gpr (compare_result, op0, op1)
19208 : gen_cmptfeq_gpr (compare_result, op0, op1);
19209 break;
19211 default:
19212 gcc_unreachable ();
19214 break;
19216 case GT:
19217 case GE:
19218 switch (op_mode)
19220 case SFmode:
19221 cmp = (flag_finite_math_only && !flag_trapping_math)
19222 ? gen_tstsfgt_gpr (compare_result, op0, op1)
19223 : gen_cmpsfgt_gpr (compare_result, op0, op1);
19224 break;
19226 case DFmode:
19227 cmp = (flag_finite_math_only && !flag_trapping_math)
19228 ? gen_tstdfgt_gpr (compare_result, op0, op1)
19229 : gen_cmpdfgt_gpr (compare_result, op0, op1);
19230 break;
19232 case TFmode:
19233 cmp = (flag_finite_math_only && !flag_trapping_math)
19234 ? gen_tsttfgt_gpr (compare_result, op0, op1)
19235 : gen_cmptfgt_gpr (compare_result, op0, op1);
19236 break;
19238 default:
19239 gcc_unreachable ();
19241 break;
19243 case LT:
19244 case LE:
19245 switch (op_mode)
19247 case SFmode:
19248 cmp = (flag_finite_math_only && !flag_trapping_math)
19249 ? gen_tstsflt_gpr (compare_result, op0, op1)
19250 : gen_cmpsflt_gpr (compare_result, op0, op1);
19251 break;
19253 case DFmode:
19254 cmp = (flag_finite_math_only && !flag_trapping_math)
19255 ? gen_tstdflt_gpr (compare_result, op0, op1)
19256 : gen_cmpdflt_gpr (compare_result, op0, op1);
19257 break;
19259 case TFmode:
19260 cmp = (flag_finite_math_only && !flag_trapping_math)
19261 ? gen_tsttflt_gpr (compare_result, op0, op1)
19262 : gen_cmptflt_gpr (compare_result, op0, op1);
19263 break;
19265 default:
19266 gcc_unreachable ();
19268 break;
19270 default:
19271 gcc_unreachable ();
19274 /* Synthesize LE and GE from LT/GT || EQ. */
19275 if (code == LE || code == GE)
19277 emit_insn (cmp);
19279 compare_result2 = gen_reg_rtx (CCFPmode);
19281 /* Do the EQ. */
19282 switch (op_mode)
19284 case SFmode:
19285 cmp = (flag_finite_math_only && !flag_trapping_math)
19286 ? gen_tstsfeq_gpr (compare_result2, op0, op1)
19287 : gen_cmpsfeq_gpr (compare_result2, op0, op1);
19288 break;
19290 case DFmode:
19291 cmp = (flag_finite_math_only && !flag_trapping_math)
19292 ? gen_tstdfeq_gpr (compare_result2, op0, op1)
19293 : gen_cmpdfeq_gpr (compare_result2, op0, op1);
19294 break;
19296 case TFmode:
19297 cmp = (flag_finite_math_only && !flag_trapping_math)
19298 ? gen_tsttfeq_gpr (compare_result2, op0, op1)
19299 : gen_cmptfeq_gpr (compare_result2, op0, op1);
19300 break;
19302 default:
19303 gcc_unreachable ();
19306 emit_insn (cmp);
19308 /* OR them together. */
19309 or_result = gen_reg_rtx (CCFPmode);
19310 cmp = gen_e500_cr_ior_compare (or_result, compare_result,
19311 compare_result2);
19312 compare_result = or_result;
19315 code = reverse_p ? NE : EQ;
19317 emit_insn (cmp);
19319 else
19321 /* Generate XLC-compatible TFmode compare as PARALLEL with extra
19322 CLOBBERs to match cmptf_internal2 pattern. */
19323 if (comp_mode == CCFPmode && TARGET_XL_COMPAT
19324 && GET_MODE (op0) == TFmode
19325 && !TARGET_IEEEQUAD
19326 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
19327 emit_insn (gen_rtx_PARALLEL (VOIDmode,
19328 gen_rtvec (10,
19329 gen_rtx_SET (VOIDmode,
19330 compare_result,
19331 gen_rtx_COMPARE (comp_mode, op0, op1)),
19332 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19333 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19334 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19335 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19336 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19337 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19338 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19339 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (DFmode)),
19340 gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (Pmode)))));
19341 else if (GET_CODE (op1) == UNSPEC
19342 && XINT (op1, 1) == UNSPEC_SP_TEST)
19344 rtx op1b = XVECEXP (op1, 0, 0);
19345 comp_mode = CCEQmode;
19346 compare_result = gen_reg_rtx (CCEQmode);
19347 if (TARGET_64BIT)
19348 emit_insn (gen_stack_protect_testdi (compare_result, op0, op1b));
19349 else
19350 emit_insn (gen_stack_protect_testsi (compare_result, op0, op1b));
19352 else
19353 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
19354 gen_rtx_COMPARE (comp_mode, op0, op1)));
19357 /* Some kinds of FP comparisons need an OR operation;
19358 under flag_finite_math_only we don't bother. */
19359 if (FLOAT_MODE_P (mode)
19360 && !flag_finite_math_only
19361 && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
19362 && (code == LE || code == GE
19363 || code == UNEQ || code == LTGT
19364 || code == UNGT || code == UNLT))
19366 enum rtx_code or1, or2;
19367 rtx or1_rtx, or2_rtx, compare2_rtx;
19368 rtx or_result = gen_reg_rtx (CCEQmode);
19370 switch (code)
19372 case LE: or1 = LT; or2 = EQ; break;
19373 case GE: or1 = GT; or2 = EQ; break;
19374 case UNEQ: or1 = UNORDERED; or2 = EQ; break;
19375 case LTGT: or1 = LT; or2 = GT; break;
19376 case UNGT: or1 = UNORDERED; or2 = GT; break;
19377 case UNLT: or1 = UNORDERED; or2 = LT; break;
19378 default: gcc_unreachable ();
19380 validate_condition_mode (or1, comp_mode);
19381 validate_condition_mode (or2, comp_mode);
19382 or1_rtx = gen_rtx_fmt_ee (or1, SImode, compare_result, const0_rtx);
19383 or2_rtx = gen_rtx_fmt_ee (or2, SImode, compare_result, const0_rtx);
19384 compare2_rtx = gen_rtx_COMPARE (CCEQmode,
19385 gen_rtx_IOR (SImode, or1_rtx, or2_rtx),
19386 const_true_rtx);
19387 emit_insn (gen_rtx_SET (VOIDmode, or_result, compare2_rtx));
19389 compare_result = or_result;
19390 code = EQ;
19393 validate_condition_mode (code, GET_MODE (compare_result));
19395 return gen_rtx_fmt_ee (code, VOIDmode, compare_result, const0_rtx);
19399 /* Emit the RTL for an sISEL pattern. */
19401 void
19402 rs6000_emit_sISEL (machine_mode mode ATTRIBUTE_UNUSED, rtx operands[])
19404 rs6000_emit_int_cmove (operands[0], operands[1], const1_rtx, const0_rtx);
19407 void
19408 rs6000_emit_sCOND (machine_mode mode, rtx operands[])
19410 rtx condition_rtx;
19411 machine_mode op_mode;
19412 enum rtx_code cond_code;
19413 rtx result = operands[0];
19415 if (TARGET_ISEL && (mode == SImode || mode == DImode))
19417 rs6000_emit_sISEL (mode, operands);
19418 return;
19421 condition_rtx = rs6000_generate_compare (operands[1], mode);
19422 cond_code = GET_CODE (condition_rtx);
19424 if (FLOAT_MODE_P (mode)
19425 && !TARGET_FPRS && TARGET_HARD_FLOAT)
19427 rtx t;
19429 PUT_MODE (condition_rtx, SImode);
19430 t = XEXP (condition_rtx, 0);
19432 gcc_assert (cond_code == NE || cond_code == EQ);
19434 if (cond_code == NE)
19435 emit_insn (gen_e500_flip_gt_bit (t, t));
19437 emit_insn (gen_move_from_CR_gt_bit (result, t));
19438 return;
19441 if (cond_code == NE
19442 || cond_code == GE || cond_code == LE
19443 || cond_code == GEU || cond_code == LEU
19444 || cond_code == ORDERED || cond_code == UNGE || cond_code == UNLE)
19446 rtx not_result = gen_reg_rtx (CCEQmode);
19447 rtx not_op, rev_cond_rtx;
19448 machine_mode cc_mode;
19450 cc_mode = GET_MODE (XEXP (condition_rtx, 0));
19452 rev_cond_rtx = gen_rtx_fmt_ee (rs6000_reverse_condition (cc_mode, cond_code),
19453 SImode, XEXP (condition_rtx, 0), const0_rtx);
19454 not_op = gen_rtx_COMPARE (CCEQmode, rev_cond_rtx, const0_rtx);
19455 emit_insn (gen_rtx_SET (VOIDmode, not_result, not_op));
19456 condition_rtx = gen_rtx_EQ (VOIDmode, not_result, const0_rtx);
19459 op_mode = GET_MODE (XEXP (operands[1], 0));
19460 if (op_mode == VOIDmode)
19461 op_mode = GET_MODE (XEXP (operands[1], 1));
19463 if (TARGET_POWERPC64 && (op_mode == DImode || FLOAT_MODE_P (mode)))
19465 PUT_MODE (condition_rtx, DImode);
19466 convert_move (result, condition_rtx, 0);
19468 else
19470 PUT_MODE (condition_rtx, SImode);
19471 emit_insn (gen_rtx_SET (VOIDmode, result, condition_rtx));
19477 /* Emit a conditional branch: operands[0] holds the comparison and operands[3] the branch target. */
19477 void
19478 rs6000_emit_cbranch (machine_mode mode, rtx operands[])
19480 rtx condition_rtx, loc_ref;
19482 condition_rtx = rs6000_generate_compare (operands[0], mode);
19483 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
19484 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
19485 gen_rtx_IF_THEN_ELSE (VOIDmode, condition_rtx,
19486 loc_ref, pc_rtx)));
19489 /* Return the string to output a conditional branch to LABEL, which is
19490 the operand template of the label, or NULL if the branch is really a
19491 conditional return.
19493 OP is the conditional expression. XEXP (OP, 0) is assumed to be a
19494 condition code register and its mode specifies what kind of
19495 comparison we made.
19497 REVERSED is nonzero if we should reverse the sense of the comparison.
19499 INSN is the insn. */
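/* Illustrative results (label names are hypothetical): an EQ test on
   CR0 predicted taken produces "beq+ 0,.L25"; when the target is out of
   conditional-branch range, the sense is reversed and an unconditional
   branch covers the distance: "bne 0,$+8\n\tb .L25".  */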
19501 char *
19502 output_cbranch (rtx op, const char *label, int reversed, rtx_insn *insn)
19504 static char string[64];
19505 enum rtx_code code = GET_CODE (op);
19506 rtx cc_reg = XEXP (op, 0);
19507 machine_mode mode = GET_MODE (cc_reg);
19508 int cc_regno = REGNO (cc_reg) - CR0_REGNO;
19509 int need_longbranch = label != NULL && get_attr_length (insn) == 8;
19510 int really_reversed = reversed ^ need_longbranch;
19511 char *s = string;
19512 const char *ccode;
19513 const char *pred;
19514 rtx note;
19516 validate_condition_mode (code, mode);
19518 /* Work out which way this really branches. We could use
19519 reverse_condition_maybe_unordered here always but this
19520 makes the resulting assembler clearer. */
19521 if (really_reversed)
19523 /* Reversal of FP compares needs care -- an ordered compare
19524 becomes an unordered compare and vice versa. */
19525 if (mode == CCFPmode)
19526 code = reverse_condition_maybe_unordered (code);
19527 else
19528 code = reverse_condition (code);
19531 if ((!TARGET_FPRS && TARGET_HARD_FLOAT) && mode == CCFPmode)
19533 /* The efscmp/tst* instructions twiddle bit 2, which maps nicely
19534 to the GT bit. */
19535 switch (code)
19537 case EQ:
19538 /* Opposite of GT. */
19539 code = GT;
19540 break;
19542 case NE:
19543 code = UNLE;
19544 break;
19546 default:
19547 gcc_unreachable ();
19551 switch (code)
19553 /* Not all of these are actually distinct opcodes, but
19554 we distinguish them for clarity of the resulting assembler. */
19555 case NE: case LTGT:
19556 ccode = "ne"; break;
19557 case EQ: case UNEQ:
19558 ccode = "eq"; break;
19559 case GE: case GEU:
19560 ccode = "ge"; break;
19561 case GT: case GTU: case UNGT:
19562 ccode = "gt"; break;
19563 case LE: case LEU:
19564 ccode = "le"; break;
19565 case LT: case LTU: case UNLT:
19566 ccode = "lt"; break;
19567 case UNORDERED: ccode = "un"; break;
19568 case ORDERED: ccode = "nu"; break;
19569 case UNGE: ccode = "nl"; break;
19570 case UNLE: ccode = "ng"; break;
19571 default:
19572 gcc_unreachable ();
19575 /* Maybe we have a guess as to how likely the branch is. */
19576 pred = "";
19577 note = find_reg_note (insn, REG_BR_PROB, NULL_RTX);
19578 if (note != NULL_RTX)
19580 /* PROB is the difference from 50%. */
19581 int prob = XINT (note, 0) - REG_BR_PROB_BASE / 2;
19583 /* Only hint for highly probable/improbable branches on newer
19584 cpus as static prediction overrides processor dynamic
19585 prediction. For older cpus we may as well always hint, but
19586 assume not taken for branches that are very close to 50% as a
19587 mispredicted taken branch is more expensive than a
19588 mispredicted not-taken branch. */
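/* Illustrative numbers: REG_BR_PROB_BASE is 10000, so the check below
   requires the note to be more than 4800 units away from an even split,
   and the "+" (predict taken) hint additionally needs the bias to clear
   a 500-unit threshold in the branch's actual direction.  */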
19589 if (rs6000_always_hint
19590 || (abs (prob) > REG_BR_PROB_BASE / 100 * 48
19591 && br_prob_note_reliable_p (note)))
19593 if (abs (prob) > REG_BR_PROB_BASE / 20
19594 && ((prob > 0) ^ need_longbranch))
19595 pred = "+";
19596 else
19597 pred = "-";
19601 if (label == NULL)
19602 s += sprintf (s, "b%slr%s ", ccode, pred);
19603 else
19604 s += sprintf (s, "b%s%s ", ccode, pred);
19606 /* We need to escape any '%' characters in the reg_names string.
19607 Assume they'd only be the first character.... */
19608 if (reg_names[cc_regno + CR0_REGNO][0] == '%')
19609 *s++ = '%';
19610 s += sprintf (s, "%s", reg_names[cc_regno + CR0_REGNO]);
19612 if (label != NULL)
19614 /* If the branch distance was too far, we may have to use an
19615 unconditional branch to go the distance. */
19616 if (need_longbranch)
19617 s += sprintf (s, ",$+8\n\tb %s", label);
19618 else
19619 s += sprintf (s, ",%s", label);
19622 return string;
19625 /* Return the string to flip the GT bit on a CR. */
19626 char *
19627 output_e500_flip_gt_bit (rtx dst, rtx src)
19629 static char string[64];
19630 int a, b;
19632 gcc_assert (GET_CODE (dst) == REG && CR_REGNO_P (REGNO (dst))
19633 && GET_CODE (src) == REG && CR_REGNO_P (REGNO (src)));
19635 /* GT bit. */
19636 a = 4 * (REGNO (dst) - CR0_REGNO) + 1;
19637 b = 4 * (REGNO (src) - CR0_REGNO) + 1;
19639 sprintf (string, "crnot %d,%d", a, b);
19640 return string;
19643 /* Return insn for VSX or Altivec comparisons. */
19645 static rtx
19646 rs6000_emit_vector_compare_inner (enum rtx_code code, rtx op0, rtx op1)
19648 rtx mask;
19649 machine_mode mode = GET_MODE (op0);
19651 switch (code)
19653 default:
19654 break;
19656 case GE:
19657 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
19658 return NULL_RTX;
19660 case EQ:
19661 case GT:
19662 case GTU:
19663 case ORDERED:
19664 case UNORDERED:
19665 case UNEQ:
19666 case LTGT:
19667 mask = gen_reg_rtx (mode);
19668 emit_insn (gen_rtx_SET (VOIDmode,
19669 mask,
19670 gen_rtx_fmt_ee (code, mode, op0, op1)));
19671 return mask;
19674 return NULL_RTX;
19677 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
19678 DMODE is expected destination mode. This is a recursive function. */
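/* Illustrative synthesis performed below: LT/LTU swap operands to use
   GT/GTU, NE and the UN* codes one's-complement the reversed comparison
   (A != B becomes ~(A == B)), and GE/GEU/LE/LEU are built as the strict
   compare ORed with EQ.  */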
19680 static rtx
19681 rs6000_emit_vector_compare (enum rtx_code rcode,
19682 rtx op0, rtx op1,
19683 machine_mode dmode)
19685 rtx mask;
19686 bool swap_operands = false;
19687 bool try_again = false;
19689 gcc_assert (VECTOR_UNIT_ALTIVEC_OR_VSX_P (dmode));
19690 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
19692 /* See if the comparison works as is. */
19693 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19694 if (mask)
19695 return mask;
19697 switch (rcode)
19699 case LT:
19700 rcode = GT;
19701 swap_operands = true;
19702 try_again = true;
19703 break;
19704 case LTU:
19705 rcode = GTU;
19706 swap_operands = true;
19707 try_again = true;
19708 break;
19709 case NE:
19710 case UNLE:
19711 case UNLT:
19712 case UNGE:
19713 case UNGT:
19714 /* Invert condition and try again.
19715 e.g., A != B becomes ~(A==B). */
19717 enum rtx_code rev_code;
19718 enum insn_code nor_code;
19719 rtx mask2;
19721 rev_code = reverse_condition_maybe_unordered (rcode);
19722 if (rev_code == UNKNOWN)
19723 return NULL_RTX;
19725 nor_code = optab_handler (one_cmpl_optab, dmode);
19726 if (nor_code == CODE_FOR_nothing)
19727 return NULL_RTX;
19729 mask2 = rs6000_emit_vector_compare (rev_code, op0, op1, dmode);
19730 if (!mask2)
19731 return NULL_RTX;
19733 mask = gen_reg_rtx (dmode);
19734 emit_insn (GEN_FCN (nor_code) (mask, mask2));
19735 return mask;
19737 break;
19738 case GE:
19739 case GEU:
19740 case LE:
19741 case LEU:
19742 /* Try GT/GTU/LT/LTU OR EQ */
19744 rtx c_rtx, eq_rtx;
19745 enum insn_code ior_code;
19746 enum rtx_code new_code;
19748 switch (rcode)
19750 case GE:
19751 new_code = GT;
19752 break;
19754 case GEU:
19755 new_code = GTU;
19756 break;
19758 case LE:
19759 new_code = LT;
19760 break;
19762 case LEU:
19763 new_code = LTU;
19764 break;
19766 default:
19767 gcc_unreachable ();
19770 ior_code = optab_handler (ior_optab, dmode);
19771 if (ior_code == CODE_FOR_nothing)
19772 return NULL_RTX;
19774 c_rtx = rs6000_emit_vector_compare (new_code, op0, op1, dmode);
19775 if (!c_rtx)
19776 return NULL_RTX;
19778 eq_rtx = rs6000_emit_vector_compare (EQ, op0, op1, dmode);
19779 if (!eq_rtx)
19780 return NULL_RTX;
19782 mask = gen_reg_rtx (dmode);
19783 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
19784 return mask;
19786 break;
19787 default:
19788 return NULL_RTX;
19791 if (try_again)
19793 if (swap_operands)
19795 rtx tmp;
19796 tmp = op0;
19797 op0 = op1;
19798 op1 = tmp;
19801 mask = rs6000_emit_vector_compare_inner (rcode, op0, op1);
19802 if (mask)
19803 return mask;
19806 /* You only get two chances. */
19807 return NULL_RTX;
19810 /* Emit vector conditional expression. DEST is destination. OP_TRUE and
19811 OP_FALSE are two VEC_COND_EXPR operands. CC_OP0 and CC_OP1 are the two
19812 operands for the relation operation COND. */
19815 rs6000_emit_vector_cond_expr (rtx dest, rtx op_true, rtx op_false,
19816 rtx cond, rtx cc_op0, rtx cc_op1)
19818 machine_mode dest_mode = GET_MODE (dest);
19819 machine_mode mask_mode = GET_MODE (cc_op0);
19820 enum rtx_code rcode = GET_CODE (cond);
19821 machine_mode cc_mode = CCmode;
19822 rtx mask;
19823 rtx cond2;
19824 rtx tmp;
19825 bool invert_move = false;
19827 if (VECTOR_UNIT_NONE_P (dest_mode))
19828 return 0;
19830 gcc_assert (GET_MODE_SIZE (dest_mode) == GET_MODE_SIZE (mask_mode)
19831 && GET_MODE_NUNITS (dest_mode) == GET_MODE_NUNITS (mask_mode));
19833 switch (rcode)
19835 /* Swap operands if we can, and fall back to doing the operation as
19836 specified, and doing a NOR to invert the test. */
19837 case NE:
19838 case UNLE:
19839 case UNLT:
19840 case UNGE:
19841 case UNGT:
19842 /* Invert condition and try again.
19843 e.g., A = (B != C) ? D : E becomes A = (B == C) ? E : D. */
19844 invert_move = true;
19845 rcode = reverse_condition_maybe_unordered (rcode);
19846 if (rcode == UNKNOWN)
19847 return 0;
19848 break;
19850 /* Mark unsigned tests with CCUNSmode. */
19851 case GTU:
19852 case GEU:
19853 case LTU:
19854 case LEU:
19855 cc_mode = CCUNSmode;
19856 break;
19858 default:
19859 break;
19862 /* Get the vector mask for the given relational operations. */
19863 mask = rs6000_emit_vector_compare (rcode, cc_op0, cc_op1, mask_mode);
19865 if (!mask)
19866 return 0;
19868 if (invert_move)
19870 tmp = op_true;
19871 op_true = op_false;
19872 op_false = tmp;
19875 cond2 = gen_rtx_fmt_ee (NE, cc_mode, gen_lowpart (dest_mode, mask),
19876 CONST0_RTX (dest_mode));
19877 emit_insn (gen_rtx_SET (VOIDmode,
19878 dest,
19879 gen_rtx_IF_THEN_ELSE (dest_mode,
19880 cond2,
19881 op_true,
19882 op_false)));
19883 return 1;
19886 /* Emit a conditional move: move TRUE_COND to DEST if OP applied to the
19887 operands of the last comparison is nonzero/true, FALSE_COND if it
19888 is zero/false. Return 0 if the hardware has no such operation. */
19891 rs6000_emit_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
19893 enum rtx_code code = GET_CODE (op);
19894 rtx op0 = XEXP (op, 0);
19895 rtx op1 = XEXP (op, 1);
19896 REAL_VALUE_TYPE c1;
19897 machine_mode compare_mode = GET_MODE (op0);
19898 machine_mode result_mode = GET_MODE (dest);
19899 rtx temp;
19900 bool is_against_zero;
19902 /* These modes should always match. */
19903 if (GET_MODE (op1) != compare_mode
19904 /* In the isel case however, we can use a compare immediate, so
19905 op1 may be a small constant. */
19906 && (!TARGET_ISEL || !short_cint_operand (op1, VOIDmode)))
19907 return 0;
19908 if (GET_MODE (true_cond) != result_mode)
19909 return 0;
19910 if (GET_MODE (false_cond) != result_mode)
19911 return 0;
19913 /* Don't allow using floating point comparisons for integer results for
19914 now. */
19915 if (FLOAT_MODE_P (compare_mode) && !FLOAT_MODE_P (result_mode))
19916 return 0;
19918 /* First, work out if the hardware can do this at all, or
19919 if it's too slow.... */
19920 if (!FLOAT_MODE_P (compare_mode))
19922 if (TARGET_ISEL)
19923 return rs6000_emit_int_cmove (dest, op, true_cond, false_cond);
19924 return 0;
19926 else if (TARGET_HARD_FLOAT && !TARGET_FPRS
19927 && SCALAR_FLOAT_MODE_P (compare_mode))
19928 return 0;
19930 is_against_zero = op1 == CONST0_RTX (compare_mode);
19932 /* A floating-point subtract might overflow, underflow, or produce
19933 an inexact result, thus changing the floating-point flags, so it
19934 can't be generated if we care about that. It's safe if one side
19935 of the construct is zero, since then no subtract will be
19936 generated. */
19937 if (SCALAR_FLOAT_MODE_P (compare_mode)
19938 && flag_trapping_math && ! is_against_zero)
19939 return 0;
19941 /* Eliminate half of the comparisons by switching operands, this
19942 makes the remaining code simpler. */
19943 if (code == UNLT || code == UNGT || code == UNORDERED || code == NE
19944 || code == LTGT || code == LT || code == UNLE)
19946 code = reverse_condition_maybe_unordered (code);
19947 temp = true_cond;
19948 true_cond = false_cond;
19949 false_cond = temp;
19952 /* UNEQ and LTGT take four instructions for a comparison with zero,
19953 so it'll probably be faster to use a branch here too. */
19954 if (code == UNEQ && HONOR_NANS (compare_mode))
19955 return 0;
19957 if (GET_CODE (op1) == CONST_DOUBLE)
19958 REAL_VALUE_FROM_CONST_DOUBLE (c1, op1);
19960 /* We're going to try to implement comparisons by performing
19961 a subtract, then comparing against zero. Unfortunately,
19962 Inf - Inf is NaN which is not zero, and so if we don't
19963 know that the operand is finite and the comparison
19964 would treat EQ differently from UNORDERED, we can't do it. */
19965 if (HONOR_INFINITIES (compare_mode)
19966 && code != GT && code != UNGE
19967 && (GET_CODE (op1) != CONST_DOUBLE || real_isinf (&c1))
19968 /* Constructs of the form (a OP b ? a : b) are safe. */
19969 && ((! rtx_equal_p (op0, false_cond) && ! rtx_equal_p (op1, false_cond))
19970 || (! rtx_equal_p (op0, true_cond)
19971 && ! rtx_equal_p (op1, true_cond))))
19972 return 0;
19974 /* At this point we know we can use fsel. */
19976 /* Reduce the comparison to a comparison against zero. */
19977 if (! is_against_zero)
19979 temp = gen_reg_rtx (compare_mode);
19980 emit_insn (gen_rtx_SET (VOIDmode, temp,
19981 gen_rtx_MINUS (compare_mode, op0, op1)));
19982 op0 = temp;
19983 op1 = CONST0_RTX (compare_mode);
19986 /* If we don't care about NaNs we can reduce some of the comparisons
19987 down to faster ones. */
19988 if (! HONOR_NANS (compare_mode))
19989 switch (code)
19991 case GT:
19992 code = LE;
19993 temp = true_cond;
19994 true_cond = false_cond;
19995 false_cond = temp;
19996 break;
19997 case UNGE:
19998 code = GE;
19999 break;
20000 case UNEQ:
20001 code = EQ;
20002 break;
20003 default:
20004 break;
20007 /* Now, reduce everything down to a GE. */
20008 switch (code)
20010 case GE:
20011 break;
20013 case LE:
20014 temp = gen_reg_rtx (compare_mode);
20015 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20016 op0 = temp;
20017 break;
20019 case ORDERED:
20020 temp = gen_reg_rtx (compare_mode);
20021 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_ABS (compare_mode, op0)));
20022 op0 = temp;
20023 break;
20025 case EQ:
20026 temp = gen_reg_rtx (compare_mode);
20027 emit_insn (gen_rtx_SET (VOIDmode, temp,
20028 gen_rtx_NEG (compare_mode,
20029 gen_rtx_ABS (compare_mode, op0))));
20030 op0 = temp;
20031 break;
20033 case UNGE:
20034 /* a UNGE 0 <-> (a GE 0 || -a UNLT 0) */
20035 temp = gen_reg_rtx (result_mode);
20036 emit_insn (gen_rtx_SET (VOIDmode, temp,
20037 gen_rtx_IF_THEN_ELSE (result_mode,
20038 gen_rtx_GE (VOIDmode,
20039 op0, op1),
20040 true_cond, false_cond)));
20041 false_cond = true_cond;
20042 true_cond = temp;
20044 temp = gen_reg_rtx (compare_mode);
20045 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20046 op0 = temp;
20047 break;
20049 case GT:
20050 /* a GT 0 <-> (a GE 0 && -a UNLT 0) */
20051 temp = gen_reg_rtx (result_mode);
20052 emit_insn (gen_rtx_SET (VOIDmode, temp,
20053 gen_rtx_IF_THEN_ELSE (result_mode,
20054 gen_rtx_GE (VOIDmode,
20055 op0, op1),
20056 true_cond, false_cond)));
20057 true_cond = false_cond;
20058 false_cond = temp;
20060 temp = gen_reg_rtx (compare_mode);
20061 emit_insn (gen_rtx_SET (VOIDmode, temp, gen_rtx_NEG (compare_mode, op0)));
20062 op0 = temp;
20063 break;
20065 default:
20066 gcc_unreachable ();
20069 emit_insn (gen_rtx_SET (VOIDmode, dest,
20070 gen_rtx_IF_THEN_ELSE (result_mode,
20071 gen_rtx_GE (VOIDmode,
20072 op0, op1),
20073 true_cond, false_cond)));
20074 return 1;
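/* A sketch of the identity the fsel expansion above relies on:

       fsel FRT,FRA,FRC,FRB   =>   FRT = (FRA >= 0.0) ? FRC : FRB

   which is why every code is first reduced to "op0 >= 0": LE negates
   op0, ORDERED substitutes fabs (|x| >= 0 fails only for NaN), EQ
   substitutes -fabs (-|x| >= 0 only when x == 0), and GT/UNGE need a
   second, nested select, hence the extra IF_THEN_ELSE in those arms.  */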
20077 /* Same as above, but for ints (isel). */
20079 static int
20080 rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
20082 rtx condition_rtx, cr;
20083 machine_mode mode = GET_MODE (dest);
20084 enum rtx_code cond_code;
20085 rtx (*isel_func) (rtx, rtx, rtx, rtx, rtx);
20086 bool signedp;
20088 if (mode != SImode && (!TARGET_POWERPC64 || mode != DImode))
20089 return 0;
20091 /* We still have to do the compare, because isel doesn't do a
20092 compare; it just looks at the CRx bits set by a previous compare
20093 instruction. */
20094 condition_rtx = rs6000_generate_compare (op, mode);
20095 cond_code = GET_CODE (condition_rtx);
20096 cr = XEXP (condition_rtx, 0);
20097 signedp = GET_MODE (cr) == CCmode;
20099 isel_func = (mode == SImode
20100 ? (signedp ? gen_isel_signed_si : gen_isel_unsigned_si)
20101 : (signedp ? gen_isel_signed_di : gen_isel_unsigned_di));
20103 switch (cond_code)
20105 case LT: case GT: case LTU: case GTU: case EQ:
20106 /* isel handles these directly. */
20107 break;
20109 default:
20110 /* We need to swap the sense of the comparison. */
20112 rtx t = true_cond;
20113 true_cond = false_cond;
20114 false_cond = t;
20115 PUT_CODE (condition_rtx, reverse_condition (cond_code));
20117 break;
20120 false_cond = force_reg (mode, false_cond);
20121 if (true_cond != const0_rtx)
20122 true_cond = force_reg (mode, true_cond);
20124 emit_insn (isel_func (dest, condition_rtx, true_cond, false_cond, cr));
20126 return 1;
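/* Roughly, the SImode sequence produced here is

       cmpw  crN,rA,rB        # rs6000_generate_compare
       isel  rD,rT,rF,BC      # rD = (CR bit BC set) ? rT : rF

   isel tests a single CR bit, so codes such as GE or NE are handled
   by swapping the arms and reversing the condition rather than by
   emitting a second compare.  (Register names are illustrative.)  */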
20129 const char *
20130 output_isel (rtx *operands)
20132 enum rtx_code code;
20134 code = GET_CODE (operands[1]);
20136 if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
20138 gcc_assert (GET_CODE (operands[2]) == REG
20139 && GET_CODE (operands[3]) == REG);
20140 PUT_CODE (operands[1], reverse_condition (code));
20141 return "isel %0,%3,%2,%j1";
20144 return "isel %0,%2,%3,%j1";
20147 void
20148 rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
20150 machine_mode mode = GET_MODE (op0);
20151 enum rtx_code c;
20152 rtx target;
20154 /* VSX/altivec have direct min/max insns. */
20155 if ((code == SMAX || code == SMIN)
20156 && (VECTOR_UNIT_ALTIVEC_OR_VSX_P (mode)
20157 || (mode == SFmode && VECTOR_UNIT_VSX_P (DFmode))))
20159 emit_insn (gen_rtx_SET (VOIDmode,
20160 dest,
20161 gen_rtx_fmt_ee (code, mode, op0, op1)));
20162 return;
20165 if (code == SMAX || code == SMIN)
20166 c = GE;
20167 else
20168 c = GEU;
20170 if (code == SMAX || code == UMAX)
20171 target = emit_conditional_move (dest, c, op0, op1, mode,
20172 op0, op1, mode, 0);
20173 else
20174 target = emit_conditional_move (dest, c, op0, op1, mode,
20175 op1, op0, mode, 0);
20176 gcc_assert (target);
20177 if (target != dest)
20178 emit_move_insn (dest, target);
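/* For example, a signed 32-bit max comes out as the equivalent of

       dest = (op0 >= op1) ? op0 : op1;

   using GEU instead of GE for the unsigned variants and swapping the
   moved operands for the min flavors.  */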
20181 /* A subroutine of the atomic operation splitters. Jump to LABEL if
20182 COND is true. Mark the jump as unlikely to be taken. */
20184 static void
20185 emit_unlikely_jump (rtx cond, rtx label)
20187 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
20188 rtx x;
20190 x = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, label, pc_rtx);
20191 x = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, x));
20192 add_int_reg_note (x, REG_BR_PROB, very_unlikely);
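/* REG_BR_PROB_BASE is 10000, so "very_unlikely" is 99: the branch is
   annotated as taken roughly 1% of the time.  */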
20195 /* A subroutine of the atomic operation splitters. Emit a load-locked
20196 instruction in MODE. For QI/HImode, possibly use a pattern that includes
20197 the zero_extend operation. */
20199 static void
20200 emit_load_locked (machine_mode mode, rtx reg, rtx mem)
20202 rtx (*fn) (rtx, rtx) = NULL;
20204 switch (mode)
20206 case QImode:
20207 fn = gen_load_lockedqi;
20208 break;
20209 case HImode:
20210 fn = gen_load_lockedhi;
20211 break;
20212 case SImode:
20213 if (GET_MODE (mem) == QImode)
20214 fn = gen_load_lockedqi_si;
20215 else if (GET_MODE (mem) == HImode)
20216 fn = gen_load_lockedhi_si;
20217 else
20218 fn = gen_load_lockedsi;
20219 break;
20220 case DImode:
20221 fn = gen_load_lockeddi;
20222 break;
20223 case TImode:
20224 fn = gen_load_lockedti;
20225 break;
20226 default:
20227 gcc_unreachable ();
20229 emit_insn (fn (reg, mem));
20232 /* A subroutine of the atomic operation splitters. Emit a store-conditional
20233 instruction in MODE. */
20235 static void
20236 emit_store_conditional (machine_mode mode, rtx res, rtx mem, rtx val)
20238 rtx (*fn) (rtx, rtx, rtx) = NULL;
20240 switch (mode)
20242 case QImode:
20243 fn = gen_store_conditionalqi;
20244 break;
20245 case HImode:
20246 fn = gen_store_conditionalhi;
20247 break;
20248 case SImode:
20249 fn = gen_store_conditionalsi;
20250 break;
20251 case DImode:
20252 fn = gen_store_conditionaldi;
20253 break;
20254 case TImode:
20255 fn = gen_store_conditionalti;
20256 break;
20257 default:
20258 gcc_unreachable ();
20261 /* Emit sync before stwcx. to address PPC405 Erratum 77. */
20262 if (PPC405_ERRATUM77)
20263 emit_insn (gen_hwsync ());
20265 emit_insn (fn (res, mem, val));
20268 /* Expand barriers before and after a load_locked/store_cond sequence. */
20270 static rtx
20271 rs6000_pre_atomic_barrier (rtx mem, enum memmodel model)
20273 rtx addr = XEXP (mem, 0);
20274 int strict_p = (reload_in_progress || reload_completed);
20276 if (!legitimate_indirect_address_p (addr, strict_p)
20277 && !legitimate_indexed_address_p (addr, strict_p))
20279 addr = force_reg (Pmode, addr);
20280 mem = replace_equiv_address_nv (mem, addr);
20283 switch (model)
20285 case MEMMODEL_RELAXED:
20286 case MEMMODEL_CONSUME:
20287 case MEMMODEL_ACQUIRE:
20288 break;
20289 case MEMMODEL_RELEASE:
20290 case MEMMODEL_ACQ_REL:
20291 emit_insn (gen_lwsync ());
20292 break;
20293 case MEMMODEL_SEQ_CST:
20294 emit_insn (gen_hwsync ());
20295 break;
20296 default:
20297 gcc_unreachable ();
20299 return mem;
20302 static void
20303 rs6000_post_atomic_barrier (enum memmodel model)
20305 switch (model)
20307 case MEMMODEL_RELAXED:
20308 case MEMMODEL_CONSUME:
20309 case MEMMODEL_RELEASE:
20310 break;
20311 case MEMMODEL_ACQUIRE:
20312 case MEMMODEL_ACQ_REL:
20313 case MEMMODEL_SEQ_CST:
20314 emit_insn (gen_isync ());
20315 break;
20316 default:
20317 gcc_unreachable ();
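/* Taken together, the pre/post barriers implement the usual PowerPC
   mapping of the C11 memory models onto a larx/stcx. region:

       relaxed/consume: no fences
       acquire:         isync after
       release:         lwsync before
       acq_rel:         lwsync before, isync after
       seq_cst:         hwsync before, isync after

   (a summary of the two switches above, not an extra code path).  */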
20321 /* A subroutine of the various atomic expanders. For sub-word operations,
20322 we must adjust things to operate on SImode. Given the original MEM,
20323 return a new aligned memory. Also build and return the quantities by
20324 which to shift and mask. */
20326 static rtx
20327 rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
20329 rtx addr, align, shift, mask, mem;
20330 HOST_WIDE_INT shift_mask;
20331 machine_mode mode = GET_MODE (orig_mem);
20333 /* For smaller modes, we have to implement this via SImode. */
20334 shift_mask = (mode == QImode ? 0x18 : 0x10);
20336 addr = XEXP (orig_mem, 0);
20337 addr = force_reg (GET_MODE (addr), addr);
20339 /* Aligned memory containing subword. Generate a new memory. We
20340 do not want any of the existing MEM_ATTR data, as we're now
20341 accessing memory outside the original object. */
20342 align = expand_simple_binop (Pmode, AND, addr, GEN_INT (-4),
20343 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20344 mem = gen_rtx_MEM (SImode, align);
20345 MEM_VOLATILE_P (mem) = MEM_VOLATILE_P (orig_mem);
20346 if (MEM_ALIAS_SET (orig_mem) == ALIAS_SET_MEMORY_BARRIER)
20347 set_mem_alias_set (mem, ALIAS_SET_MEMORY_BARRIER);
20349 /* Shift amount for subword relative to aligned word. */
20350 shift = gen_reg_rtx (SImode);
20351 addr = gen_lowpart (SImode, addr);
20352 emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
20353 if (BYTES_BIG_ENDIAN)
20354 shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
20355 shift, 1, OPTAB_LIB_WIDEN);
20356 *pshift = shift;
20358 /* Mask for insertion. */
20359 mask = expand_simple_binop (SImode, ASHIFT, GEN_INT (GET_MODE_MASK (mode)),
20360 shift, NULL_RTX, 1, OPTAB_LIB_WIDEN);
20361 *pmask = mask;
20363 return mem;
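/* A worked example, assuming little-endian and a QImode access at
   address 0x1003:

       align = 0x1003 & -4           = 0x1000     (SImode word used)
       shift = (0x1003 << 3) & 0x18  = 24         (byte sits in bits 24..31)
       mask  = 0xFF << 24            = 0xFF000000

   On big-endian the shift is XORed with 0x18, giving 0 here, since
   byte 3 is then the least significant byte of the aligned word.  */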
20366 /* A subroutine of the various atomic expanders. For sub-word operands,
20367 combine OLDVAL and NEWVAL via MASK. Returns a new pseudo. */
20369 static rtx
20370 rs6000_mask_atomic_subword (rtx oldval, rtx newval, rtx mask)
20372 rtx x;
20374 x = gen_reg_rtx (SImode);
20375 emit_insn (gen_rtx_SET (VOIDmode, x,
20376 gen_rtx_AND (SImode,
20377 gen_rtx_NOT (SImode, mask),
20378 oldval)));
20380 x = expand_simple_binop (SImode, IOR, newval, x, x, 1, OPTAB_LIB_WIDEN);
20382 return x;
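/* In other words, with NEWVAL already shifted into position and masked
   by the caller, this computes

       result = (oldval & ~mask) | newval;

   replacing only the target byte/halfword within the aligned word.  */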
20385 /* A subroutine of the various atomic expanders. For sub-word operands,
20386 extract WIDE to NARROW via SHIFT. */
20388 static void
20389 rs6000_finish_atomic_subword (rtx narrow, rtx wide, rtx shift)
20391 wide = expand_simple_binop (SImode, LSHIFTRT, wide, shift,
20392 wide, 1, OPTAB_LIB_WIDEN);
20393 emit_move_insn (narrow, gen_lowpart (GET_MODE (narrow), wide));
20396 /* Expand an atomic compare and swap operation. */
20398 void
20399 rs6000_expand_atomic_compare_and_swap (rtx operands[])
20401 rtx boolval, retval, mem, oldval, newval, cond;
20402 rtx label1, label2, x, mask, shift;
20403 machine_mode mode, orig_mode;
20404 enum memmodel mod_s, mod_f;
20405 bool is_weak;
20407 boolval = operands[0];
20408 retval = operands[1];
20409 mem = operands[2];
20410 oldval = operands[3];
20411 newval = operands[4];
20412 is_weak = (INTVAL (operands[5]) != 0);
20413 mod_s = (enum memmodel) INTVAL (operands[6]);
20414 mod_f = (enum memmodel) INTVAL (operands[7]);
20415 orig_mode = mode = GET_MODE (mem);
20417 mask = shift = NULL_RTX;
20418 if (mode == QImode || mode == HImode)
20420 /* Before power8, we didn't have access to lbarx/lharx, so generate a
20421 lwarx plus shift/mask operations. With power8, we need to do the
20422 comparison in SImode, but the store is still done in QI/HImode. */
20423 oldval = convert_modes (SImode, mode, oldval, 1);
20425 if (!TARGET_SYNC_HI_QI)
20427 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20429 /* Shift and mask OLDVAL into position within the word. */
20430 oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
20431 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20433 /* Shift and mask NEWVAL into position within the word. */
20434 newval = convert_modes (SImode, mode, newval, 1);
20435 newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
20436 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20439 /* Prepare to adjust the return value. */
20440 retval = gen_reg_rtx (SImode);
20441 mode = SImode;
20443 else if (reg_overlap_mentioned_p (retval, oldval))
20444 oldval = copy_to_reg (oldval);
20446 mem = rs6000_pre_atomic_barrier (mem, mod_s);
20448 label1 = NULL_RTX;
20449 if (!is_weak)
20451 label1 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20452 emit_label (XEXP (label1, 0));
20454 label2 = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20456 emit_load_locked (mode, retval, mem);
20458 x = retval;
20459 if (mask)
20461 x = expand_simple_binop (SImode, AND, retval, mask,
20462 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20465 cond = gen_reg_rtx (CCmode);
20466 /* If we have TImode, synthesize a comparison. */
20467 if (mode != TImode)
20468 x = gen_rtx_COMPARE (CCmode, x, oldval);
20469 else
20471 rtx xor1_result = gen_reg_rtx (DImode);
20472 rtx xor2_result = gen_reg_rtx (DImode);
20473 rtx or_result = gen_reg_rtx (DImode);
20474 rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
20475 rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
20476 rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
20477 rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
20479 emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
20480 emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
20481 emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
20482 x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
20485 emit_insn (gen_rtx_SET (VOIDmode, cond, x));
20487 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20488 emit_unlikely_jump (x, label2);
20490 x = newval;
20491 if (mask)
20492 x = rs6000_mask_atomic_subword (retval, newval, mask);
20494 emit_store_conditional (orig_mode, cond, mem, x);
20496 if (!is_weak)
20498 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20499 emit_unlikely_jump (x, label1);
20502 if (mod_f != MEMMODEL_RELAXED)
20503 emit_label (XEXP (label2, 0));
20505 rs6000_post_atomic_barrier (mod_s);
20507 if (mod_f == MEMMODEL_RELAXED)
20508 emit_label (XEXP (label2, 0));
20510 if (shift)
20511 rs6000_finish_atomic_subword (operands[1], retval, shift);
20512 else if (mode != GET_MODE (operands[1]))
20513 convert_move (operands[1], retval, 1);
20515 /* In all cases, CR0 contains EQ on success, and NE on failure. */
20516 x = gen_rtx_EQ (SImode, cond, const0_rtx);
20517 emit_insn (gen_rtx_SET (VOIDmode, boolval, x));
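/* The shape of the generated loop, sketched as assembly for a strong
   SImode compare-and-swap with seq_cst ordering (illustrative register
   names, not the literal output):

       hwsync                    # pre-barrier
    1: lwarx   r9,0,rMEM         # load-locked
       cmpw    cr0,r9,rOLD
       bne-    cr0,2f            # mismatch -> fail
       stwcx.  rNEW,0,rMEM       # store-conditional
       bne-    cr0,1b            # lost reservation -> retry
    2: isync                     # post-barrier
                                 # CR0 holds EQ on success, NE on failure
*/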
20520 /* Expand an atomic exchange operation. */
20522 void
20523 rs6000_expand_atomic_exchange (rtx operands[])
20525 rtx retval, mem, val, cond;
20526 machine_mode mode;
20527 enum memmodel model;
20528 rtx label, x, mask, shift;
20530 retval = operands[0];
20531 mem = operands[1];
20532 val = operands[2];
20533 model = (enum memmodel) INTVAL (operands[3]);
20534 mode = GET_MODE (mem);
20536 mask = shift = NULL_RTX;
20537 if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
20539 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20541 /* Shift and mask VAL into position within the word. */
20542 val = convert_modes (SImode, mode, val, 1);
20543 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20544 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20546 /* Prepare to adjust the return value. */
20547 retval = gen_reg_rtx (SImode);
20548 mode = SImode;
20551 mem = rs6000_pre_atomic_barrier (mem, model);
20553 label = gen_rtx_LABEL_REF (VOIDmode, gen_label_rtx ());
20554 emit_label (XEXP (label, 0));
20556 emit_load_locked (mode, retval, mem);
20558 x = val;
20559 if (mask)
20560 x = rs6000_mask_atomic_subword (retval, val, mask);
20562 cond = gen_reg_rtx (CCmode);
20563 emit_store_conditional (mode, cond, mem, x);
20565 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20566 emit_unlikely_jump (x, label);
20568 rs6000_post_atomic_barrier (model);
20570 if (shift)
20571 rs6000_finish_atomic_subword (operands[0], retval, shift);
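/* Sketch of the resulting loop for a full-word exchange (illustrative):

    1: lwarx   rRET,0,rMEM       # fetch the old value
       stwcx.  rVAL,0,rMEM       # attempt to store the new one
       bne-    cr0,1b            # retry if the reservation was lost  */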
20574 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
20575 to perform. MEM is the memory on which to operate. VAL is the second
20576 operand of the binary operator. BEFORE and AFTER are optional locations to
20577 return the value of MEM either before or after the operation. MODEL_RTX
20578 is a CONST_INT containing the memory model to use. */
20580 void
20581 rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
20582 rtx orig_before, rtx orig_after, rtx model_rtx)
20584 enum memmodel model = (enum memmodel) INTVAL (model_rtx);
20585 machine_mode mode = GET_MODE (mem);
20586 machine_mode store_mode = mode;
20587 rtx label, x, cond, mask, shift;
20588 rtx before = orig_before, after = orig_after;
20590 mask = shift = NULL_RTX;
20591 /* On power8, we want to use SImode for the operation. On previous systems,
20592 use the operation in a subword and shift/mask to get the proper byte or
20593 halfword. */
20594 if (mode == QImode || mode == HImode)
20596 if (TARGET_SYNC_HI_QI)
20598 val = convert_modes (SImode, mode, val, 1);
20600 /* Prepare to adjust the return value. */
20601 before = gen_reg_rtx (SImode);
20602 if (after)
20603 after = gen_reg_rtx (SImode);
20604 mode = SImode;
20606 else
20608 mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
20610 /* Shift and mask VAL into position within the word. */
20611 val = convert_modes (SImode, mode, val, 1);
20612 val = expand_simple_binop (SImode, ASHIFT, val, shift,
20613 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20615 switch (code)
20617 case IOR:
20618 case XOR:
20619 /* We've already zero-extended VAL. That is sufficient to
20620 make certain that it does not affect other bits. */
20621 mask = NULL;
20622 break;
20624 case AND:
20625 /* If we make certain that all of the other bits in VAL are
20626 set, that will be sufficient to not affect other bits. */
20627 x = gen_rtx_NOT (SImode, mask);
20628 x = gen_rtx_IOR (SImode, x, val);
20629 emit_insn (gen_rtx_SET (VOIDmode, val, x));
20630 mask = NULL;
20631 break;
20633 case NOT:
20634 case PLUS:
20635 case MINUS:
20636 /* These will all affect bits outside the field and need
20637 adjustment via MASK within the loop. */
20638 break;
20640 default:
20641 gcc_unreachable ();
20644 /* Prepare to adjust the return value. */
20645 before = gen_reg_rtx (SImode);
20646 if (after)
20647 after = gen_reg_rtx (SImode);
20648 store_mode = mode = SImode;
20652 mem = rs6000_pre_atomic_barrier (mem, model);
20654 label = gen_label_rtx ();
20655 emit_label (label);
20656 label = gen_rtx_LABEL_REF (VOIDmode, label);
20658 if (before == NULL_RTX)
20659 before = gen_reg_rtx (mode);
20661 emit_load_locked (mode, before, mem);
20663 if (code == NOT)
20665 x = expand_simple_binop (mode, AND, before, val,
20666 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20667 after = expand_simple_unop (mode, NOT, x, after, 1);
20669 else
20671 after = expand_simple_binop (mode, code, before, val,
20672 after, 1, OPTAB_LIB_WIDEN);
20675 x = after;
20676 if (mask)
20678 x = expand_simple_binop (SImode, AND, after, mask,
20679 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20680 x = rs6000_mask_atomic_subword (before, x, mask);
20682 else if (store_mode != mode)
20683 x = convert_modes (store_mode, mode, x, 1);
20685 cond = gen_reg_rtx (CCmode);
20686 emit_store_conditional (store_mode, cond, mem, x);
20688 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
20689 emit_unlikely_jump (x, label);
20691 rs6000_post_atomic_barrier (model);
20693 if (shift)
20695 /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
20696 then do the calculations in a SImode register. */
20697 if (orig_before)
20698 rs6000_finish_atomic_subword (orig_before, before, shift);
20699 if (orig_after)
20700 rs6000_finish_atomic_subword (orig_after, after, shift);
20702 else if (store_mode != mode)
20704 /* QImode/HImode on machines with lbarx/lharx where we do the native
20705 operation and then do the calculations in a SImode register. */
20706 if (orig_before)
20707 convert_move (orig_before, before, 1);
20708 if (orig_after)
20709 convert_move (orig_after, after, 1);
20711 else if (orig_after && after != orig_after)
20712 emit_move_insn (orig_after, after);
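/* For example, a seq_cst __atomic_fetch_add on an SImode location
   expands to roughly (illustrative registers):

       hwsync
    1: lwarx   rBEF,0,rMEM       # BEFORE value
       add     rAFT,rBEF,rVAL    # the requested operation
       stwcx.  rAFT,0,rMEM
       bne-    cr0,1b
       isync

   For subword AND, the code above first sets every bit of VAL outside
   the field, so the operation cannot disturb neighboring bytes;
   IOR/XOR are already safe because VAL was zero-extended.  */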
20715 /* Emit instructions to move SRC to DST. Called by splitters for
20716 multi-register moves. It will emit at most one instruction for
20717 each register that is accessed; that is, it won't emit li/lis pairs
20718 (or equivalent for 64-bit code). One of SRC or DST must be a hard
20719 register. */
20721 void
20722 rs6000_split_multireg_move (rtx dst, rtx src)
20724 /* The register number of the first register being moved. */
20725 int reg;
20726 /* The mode that is to be moved. */
20727 machine_mode mode;
20728 /* The mode that the move is being done in, and its size. */
20729 machine_mode reg_mode;
20730 int reg_mode_size;
20731 /* The number of registers that will be moved. */
20732 int nregs;
20734 reg = REG_P (dst) ? REGNO (dst) : REGNO (src);
20735 mode = GET_MODE (dst);
20736 nregs = hard_regno_nregs[reg][mode];
20737 if (FP_REGNO_P (reg))
20738 reg_mode = DECIMAL_FLOAT_MODE_P (mode) ? DDmode :
20739 ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT) ? DFmode : SFmode);
20740 else if (ALTIVEC_REGNO_P (reg))
20741 reg_mode = V16QImode;
20742 else if (TARGET_E500_DOUBLE && mode == TFmode)
20743 reg_mode = DFmode;
20744 else
20745 reg_mode = word_mode;
20746 reg_mode_size = GET_MODE_SIZE (reg_mode);
20748 gcc_assert (reg_mode_size * nregs == GET_MODE_SIZE (mode));
20750 /* TDmode residing in FP registers is special, since the ISA requires that
20751 the lower-numbered word of a register pair is always the most significant
20752 word, even in little-endian mode. This does not match the usual subreg
20753 semantics, so we cannot use simplify_gen_subreg in those cases. Access
20754 the appropriate constituent registers "by hand" in little-endian mode.
20756 Note we do not need to check for destructive overlap here since TDmode
20757 can only reside in even/odd register pairs. */
20758 if (FP_REGNO_P (reg) && DECIMAL_FLOAT_MODE_P (mode) && !BYTES_BIG_ENDIAN)
20760 rtx p_src, p_dst;
20761 int i;
20763 for (i = 0; i < nregs; i++)
20765 if (REG_P (src) && FP_REGNO_P (REGNO (src)))
20766 p_src = gen_rtx_REG (reg_mode, REGNO (src) + nregs - 1 - i);
20767 else
20768 p_src = simplify_gen_subreg (reg_mode, src, mode,
20769 i * reg_mode_size);
20771 if (REG_P (dst) && FP_REGNO_P (REGNO (dst)))
20772 p_dst = gen_rtx_REG (reg_mode, REGNO (dst) + nregs - 1 - i);
20773 else
20774 p_dst = simplify_gen_subreg (reg_mode, dst, mode,
20775 i * reg_mode_size);
20777 emit_insn (gen_rtx_SET (VOIDmode, p_dst, p_src));
20780 return;
20783 if (REG_P (src) && REG_P (dst) && (REGNO (src) < REGNO (dst)))
20785 /* Move register range backwards, if we might have destructive
20786 overlap. */
20787 int i;
20788 for (i = nregs - 1; i >= 0; i--)
20789 emit_insn (gen_rtx_SET (VOIDmode,
20790 simplify_gen_subreg (reg_mode, dst, mode,
20791 i * reg_mode_size),
20792 simplify_gen_subreg (reg_mode, src, mode,
20793 i * reg_mode_size)));
20795 else
20797 int i;
20798 int j = -1;
20799 bool used_update = false;
20800 rtx restore_basereg = NULL_RTX;
20802 if (MEM_P (src) && INT_REGNO_P (reg))
20804 rtx breg;
20806 if (GET_CODE (XEXP (src, 0)) == PRE_INC
20807 || GET_CODE (XEXP (src, 0)) == PRE_DEC)
20809 rtx delta_rtx;
20810 breg = XEXP (XEXP (src, 0), 0);
20811 delta_rtx = (GET_CODE (XEXP (src, 0)) == PRE_INC
20812 ? GEN_INT (GET_MODE_SIZE (GET_MODE (src)))
20813 : GEN_INT (-GET_MODE_SIZE (GET_MODE (src))));
20814 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20815 src = replace_equiv_address (src, breg);
20817 else if (! rs6000_offsettable_memref_p (src, reg_mode))
20819 if (GET_CODE (XEXP (src, 0)) == PRE_MODIFY)
20821 rtx basereg = XEXP (XEXP (src, 0), 0);
20822 if (TARGET_UPDATE)
20824 rtx ndst = simplify_gen_subreg (reg_mode, dst, mode, 0);
20825 emit_insn (gen_rtx_SET (VOIDmode, ndst,
20826 gen_rtx_MEM (reg_mode, XEXP (src, 0))));
20827 used_update = true;
20829 else
20830 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20831 XEXP (XEXP (src, 0), 1)));
20832 src = replace_equiv_address (src, basereg);
20834 else
20836 rtx basereg = gen_rtx_REG (Pmode, reg);
20837 emit_insn (gen_rtx_SET (VOIDmode, basereg, XEXP (src, 0)));
20838 src = replace_equiv_address (src, basereg);
20842 breg = XEXP (src, 0);
20843 if (GET_CODE (breg) == PLUS || GET_CODE (breg) == LO_SUM)
20844 breg = XEXP (breg, 0);
20846 /* If the base register we are using to address memory is
20847 also a destination reg, then change that register last. */
20848 if (REG_P (breg)
20849 && REGNO (breg) >= REGNO (dst)
20850 && REGNO (breg) < REGNO (dst) + nregs)
20851 j = REGNO (breg) - REGNO (dst);
20853 else if (MEM_P (dst) && INT_REGNO_P (reg))
20855 rtx breg;
20857 if (GET_CODE (XEXP (dst, 0)) == PRE_INC
20858 || GET_CODE (XEXP (dst, 0)) == PRE_DEC)
20860 rtx delta_rtx;
20861 breg = XEXP (XEXP (dst, 0), 0);
20862 delta_rtx = (GET_CODE (XEXP (dst, 0)) == PRE_INC
20863 ? GEN_INT (GET_MODE_SIZE (GET_MODE (dst)))
20864 : GEN_INT (-GET_MODE_SIZE (GET_MODE (dst))));
20866 /* We have to update the breg before doing the store.
20867 Use store with update, if available. */
20869 if (TARGET_UPDATE)
20871 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20872 emit_insn (TARGET_32BIT
20873 ? (TARGET_POWERPC64
20874 ? gen_movdi_si_update (breg, breg, delta_rtx, nsrc)
20875 : gen_movsi_update (breg, breg, delta_rtx, nsrc))
20876 : gen_movdi_di_update (breg, breg, delta_rtx, nsrc));
20877 used_update = true;
20879 else
20880 emit_insn (gen_add3_insn (breg, breg, delta_rtx));
20881 dst = replace_equiv_address (dst, breg);
20883 else if (!rs6000_offsettable_memref_p (dst, reg_mode)
20884 && GET_CODE (XEXP (dst, 0)) != LO_SUM)
20886 if (GET_CODE (XEXP (dst, 0)) == PRE_MODIFY)
20888 rtx basereg = XEXP (XEXP (dst, 0), 0);
20889 if (TARGET_UPDATE)
20891 rtx nsrc = simplify_gen_subreg (reg_mode, src, mode, 0);
20892 emit_insn (gen_rtx_SET (VOIDmode,
20893 gen_rtx_MEM (reg_mode, XEXP (dst, 0)), nsrc));
20894 used_update = true;
20896 else
20897 emit_insn (gen_rtx_SET (VOIDmode, basereg,
20898 XEXP (XEXP (dst, 0), 1)));
20899 dst = replace_equiv_address (dst, basereg);
20901 else
20903 rtx basereg = XEXP (XEXP (dst, 0), 0);
20904 rtx offsetreg = XEXP (XEXP (dst, 0), 1);
20905 gcc_assert (GET_CODE (XEXP (dst, 0)) == PLUS
20906 && REG_P (basereg)
20907 && REG_P (offsetreg)
20908 && REGNO (basereg) != REGNO (offsetreg));
20909 if (REGNO (basereg) == 0)
20911 rtx tmp = offsetreg;
20912 offsetreg = basereg;
20913 basereg = tmp;
20915 emit_insn (gen_add3_insn (basereg, basereg, offsetreg));
20916 restore_basereg = gen_sub3_insn (basereg, basereg, offsetreg);
20917 dst = replace_equiv_address (dst, basereg);
20920 else if (GET_CODE (XEXP (dst, 0)) != LO_SUM)
20921 gcc_assert (rs6000_offsettable_memref_p (dst, reg_mode));
20924 for (i = 0; i < nregs; i++)
20926 /* Calculate index to next subword. */
20927 ++j;
20928 if (j == nregs)
20929 j = 0;
20931 /* If compiler already emitted move of first word by
20932 store with update, no need to do anything. */
20933 if (j == 0 && used_update)
20934 continue;
20936 emit_insn (gen_rtx_SET (VOIDmode,
20937 simplify_gen_subreg (reg_mode, dst, mode,
20938 j * reg_mode_size),
20939 simplify_gen_subreg (reg_mode, src, mode,
20940 j * reg_mode_size)));
20942 if (restore_basereg != NULL_RTX)
20943 emit_insn (restore_basereg);
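/* Two illustrative cases of the ordering logic for a 4-register move:

   - overlapping register-to-register, dst = r5..r8 and src = r4..r7:
     REGNO (src) < REGNO (dst), so the backwards loop copies r8<-r7,
     r7<-r6, r6<-r5, r5<-r4 and never clobbers a source before use;

   - load whose base register is one of the destinations: the index J
     is rotated so that register is loaded last, keeping the address
     valid for all earlier loads.  */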
20948 /* This page contains routines that are used to determine what the
20949 function prologue and epilogue code will do and write them out. */
20951 static inline bool
20952 save_reg_p (int r)
20954 return !call_used_regs[r] && df_regs_ever_live_p (r);
20957 /* Return the first fixed-point register that is required to be
20958 saved. 32 if none. */
20961 first_reg_to_save (void)
20963 int first_reg;
20965 /* Find lowest numbered live register. */
20966 for (first_reg = 13; first_reg <= 31; first_reg++)
20967 if (save_reg_p (first_reg))
20968 break;
20970 if (first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM
20971 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
20972 || (DEFAULT_ABI == ABI_DARWIN && flag_pic)
20973 || (TARGET_TOC && TARGET_MINIMAL_TOC))
20974 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))
20975 first_reg = RS6000_PIC_OFFSET_TABLE_REGNUM;
20977 #if TARGET_MACHO
20978 if (flag_pic
20979 && crtl->uses_pic_offset_table
20980 && first_reg > RS6000_PIC_OFFSET_TABLE_REGNUM)
20981 return RS6000_PIC_OFFSET_TABLE_REGNUM;
20982 #endif
20984 return first_reg;
20987 /* Similar, for FP regs. */
20990 first_fp_reg_to_save (void)
20992 int first_reg;
20994 /* Find lowest numbered live register. */
20995 for (first_reg = 14 + 32; first_reg <= 63; first_reg++)
20996 if (save_reg_p (first_reg))
20997 break;
20999 return first_reg;
21002 /* Similar, for AltiVec regs. */
21004 static int
21005 first_altivec_reg_to_save (void)
21007 int i;
21009 /* Stack frame remains as is unless we are in AltiVec ABI. */
21010 if (! TARGET_ALTIVEC_ABI)
21011 return LAST_ALTIVEC_REGNO + 1;
21013 /* On Darwin, the unwind routines are compiled without
21014 TARGET_ALTIVEC, and use save_world to save/restore the
21015 altivec registers when necessary. */
21016 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21017 && ! TARGET_ALTIVEC)
21018 return FIRST_ALTIVEC_REGNO + 20;
21020 /* Find lowest numbered live register. */
21021 for (i = FIRST_ALTIVEC_REGNO + 20; i <= LAST_ALTIVEC_REGNO; ++i)
21022 if (save_reg_p (i))
21023 break;
21025 return i;
21028 /* Return a 32-bit mask of the AltiVec registers we need to set in
21029 VRSAVE. Bit n of the return value is 1 if Vn is live; bits are
21030 numbered from the most significant end, so the MSB is bit 0. */
21032 static unsigned int
21033 compute_vrsave_mask (void)
21035 unsigned int i, mask = 0;
21037 /* On Darwin, the unwind routines are compiled without
21038 TARGET_ALTIVEC, and use save_world to save/restore the
21039 call-saved altivec registers when necessary. */
21040 if (DEFAULT_ABI == ABI_DARWIN && crtl->calls_eh_return
21041 && ! TARGET_ALTIVEC)
21042 mask |= 0xFFF;
21044 /* First, find out if we use _any_ altivec registers. */
21045 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
21046 if (df_regs_ever_live_p (i))
21047 mask |= ALTIVEC_REG_BIT (i);
21049 if (mask == 0)
21050 return mask;
21052 /* Next, remove the argument registers from the set. These must
21053 be in the VRSAVE mask set by the caller, so we don't need to add
21054 them in again. More importantly, the mask we compute here is
21055 used to generate CLOBBERs in the set_vrsave insn, and we do not
21056 wish the argument registers to die. */
21057 for (i = crtl->args.info.vregno - 1; i >= ALTIVEC_ARG_MIN_REG; --i)
21058 mask &= ~ALTIVEC_REG_BIT (i);
21060 /* Similarly, remove the return value from the set. */
21062 bool yes = false;
21063 diddle_return_value (is_altivec_return_reg, &yes);
21064 if (yes)
21065 mask &= ~ALTIVEC_REG_BIT (ALTIVEC_ARG_RETURN);
21068 return mask;
21071 /* For a very restricted set of circumstances, we can cut down the
21072 size of prologues/epilogues by calling our own save/restore-the-world
21073 routines. */
21075 static void
21076 compute_save_world_info (rs6000_stack_t *info_ptr)
21078 info_ptr->world_save_p = 1;
21079 info_ptr->world_save_p
21080 = (WORLD_SAVE_P (info_ptr)
21081 && DEFAULT_ABI == ABI_DARWIN
21082 && !cfun->has_nonlocal_label
21083 && info_ptr->first_fp_reg_save == FIRST_SAVED_FP_REGNO
21084 && info_ptr->first_gp_reg_save == FIRST_SAVED_GP_REGNO
21085 && info_ptr->first_altivec_reg_save == FIRST_SAVED_ALTIVEC_REGNO
21086 && info_ptr->cr_save_p);
21088 /* This will not work in conjunction with sibcalls. Make sure there
21089 are none. (This check is expensive, but seldom executed.) */
21090 if (WORLD_SAVE_P (info_ptr))
21092 rtx_insn *insn;
21093 for (insn = get_last_insn_anywhere (); insn; insn = PREV_INSN (insn))
21094 if (CALL_P (insn) && SIBLING_CALL_P (insn))
21096 info_ptr->world_save_p = 0;
21097 break;
21101 if (WORLD_SAVE_P (info_ptr))
21103 /* Even if we're not touching VRsave, make sure there's room on the
21104 stack for it, if it looks like we're calling SAVE_WORLD, which
21105 will attempt to save it. */
21106 info_ptr->vrsave_size = 4;
21108 /* If we are going to save the world, we need to save the link register too. */
21109 info_ptr->lr_save_p = 1;
21111 /* "Save" the VRsave register too if we're saving the world. */
21112 if (info_ptr->vrsave_mask == 0)
21113 info_ptr->vrsave_mask = compute_vrsave_mask ();
21115 /* Because the Darwin register save/restore routines only handle
21116 F14 .. F31 and V20 .. V31 as per the ABI, perform a consistency
21117 check. */
21118 gcc_assert (info_ptr->first_fp_reg_save >= FIRST_SAVED_FP_REGNO
21119 && (info_ptr->first_altivec_reg_save
21120 >= FIRST_SAVED_ALTIVEC_REGNO));
21122 return;
21126 static void
21127 is_altivec_return_reg (rtx reg, void *xyes)
21129 bool *yes = (bool *) xyes;
21130 if (REGNO (reg) == ALTIVEC_ARG_RETURN)
21131 *yes = true;
21135 /* Look for user-defined global regs in the range FIRST to LAST-1.
21136 We should not restore these, and so cannot use lmw or out-of-line
21137 restore functions if there are any. We also can't save them
21138 (well, emit frame notes for them), because frame unwinding during
21139 exception handling will restore saved registers. */
21141 static bool
21142 global_regs_p (unsigned first, unsigned last)
21144 while (first < last)
21145 if (global_regs[first++])
21146 return true;
21147 return false;
21150 /* Determine the strategy for saving/restoring registers. */
21152 enum {
21153 SAVRES_MULTIPLE = 0x1,
21154 SAVE_INLINE_FPRS = 0x2,
21155 SAVE_INLINE_GPRS = 0x4,
21156 REST_INLINE_FPRS = 0x8,
21157 REST_INLINE_GPRS = 0x10,
21158 SAVE_NOINLINE_GPRS_SAVES_LR = 0x20,
21159 SAVE_NOINLINE_FPRS_SAVES_LR = 0x40,
21160 REST_NOINLINE_FPRS_DOESNT_RESTORE_LR = 0x80,
21161 SAVE_INLINE_VRS = 0x100,
21162 REST_INLINE_VRS = 0x200
21165 static int
21166 rs6000_savres_strategy (rs6000_stack_t *info,
21167 bool using_static_chain_p)
21169 int strategy = 0;
21170 bool lr_save_p;
21172 if (TARGET_MULTIPLE
21173 && !TARGET_POWERPC64
21174 && !(TARGET_SPE_ABI && info->spe_64bit_regs_used)
21175 && info->first_gp_reg_save < 31
21176 && !global_regs_p (info->first_gp_reg_save, 32))
21177 strategy |= SAVRES_MULTIPLE;
21179 if (crtl->calls_eh_return
21180 || cfun->machine->ra_need_lr)
21181 strategy |= (SAVE_INLINE_FPRS | REST_INLINE_FPRS
21182 | SAVE_INLINE_GPRS | REST_INLINE_GPRS
21183 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21185 if (info->first_fp_reg_save == 64
21186 /* The out-of-line FP routines use double-precision stores;
21187 we can't use those routines if we don't have such stores. */
21188 || (TARGET_HARD_FLOAT && !TARGET_DOUBLE_FLOAT)
21189 || global_regs_p (info->first_fp_reg_save, 64))
21190 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21192 if (info->first_gp_reg_save == 32
21193 || (!(strategy & SAVRES_MULTIPLE)
21194 && global_regs_p (info->first_gp_reg_save, 32)))
21195 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21197 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO + 1
21198 || global_regs_p (info->first_altivec_reg_save, LAST_ALTIVEC_REGNO + 1))
21199 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21201 /* Define cutoff for using out-of-line functions to save registers. */
21202 if (DEFAULT_ABI == ABI_V4 || TARGET_ELF)
21204 if (!optimize_size)
21206 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21207 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21208 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21210 else
21212 /* Prefer out-of-line restore if it will exit. */
21213 if (info->first_fp_reg_save > 61)
21214 strategy |= SAVE_INLINE_FPRS;
21215 if (info->first_gp_reg_save > 29)
21217 if (info->first_fp_reg_save == 64)
21218 strategy |= SAVE_INLINE_GPRS;
21219 else
21220 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21222 if (info->first_altivec_reg_save == LAST_ALTIVEC_REGNO)
21223 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21226 else if (DEFAULT_ABI == ABI_DARWIN)
21228 if (info->first_fp_reg_save > 60)
21229 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21230 if (info->first_gp_reg_save > 29)
21231 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21232 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21234 else
21236 gcc_checking_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
21237 if (info->first_fp_reg_save > 61)
21238 strategy |= SAVE_INLINE_FPRS | REST_INLINE_FPRS;
21239 strategy |= SAVE_INLINE_GPRS | REST_INLINE_GPRS;
21240 strategy |= SAVE_INLINE_VRS | REST_INLINE_VRS;
21243 /* Don't bother to try to save things out-of-line if r11 is occupied
21244 by the static chain. It would require too much fiddling and the
21245 static chain is rarely used anyway. FPRs are saved w.r.t the stack
21246 pointer on Darwin, and AIX uses r1 or r12. */
21247 if (using_static_chain_p
21248 && (DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN))
21249 strategy |= ((DEFAULT_ABI == ABI_DARWIN ? 0 : SAVE_INLINE_FPRS)
21250 | SAVE_INLINE_GPRS
21251 | SAVE_INLINE_VRS | REST_INLINE_VRS);
21253 /* We can only use the out-of-line routines to restore if we've
21254 saved all the registers from first_fp_reg_save in the prologue.
21255 Otherwise, we risk loading garbage. */
21256 if ((strategy & (SAVE_INLINE_FPRS | REST_INLINE_FPRS)) == SAVE_INLINE_FPRS)
21258 int i;
21260 for (i = info->first_fp_reg_save; i < 64; i++)
21261 if (!save_reg_p (i))
21263 strategy |= REST_INLINE_FPRS;
21264 break;
21268 /* If we are going to use store multiple, then don't even bother
21269 with the out-of-line routines, since the store-multiple
21270 instruction will always be smaller. */
21271 if ((strategy & SAVRES_MULTIPLE))
21272 strategy |= SAVE_INLINE_GPRS;
21274 /* info->lr_save_p isn't yet set if the only reason lr needs to be
21275 saved is an out-of-line save or restore. Set up the value for
21276 the next test (excluding out-of-line gpr restore). */
21277 lr_save_p = (info->lr_save_p
21278 || !(strategy & SAVE_INLINE_GPRS)
21279 || !(strategy & SAVE_INLINE_FPRS)
21280 || !(strategy & SAVE_INLINE_VRS)
21281 || !(strategy & REST_INLINE_FPRS)
21282 || !(strategy & REST_INLINE_VRS));
21284 /* The situation is more complicated with load multiple. We'd
21285 prefer to use the out-of-line routines for restores, since the
21286 "exit" out-of-line routines can handle the restore of LR and the
21287 frame teardown. However it doesn't make sense to use the
21288 out-of-line routine if that is the only reason we'd need to save
21289 LR, and we can't use the "exit" out-of-line gpr restore if we
21290 have saved some fprs; in those cases it is advantageous to use
21291 load multiple when available. */
21292 if ((strategy & SAVRES_MULTIPLE)
21293 && (!lr_save_p
21294 || info->first_fp_reg_save != 64))
21295 strategy |= REST_INLINE_GPRS;
21297 /* Saving CR interferes with the exit routines used on the SPE, so
21298 just punt here. */
21299 if (TARGET_SPE_ABI
21300 && info->spe_64bit_regs_used
21301 && info->cr_save_p)
21302 strategy |= REST_INLINE_GPRS;
21304 /* We can only use load multiple or the out-of-line routines to
21305 restore if we've used store multiple or out-of-line routines
21306 in the prologue, i.e. if we've saved all the registers from
21307 first_gp_reg_save. Otherwise, we risk loading garbage. */
21308 if ((strategy & (SAVE_INLINE_GPRS | REST_INLINE_GPRS | SAVRES_MULTIPLE))
21309 == SAVE_INLINE_GPRS)
21311 int i;
21313 for (i = info->first_gp_reg_save; i < 32; i++)
21314 if (!save_reg_p (i))
21316 strategy |= REST_INLINE_GPRS;
21317 break;
21321 if (TARGET_ELF && TARGET_64BIT)
21323 if (!(strategy & SAVE_INLINE_FPRS))
21324 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21325 else if (!(strategy & SAVE_INLINE_GPRS)
21326 && info->first_fp_reg_save == 64)
21327 strategy |= SAVE_NOINLINE_GPRS_SAVES_LR;
21329 else if (TARGET_AIX && !(strategy & REST_INLINE_FPRS))
21330 strategy |= REST_NOINLINE_FPRS_DOESNT_RESTORE_LR;
21332 if (TARGET_MACHO && !(strategy & SAVE_INLINE_FPRS))
21333 strategy |= SAVE_NOINLINE_FPRS_SAVES_LR;
21335 return strategy;
21338 /* Calculate the stack information for the current function. This is
21339 complicated by having two separate calling sequences, the AIX calling
21340 sequence and the V.4 calling sequence.
21342 AIX (and Darwin/Mac OS X) stack frames look like:
21343 32-bit 64-bit
21344 SP----> +---------------------------------------+
21345 | back chain to caller | 0 0
21346 +---------------------------------------+
21347 | saved CR | 4 8 (8-11)
21348 +---------------------------------------+
21349 | saved LR | 8 16
21350 +---------------------------------------+
21351 | reserved for compilers | 12 24
21352 +---------------------------------------+
21353 | reserved for binders | 16 32
21354 +---------------------------------------+
21355 | saved TOC pointer | 20 40
21356 +---------------------------------------+
21357 | Parameter save area (P) | 24 48
21358 +---------------------------------------+
21359 | Alloca space (A) | 24+P etc.
21360 +---------------------------------------+
21361 | Local variable space (L) | 24+P+A
21362 +---------------------------------------+
21363 | Float/int conversion temporary (X) | 24+P+A+L
21364 +---------------------------------------+
21365 | Save area for AltiVec registers (W) | 24+P+A+L+X
21366 +---------------------------------------+
21367 | AltiVec alignment padding (Y) | 24+P+A+L+X+W
21368 +---------------------------------------+
21369 | Save area for VRSAVE register (Z) | 24+P+A+L+X+W+Y
21370 +---------------------------------------+
21371 | Save area for GP registers (G) | 24+P+A+L+X+W+Y+Z
21372 +---------------------------------------+
21373 | Save area for FP registers (F) | 24+P+A+L+X+W+Y+Z+G
21374 +---------------------------------------+
21375 old SP->| back chain to caller's caller |
21376 +---------------------------------------+
21378 The required alignment for AIX configurations is two words (i.e., 8
21379 or 16 bytes).
21381 The ELFv2 ABI is a variant of the AIX ABI. Stack frames look like:
21383 SP----> +---------------------------------------+
21384 | Back chain to caller | 0
21385 +---------------------------------------+
21386 | Save area for CR | 8
21387 +---------------------------------------+
21388 | Saved LR | 16
21389 +---------------------------------------+
21390 | Saved TOC pointer | 24
21391 +---------------------------------------+
21392 | Parameter save area (P) | 32
21393 +---------------------------------------+
21394 | Alloca space (A) | 32+P
21395 +---------------------------------------+
21396 | Local variable space (L) | 32+P+A
21397 +---------------------------------------+
21398 | Save area for AltiVec registers (W) | 32+P+A+L
21399 +---------------------------------------+
21400 | AltiVec alignment padding (Y) | 32+P+A+L+W
21401 +---------------------------------------+
21402 | Save area for GP registers (G) | 32+P+A+L+W+Y
21403 +---------------------------------------+
21404 | Save area for FP registers (F) | 32+P+A+L+W+Y+G
21405 +---------------------------------------+
21406 old SP->| back chain to caller's caller | 32+P+A+L+W+Y+G+F
21407 +---------------------------------------+
21410 V.4 stack frames look like:
21412 SP----> +---------------------------------------+
21413 | back chain to caller | 0
21414 +---------------------------------------+
21415 | caller's saved LR | 4
21416 +---------------------------------------+
21417 | Parameter save area (P) | 8
21418 +---------------------------------------+
21419 | Alloca space (A) | 8+P
21420 +---------------------------------------+
21421 | Varargs save area (V) | 8+P+A
21422 +---------------------------------------+
21423 | Local variable space (L) | 8+P+A+V
21424 +---------------------------------------+
21425 | Float/int conversion temporary (X) | 8+P+A+V+L
21426 +---------------------------------------+
21427 | Save area for AltiVec registers (W) | 8+P+A+V+L+X
21428 +---------------------------------------+
21429 | AltiVec alignment padding (Y) | 8+P+A+V+L+X+W
21430 +---------------------------------------+
21431 | Save area for VRSAVE register (Z) | 8+P+A+V+L+X+W+Y
21432 +---------------------------------------+
21433 | SPE: area for 64-bit GP registers |
21434 +---------------------------------------+
21435 | SPE alignment padding |
21436 +---------------------------------------+
21437 | saved CR (C) | 8+P+A+V+L+X+W+Y+Z
21438 +---------------------------------------+
21439 | Save area for GP registers (G) | 8+P+A+V+L+X+W+Y+Z+C
21440 +---------------------------------------+
21441 | Save area for FP registers (F) | 8+P+A+V+L+X+W+Y+Z+C+G
21442 +---------------------------------------+
21443 old SP->| back chain to caller's caller |
21444 +---------------------------------------+
21446 The required alignment for V.4 is 16 bytes, or 8 bytes if -meabi is
21447 given. (But note below and in sysv4.h that we require only 8 and
21448 may round up the size of our stack frame anyway. The historical
21449 reason is early versions of powerpc-linux which didn't properly
21450 align the stack at program startup. A happy side-effect is that
21451 -mno-eabi libraries can be used with -meabi programs.)
21453 The EABI configuration defaults to the V.4 layout. However,
21454 the stack alignment requirements may differ. If -mno-eabi is not
21455 given, the required stack alignment is 8 bytes; if -mno-eabi is
21456 given, the required alignment is 16 bytes. (But see V.4 comment
21457 above.) */
21459 #ifndef ABI_STACK_BOUNDARY
21460 #define ABI_STACK_BOUNDARY STACK_BOUNDARY
21461 #endif
21463 static rs6000_stack_t *
21464 rs6000_stack_info (void)
21466 rs6000_stack_t *info_ptr = &stack_info;
21467 int reg_size = TARGET_32BIT ? 4 : 8;
21468 int ehrd_size;
21469 int ehcr_size;
21470 int save_align;
21471 int first_gp;
21472 HOST_WIDE_INT non_fixed_size;
21473 bool using_static_chain_p;
21475 if (reload_completed && info_ptr->reload_completed)
21476 return info_ptr;
21478 memset (info_ptr, 0, sizeof (*info_ptr));
21479 info_ptr->reload_completed = reload_completed;
21481 if (TARGET_SPE)
21483 /* Cache value so we don't rescan instruction chain over and over. */
21484 if (cfun->machine->insn_chain_scanned_p == 0)
21485 cfun->machine->insn_chain_scanned_p
21486 = spe_func_has_64bit_regs_p () + 1;
21487 info_ptr->spe_64bit_regs_used = cfun->machine->insn_chain_scanned_p - 1;
21490 /* Select which calling sequence. */
21491 info_ptr->abi = DEFAULT_ABI;
21493 /* Calculate which registers need to be saved & save area size. */
21494 info_ptr->first_gp_reg_save = first_reg_to_save ();
21495 /* Assume that we will have to save RS6000_PIC_OFFSET_TABLE_REGNUM,
21496 even if it currently looks like we won't. Reload may need it to
21497 get at a constant; if so, it will have already created a constant
21498 pool entry for it. */
21499 if (((TARGET_TOC && TARGET_MINIMAL_TOC)
21500 || (flag_pic == 1 && DEFAULT_ABI == ABI_V4)
21501 || (flag_pic && DEFAULT_ABI == ABI_DARWIN))
21502 && crtl->uses_const_pool
21503 && info_ptr->first_gp_reg_save > RS6000_PIC_OFFSET_TABLE_REGNUM)
21504 first_gp = RS6000_PIC_OFFSET_TABLE_REGNUM;
21505 else
21506 first_gp = info_ptr->first_gp_reg_save;
21508 info_ptr->gp_size = reg_size * (32 - first_gp);
21510 /* For the SPE, we have an additional upper 32-bits on each GPR.
21511 Ideally we should save the entire 64-bits only when the upper
21512 half is used in SIMD instructions. Since we only record
21513 registers live (not the size they are used in), this proves
21514 difficult because we'd have to traverse the instruction chain at
21515 the right time, taking reload into account. This is a real pain,
21516 so we opt to always save the GPRs in 64-bits if even one register
21517 gets used in 64-bits. Otherwise, all the registers in the frame
21518 get saved in 32-bits.
21520 So... since when we save all GPRs (except the SP) in 64-bits, the
21521 traditional GP save area will be empty. */
21522 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21523 info_ptr->gp_size = 0;
21525 info_ptr->first_fp_reg_save = first_fp_reg_to_save ();
21526 info_ptr->fp_size = 8 * (64 - info_ptr->first_fp_reg_save);
21528 info_ptr->first_altivec_reg_save = first_altivec_reg_to_save ();
21529 info_ptr->altivec_size = 16 * (LAST_ALTIVEC_REGNO + 1
21530 - info_ptr->first_altivec_reg_save);
21532 /* Does this function call anything? */
21533 info_ptr->calls_p = (! crtl->is_leaf
21534 || cfun->machine->ra_needs_full_frame);
21536 /* Determine if we need to save the condition code registers. */
21537 if (df_regs_ever_live_p (CR2_REGNO)
21538 || df_regs_ever_live_p (CR3_REGNO)
21539 || df_regs_ever_live_p (CR4_REGNO))
21541 info_ptr->cr_save_p = 1;
21542 if (DEFAULT_ABI == ABI_V4)
21543 info_ptr->cr_size = reg_size;
21546 /* If the current function calls __builtin_eh_return, then we need
21547 to allocate stack space for registers that will hold data for
21548 the exception handler. */
21549 if (crtl->calls_eh_return)
21551 unsigned int i;
21552 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM; ++i)
21553 continue;
21555 /* SPE saves EH registers in 64-bits. */
21556 ehrd_size = i * (TARGET_SPE_ABI
21557 && info_ptr->spe_64bit_regs_used != 0
21558 ? UNITS_PER_SPE_WORD : UNITS_PER_WORD);
21560 else
21561 ehrd_size = 0;
21563 /* In the ELFv2 ABI, we also need to allocate space for separate
21564 CR field save areas if the function calls __builtin_eh_return. */
21565 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
21567 /* This hard-codes that we have three call-saved CR fields. */
21568 ehcr_size = 3 * reg_size;
21569 /* We do *not* use the regular CR save mechanism. */
21570 info_ptr->cr_save_p = 0;
21572 else
21573 ehcr_size = 0;
21575 /* Determine various sizes. */
21576 info_ptr->reg_size = reg_size;
21577 info_ptr->fixed_size = RS6000_SAVE_AREA;
21578 info_ptr->vars_size = RS6000_ALIGN (get_frame_size (), 8);
21579 info_ptr->parm_size = RS6000_ALIGN (crtl->outgoing_args_size,
21580 TARGET_ALTIVEC ? 16 : 8);
21581 if (FRAME_GROWS_DOWNWARD)
21582 info_ptr->vars_size
21583 += RS6000_ALIGN (info_ptr->fixed_size + info_ptr->vars_size
21584 + info_ptr->parm_size,
21585 ABI_STACK_BOUNDARY / BITS_PER_UNIT)
21586 - (info_ptr->fixed_size + info_ptr->vars_size
21587 + info_ptr->parm_size);
21589 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21590 info_ptr->spe_gp_size = 8 * (32 - first_gp);
21591 else
21592 info_ptr->spe_gp_size = 0;
21594 if (TARGET_ALTIVEC_ABI)
21595 info_ptr->vrsave_mask = compute_vrsave_mask ();
21596 else
21597 info_ptr->vrsave_mask = 0;
21599 if (TARGET_ALTIVEC_VRSAVE && info_ptr->vrsave_mask)
21600 info_ptr->vrsave_size = 4;
21601 else
21602 info_ptr->vrsave_size = 0;
21604 compute_save_world_info (info_ptr);
21606 /* Calculate the offsets. */
21607 switch (DEFAULT_ABI)
21609 case ABI_NONE:
21610 default:
21611 gcc_unreachable ();
21613 case ABI_AIX:
21614 case ABI_ELFv2:
21615 case ABI_DARWIN:
21616 info_ptr->fp_save_offset = - info_ptr->fp_size;
21617 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21619 if (TARGET_ALTIVEC_ABI)
21621 info_ptr->vrsave_save_offset
21622 = info_ptr->gp_save_offset - info_ptr->vrsave_size;
21624 /* Align stack so vector save area is on a quadword boundary.
21625 The padding goes above the vectors. */
21626 if (info_ptr->altivec_size != 0)
21627 info_ptr->altivec_padding_size
21628 = info_ptr->vrsave_save_offset & 0xF;
21629 else
21630 info_ptr->altivec_padding_size = 0;
21632 info_ptr->altivec_save_offset
21633 = info_ptr->vrsave_save_offset
21634 - info_ptr->altivec_padding_size
21635 - info_ptr->altivec_size;
21636 gcc_assert (info_ptr->altivec_size == 0
21637 || info_ptr->altivec_save_offset % 16 == 0);
21639 /* Adjust for AltiVec case. */
21640 info_ptr->ehrd_offset = info_ptr->altivec_save_offset - ehrd_size;
21642 else
21643 info_ptr->ehrd_offset = info_ptr->gp_save_offset - ehrd_size;
21645 info_ptr->ehcr_offset = info_ptr->ehrd_offset - ehcr_size;
21646 info_ptr->cr_save_offset = reg_size; /* first word when 64-bit. */
21647 info_ptr->lr_save_offset = 2*reg_size;
21648 break;
21650 case ABI_V4:
21651 info_ptr->fp_save_offset = - info_ptr->fp_size;
21652 info_ptr->gp_save_offset = info_ptr->fp_save_offset - info_ptr->gp_size;
21653 info_ptr->cr_save_offset = info_ptr->gp_save_offset - info_ptr->cr_size;
21655 if (TARGET_SPE_ABI && info_ptr->spe_64bit_regs_used != 0)
21657 /* Align stack so SPE GPR save area is aligned on a
21658 double-word boundary. */
21659 if (info_ptr->spe_gp_size != 0 && info_ptr->cr_save_offset != 0)
21660 info_ptr->spe_padding_size
21661 = 8 - (-info_ptr->cr_save_offset % 8);
21662 else
21663 info_ptr->spe_padding_size = 0;
21665 info_ptr->spe_gp_save_offset
21666 = info_ptr->cr_save_offset
21667 - info_ptr->spe_padding_size
21668 - info_ptr->spe_gp_size;
21670 /* Adjust for SPE case. */
21671 info_ptr->ehrd_offset = info_ptr->spe_gp_save_offset;
21673 else if (TARGET_ALTIVEC_ABI)
21675 info_ptr->vrsave_save_offset
21676 = info_ptr->cr_save_offset - info_ptr->vrsave_size;
21678 /* Align stack so vector save area is on a quadword boundary. */
21679 if (info_ptr->altivec_size != 0)
21680 info_ptr->altivec_padding_size
21681 = 16 - (-info_ptr->vrsave_save_offset % 16);
21682 else
21683 info_ptr->altivec_padding_size = 0;
21685 info_ptr->altivec_save_offset
21686 = info_ptr->vrsave_save_offset
21687 - info_ptr->altivec_padding_size
21688 - info_ptr->altivec_size;
21690 /* Adjust for AltiVec case. */
21691 info_ptr->ehrd_offset = info_ptr->altivec_save_offset;
21693 else
21694 info_ptr->ehrd_offset = info_ptr->cr_save_offset;
21695 info_ptr->ehrd_offset -= ehrd_size;
21696 info_ptr->lr_save_offset = reg_size;
21697 break;
21700 save_align = (TARGET_ALTIVEC_ABI || DEFAULT_ABI == ABI_DARWIN) ? 16 : 8;
21701 info_ptr->save_size = RS6000_ALIGN (info_ptr->fp_size
21702 + info_ptr->gp_size
21703 + info_ptr->altivec_size
21704 + info_ptr->altivec_padding_size
21705 + info_ptr->spe_gp_size
21706 + info_ptr->spe_padding_size
21707 + ehrd_size
21708 + ehcr_size
21709 + info_ptr->cr_size
21710 + info_ptr->vrsave_size,
21711 save_align);
21713 non_fixed_size = (info_ptr->vars_size
21714 + info_ptr->parm_size
21715 + info_ptr->save_size);
21717 info_ptr->total_size = RS6000_ALIGN (non_fixed_size + info_ptr->fixed_size,
21718 ABI_STACK_BOUNDARY / BITS_PER_UNIT);
21720 /* Determine if we need to save the link register. */
21721 if (info_ptr->calls_p
21722 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
21723 && crtl->profile
21724 && !TARGET_PROFILE_KERNEL)
21725 || (DEFAULT_ABI == ABI_V4 && cfun->calls_alloca)
21726 #ifdef TARGET_RELOCATABLE
21727 || (TARGET_RELOCATABLE && (get_pool_size () != 0))
21728 #endif
21729 || rs6000_ra_ever_killed ())
21730 info_ptr->lr_save_p = 1;
21732 using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
21733 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
21734 && call_used_regs[STATIC_CHAIN_REGNUM]);
21735 info_ptr->savres_strategy = rs6000_savres_strategy (info_ptr,
21736 using_static_chain_p);
21738 if (!(info_ptr->savres_strategy & SAVE_INLINE_GPRS)
21739 || !(info_ptr->savres_strategy & SAVE_INLINE_FPRS)
21740 || !(info_ptr->savres_strategy & SAVE_INLINE_VRS)
21741 || !(info_ptr->savres_strategy & REST_INLINE_GPRS)
21742 || !(info_ptr->savres_strategy & REST_INLINE_FPRS)
21743 || !(info_ptr->savres_strategy & REST_INLINE_VRS))
21744 info_ptr->lr_save_p = 1;
21746 if (info_ptr->lr_save_p)
21747 df_set_regs_ever_live (LR_REGNO, true);
21749 /* Determine if we need to allocate any stack frame:
21751 For AIX we need to push the stack if a frame pointer is needed
21752 (because the stack might be dynamically adjusted), if we are
21753 debugging, if we make calls, or if the sum of fp_save, gp_save,
21754 and local variables are more than the space needed to save all
21755 non-volatile registers: 32-bit: 18*8 + 19*4 = 220 or 64-bit: 18*8
21756 + 18*8 = 288 (GPR13 reserved).
21758 For V.4 we don't have the stack cushion that AIX uses, but assume
21759 that the debugger can handle stackless frames. */
21761 if (info_ptr->calls_p)
21762 info_ptr->push_p = 1;
21764 else if (DEFAULT_ABI == ABI_V4)
21765 info_ptr->push_p = non_fixed_size != 0;
21767 else if (frame_pointer_needed)
21768 info_ptr->push_p = 1;
21770 else if (TARGET_XCOFF && write_symbols != NO_DEBUG)
21771 info_ptr->push_p = 1;
21773 else
21774 info_ptr->push_p = non_fixed_size > (TARGET_32BIT ? 220 : 288);
21776 /* Zero offsets if we're not saving those registers. */
21777 if (info_ptr->fp_size == 0)
21778 info_ptr->fp_save_offset = 0;
21780 if (info_ptr->gp_size == 0)
21781 info_ptr->gp_save_offset = 0;
21783 if (! TARGET_ALTIVEC_ABI || info_ptr->altivec_size == 0)
21784 info_ptr->altivec_save_offset = 0;
21786 /* Zero VRSAVE offset if not saved and restored. */
21787 if (! TARGET_ALTIVEC_VRSAVE || info_ptr->vrsave_mask == 0)
21788 info_ptr->vrsave_save_offset = 0;
21790 if (! TARGET_SPE_ABI
21791 || info_ptr->spe_64bit_regs_used == 0
21792 || info_ptr->spe_gp_size == 0)
21793 info_ptr->spe_gp_save_offset = 0;
21795 if (! info_ptr->lr_save_p)
21796 info_ptr->lr_save_offset = 0;
21798 if (! info_ptr->cr_save_p)
21799 info_ptr->cr_save_offset = 0;
21801 return info_ptr;
21804 /* Return true if the current function uses any GPRs in 64-bit SIMD
21805 mode. */
21807 static bool
21808 spe_func_has_64bit_regs_p (void)
21810 rtx_insn *insns, *insn;
21812 /* Functions that save and restore all the call-saved registers will
21813 need to save/restore the registers in 64-bits. */
21814 if (crtl->calls_eh_return
21815 || cfun->calls_setjmp
21816 || crtl->has_nonlocal_goto)
21817 return true;
21819 insns = get_insns ();
21821 for (insn = NEXT_INSN (insns); insn != NULL_RTX; insn = NEXT_INSN (insn))
21823 if (INSN_P (insn))
21825 rtx i;
21827 /* FIXME: This should be implemented with attributes...
21829 (set_attr "spe64" "true")....then,
21830 if (get_spe64(insn)) return true;
21832 It's the only reliable way to do the stuff below. */
21834 i = PATTERN (insn);
21835 if (GET_CODE (i) == SET)
21837 machine_mode mode = GET_MODE (SET_SRC (i));
21839 if (SPE_VECTOR_MODE (mode))
21840 return true;
21841 if (TARGET_E500_DOUBLE && (mode == DFmode || mode == TFmode))
21842 return true;
21847 return false;
21850 static void
21851 debug_stack_info (rs6000_stack_t *info)
21853 const char *abi_string;
21855 if (! info)
21856 info = rs6000_stack_info ();
21858 fprintf (stderr, "\nStack information for function %s:\n",
21859 ((current_function_decl && DECL_NAME (current_function_decl))
21860 ? IDENTIFIER_POINTER (DECL_NAME (current_function_decl))
21861 : "<unknown>"));
21863 switch (info->abi)
21865 default: abi_string = "Unknown"; break;
21866 case ABI_NONE: abi_string = "NONE"; break;
21867 case ABI_AIX: abi_string = "AIX"; break;
21868 case ABI_ELFv2: abi_string = "ELFv2"; break;
21869 case ABI_DARWIN: abi_string = "Darwin"; break;
21870 case ABI_V4: abi_string = "V.4"; break;
21873 fprintf (stderr, "\tABI = %5s\n", abi_string);
21875 if (TARGET_ALTIVEC_ABI)
21876 fprintf (stderr, "\tALTIVEC ABI extensions enabled.\n");
21878 if (TARGET_SPE_ABI)
21879 fprintf (stderr, "\tSPE ABI extensions enabled.\n");
21881 if (info->first_gp_reg_save != 32)
21882 fprintf (stderr, "\tfirst_gp_reg_save = %5d\n", info->first_gp_reg_save);
21884 if (info->first_fp_reg_save != 64)
21885 fprintf (stderr, "\tfirst_fp_reg_save = %5d\n", info->first_fp_reg_save);
21887 if (info->first_altivec_reg_save <= LAST_ALTIVEC_REGNO)
21888 fprintf (stderr, "\tfirst_altivec_reg_save = %5d\n",
21889 info->first_altivec_reg_save);
21891 if (info->lr_save_p)
21892 fprintf (stderr, "\tlr_save_p = %5d\n", info->lr_save_p);
21894 if (info->cr_save_p)
21895 fprintf (stderr, "\tcr_save_p = %5d\n", info->cr_save_p);
21897 if (info->vrsave_mask)
21898 fprintf (stderr, "\tvrsave_mask = 0x%x\n", info->vrsave_mask);
21900 if (info->push_p)
21901 fprintf (stderr, "\tpush_p = %5d\n", info->push_p);
21903 if (info->calls_p)
21904 fprintf (stderr, "\tcalls_p = %5d\n", info->calls_p);
21906 if (info->gp_save_offset)
21907 fprintf (stderr, "\tgp_save_offset = %5d\n", info->gp_save_offset);
21909 if (info->fp_save_offset)
21910 fprintf (stderr, "\tfp_save_offset = %5d\n", info->fp_save_offset);
21912 if (info->altivec_save_offset)
21913 fprintf (stderr, "\taltivec_save_offset = %5d\n",
21914 info->altivec_save_offset);
21916 if (info->spe_gp_save_offset)
21917 fprintf (stderr, "\tspe_gp_save_offset = %5d\n",
21918 info->spe_gp_save_offset);
21920 if (info->vrsave_save_offset)
21921 fprintf (stderr, "\tvrsave_save_offset = %5d\n",
21922 info->vrsave_save_offset);
21924 if (info->lr_save_offset)
21925 fprintf (stderr, "\tlr_save_offset = %5d\n", info->lr_save_offset);
21927 if (info->cr_save_offset)
21928 fprintf (stderr, "\tcr_save_offset = %5d\n", info->cr_save_offset);
21930 if (info->varargs_save_offset)
21931 fprintf (stderr, "\tvarargs_save_offset = %5d\n", info->varargs_save_offset);
21933 if (info->total_size)
21934 fprintf (stderr, "\ttotal_size = "HOST_WIDE_INT_PRINT_DEC"\n",
21935 info->total_size);
21937 if (info->vars_size)
21938 fprintf (stderr, "\tvars_size = "HOST_WIDE_INT_PRINT_DEC"\n",
21939 info->vars_size);
21941 if (info->parm_size)
21942 fprintf (stderr, "\tparm_size = %5d\n", info->parm_size);
21944 if (info->fixed_size)
21945 fprintf (stderr, "\tfixed_size = %5d\n", info->fixed_size);
21947 if (info->gp_size)
21948 fprintf (stderr, "\tgp_size = %5d\n", info->gp_size);
21950 if (info->spe_gp_size)
21951 fprintf (stderr, "\tspe_gp_size = %5d\n", info->spe_gp_size);
21953 if (info->fp_size)
21954 fprintf (stderr, "\tfp_size = %5d\n", info->fp_size);
21956 if (info->altivec_size)
21957 fprintf (stderr, "\taltivec_size = %5d\n", info->altivec_size);
21959 if (info->vrsave_size)
21960 fprintf (stderr, "\tvrsave_size = %5d\n", info->vrsave_size);
21962 if (info->altivec_padding_size)
21963 fprintf (stderr, "\taltivec_padding_size= %5d\n",
21964 info->altivec_padding_size);
21966 if (info->spe_padding_size)
21967 fprintf (stderr, "\tspe_padding_size = %5d\n",
21968 info->spe_padding_size);
21970 if (info->cr_size)
21971 fprintf (stderr, "\tcr_size = %5d\n", info->cr_size);
21973 if (info->save_size)
21974 fprintf (stderr, "\tsave_size = %5d\n", info->save_size);
21976 if (info->reg_size != 4)
21977 fprintf (stderr, "\treg_size = %5d\n", info->reg_size);
21979 fprintf (stderr, "\tsave-strategy = %04x\n", info->savres_strategy);
21981 fprintf (stderr, "\n");
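As a usage note: this dump is normally reached through the port's -mdebug plumbing (TARGET_DEBUG_STACK), so a build such as

  gcc -O2 -mdebug=stack foo.c

prints one "Stack information for function ..." block per function; the exact option spelling is an assumption about the rs6000 -mdebug handling.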
21984 rtx
21985 rs6000_return_addr (int count, rtx frame)
21987 /* Currently we don't optimize very well between prolog and body
21988 code and for PIC code the code can be actually quite bad, so
21989 don't try to be too clever here. */
21990 if (count != 0
21991 || ((DEFAULT_ABI == ABI_V4 || DEFAULT_ABI == ABI_DARWIN) && flag_pic))
21993 cfun->machine->ra_needs_full_frame = 1;
21995 return
21996 gen_rtx_MEM
21997 (Pmode,
21998 memory_address
21999 (Pmode,
22000 plus_constant (Pmode,
22001 copy_to_reg
22002 (gen_rtx_MEM (Pmode,
22003 memory_address (Pmode, frame))),
22004 RETURN_ADDRESS_OFFSET)));
22007 cfun->machine->ra_need_lr = 1;
22008 return get_hard_reg_initial_val (Pmode, LR_REGNO);
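To see both paths from the source level: COUNT comes from __builtin_return_address, so level 0 is normally satisfied from the initial value of LR, while any deeper level takes the frame-walking path above. A usage sketch (hypothetical function, for illustration only):

void *
who_called_my_caller (void)
{
  /* count == 1, so rs6000_return_addr walks the back chain and
     sets ra_needs_full_frame.  */
  return __builtin_return_address (1);
}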
22011 /* Return true if a function is a candidate for sibcall handling. */
22013 static bool
22014 rs6000_function_ok_for_sibcall (tree decl, tree exp)
22016 tree fntype;
22018 if (decl)
22019 fntype = TREE_TYPE (decl);
22020 else
22021 fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
22023 /* We can't do it if the called function has more vector parameters
22024 than the current function; there's nowhere to put the VRsave code. */
22025 if (TARGET_ALTIVEC_ABI
22026 && TARGET_ALTIVEC_VRSAVE
22027 && !(decl && decl == current_function_decl))
22029 function_args_iterator args_iter;
22030 tree type;
22031 int nvreg = 0;
22033 /* Functions with vector parameters are required to have a
22034 prototype, so the argument type info must be available
22035 here. */
22036 FOREACH_FUNCTION_ARGS(fntype, type, args_iter)
22037 if (TREE_CODE (type) == VECTOR_TYPE
22038 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22039 nvreg++;
22041 FOREACH_FUNCTION_ARGS(TREE_TYPE (current_function_decl), type, args_iter)
22042 if (TREE_CODE (type) == VECTOR_TYPE
22043 && ALTIVEC_OR_VSX_VECTOR_MODE (TYPE_MODE (type)))
22044 nvreg--;
22046 if (nvreg > 0)
22047 return false;
22050 /* Under the AIX or ELFv2 ABIs we can't allow calls to non-local
22051 functions, because the callee may have a TOC pointer different from
22052 the caller's and there's no way to ensure we restore the TOC when
22053 we return. With the secure-plt SYSV ABI we can't make non-local
22054 calls when -fpic/PIC because the PLT call stubs use r30. */
22055 if (DEFAULT_ABI == ABI_DARWIN
22056 || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22057 && decl
22058 && !DECL_EXTERNAL (decl)
22059 && (*targetm.binds_local_p) (decl))
22060 || (DEFAULT_ABI == ABI_V4
22061 && (!TARGET_SECURE_PLT
22062 || !flag_pic
22063 || (decl
22064 && (*targetm.binds_local_p) (decl)))))
22066 tree attr_list = TYPE_ATTRIBUTES (fntype);
22068 if (!lookup_attribute ("longcall", attr_list)
22069 || lookup_attribute ("shortcall", attr_list))
22070 return true;
22073 return false;
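A source-level illustration of the locality test above, using hypothetical functions: on AIX/ELFv2 the tail call is only allowed when the callee is known to share the caller's TOC.

static int helper (int x) { return x + 1; }   /* binds locally */
int extern_helper (int x);                    /* may be in another module */

int ok_sibcall (int x) { return helper (x); }        /* sibcall candidate */
int no_sibcall (int x) { return extern_helper (x); } /* stays a full call
                                                        on AIX/ELFv2 */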
22076 static int
22077 rs6000_ra_ever_killed (void)
22079 rtx_insn *top;
22080 rtx reg;
22081 rtx_insn *insn;
22083 if (cfun->is_thunk)
22084 return 0;
22086 if (cfun->machine->lr_save_state)
22087 return cfun->machine->lr_save_state - 1;
22089 /* regs_ever_live has LR marked as used if any sibcalls are present,
22090 but this should not force saving and restoring in the
22091 pro/epilogue. Likewise, reg_set_between_p thinks a sibcall
22092 clobbers LR, so that is inappropriate. */
22094 /* Also, the prologue can generate a store into LR that
22095 doesn't really count, like this:
22097 move LR->R0
22098 bcl to set PIC register
22099 move LR->R31
22100 move R0->LR
22102 When we're called from the epilogue, we need to avoid counting
22103 this as a store. */
22105 push_topmost_sequence ();
22106 top = get_insns ();
22107 pop_topmost_sequence ();
22108 reg = gen_rtx_REG (Pmode, LR_REGNO);
22110 for (insn = NEXT_INSN (top); insn != NULL_RTX; insn = NEXT_INSN (insn))
22112 if (INSN_P (insn))
22114 if (CALL_P (insn))
22116 if (!SIBLING_CALL_P (insn))
22117 return 1;
22119 else if (find_regno_note (insn, REG_INC, LR_REGNO))
22120 return 1;
22121 else if (set_of (reg, insn) != NULL_RTX
22122 && !prologue_epilogue_contains (insn))
22123 return 1;
22126 return 0;
22129 /* Emit instructions needed to load the TOC register.
22130 This is needed only when TARGET_TOC and TARGET_MINIMAL_TOC are set
22131 and there is a constant pool, or for SVR4 -fpic. */
22133 void
22134 rs6000_emit_load_toc_table (int fromprolog)
22136 rtx dest;
22137 dest = gen_rtx_REG (Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
22139 if (TARGET_ELF && TARGET_SECURE_PLT && DEFAULT_ABI == ABI_V4 && flag_pic)
22141 char buf[30];
22142 rtx lab, tmp1, tmp2, got;
22144 lab = gen_label_rtx ();
22145 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (lab));
22146 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22147 if (flag_pic == 2)
22148 got = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22149 else
22150 got = rs6000_got_sym ();
22151 tmp1 = tmp2 = dest;
22152 if (!fromprolog)
22154 tmp1 = gen_reg_rtx (Pmode);
22155 tmp2 = gen_reg_rtx (Pmode);
22157 emit_insn (gen_load_toc_v4_PIC_1 (lab));
22158 emit_move_insn (tmp1, gen_rtx_REG (Pmode, LR_REGNO));
22159 emit_insn (gen_load_toc_v4_PIC_3b (tmp2, tmp1, got, lab));
22160 emit_insn (gen_load_toc_v4_PIC_3c (dest, tmp2, got, lab));
22162 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 1)
22164 emit_insn (gen_load_toc_v4_pic_si ());
22165 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22167 else if (TARGET_ELF && DEFAULT_ABI == ABI_V4 && flag_pic == 2)
22169 char buf[30];
22170 rtx temp0 = (fromprolog
22171 ? gen_rtx_REG (Pmode, 0)
22172 : gen_reg_rtx (Pmode));
22174 if (fromprolog)
22176 rtx symF, symL;
22178 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
22179 symF = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22181 ASM_GENERATE_INTERNAL_LABEL (buf, "LCL", rs6000_pic_labelno);
22182 symL = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22184 emit_insn (gen_load_toc_v4_PIC_1 (symF));
22185 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22186 emit_insn (gen_load_toc_v4_PIC_2 (temp0, dest, symL, symF));
22188 else
22190 rtx tocsym, lab;
22192 tocsym = gen_rtx_SYMBOL_REF (Pmode, toc_label_name);
22193 lab = gen_label_rtx ();
22194 emit_insn (gen_load_toc_v4_PIC_1b (tocsym, lab));
22195 emit_move_insn (dest, gen_rtx_REG (Pmode, LR_REGNO));
22196 if (TARGET_LINK_STACK)
22197 emit_insn (gen_addsi3 (dest, dest, GEN_INT (4)));
22198 emit_move_insn (temp0, gen_rtx_MEM (Pmode, dest));
22200 emit_insn (gen_addsi3 (dest, temp0, dest));
22202 else if (TARGET_ELF && !TARGET_AIX && flag_pic == 0 && TARGET_MINIMAL_TOC)
22204 /* This is for AIX code running in non-PIC ELF32. */
22205 char buf[30];
22206 rtx realsym;
22207 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
22208 realsym = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
22210 emit_insn (gen_elf_high (dest, realsym));
22211 emit_insn (gen_elf_low (dest, dest, realsym));
22213 else
22215 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
22217 if (TARGET_32BIT)
22218 emit_insn (gen_load_toc_aix_si (dest));
22219 else
22220 emit_insn (gen_load_toc_aix_di (dest));
22224 /* Emit instructions to restore the link register after determining where
22225 its value has been stored. */
22227 void
22228 rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
22230 rs6000_stack_t *info = rs6000_stack_info ();
22231 rtx operands[2];
22233 operands[0] = source;
22234 operands[1] = scratch;
22236 if (info->lr_save_p)
22238 rtx frame_rtx = stack_pointer_rtx;
22239 HOST_WIDE_INT sp_offset = 0;
22240 rtx tmp;
22242 if (frame_pointer_needed
22243 || cfun->calls_alloca
22244 || info->total_size > 32767)
22246 tmp = gen_frame_mem (Pmode, frame_rtx);
22247 emit_move_insn (operands[1], tmp);
22248 frame_rtx = operands[1];
22250 else if (info->push_p)
22251 sp_offset = info->total_size;
22253 tmp = plus_constant (Pmode, frame_rtx,
22254 info->lr_save_offset + sp_offset);
22255 tmp = gen_frame_mem (Pmode, tmp);
22256 emit_move_insn (tmp, operands[0]);
22258 else
22259 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNO), operands[0]);
22261 /* Freeze lr_save_p. We've just emitted rtl that depends on the
22262 state of lr_save_p so any change from here on would be a bug. In
22263 particular, stop rs6000_ra_ever_killed from considering the SET
22264 of lr we may have added just above. */
22265 cfun->machine->lr_save_state = info->lr_save_p + 1;
22268 static GTY(()) alias_set_type set = -1;
22270 alias_set_type
22271 get_TOC_alias_set (void)
22273 if (set == -1)
22274 set = new_alias_set ();
22275 return set;
22278 /* This returns nonzero if the current function uses the TOC. This is
22279 determined by the presence of (use (unspec ... UNSPEC_TOC)), which
22280 is generated by the ABI_V4 load_toc_* patterns. */
22281 #if TARGET_ELF
22282 static int
22283 uses_TOC (void)
22285 rtx_insn *insn;
22287 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
22288 if (INSN_P (insn))
22290 rtx pat = PATTERN (insn);
22291 int i;
22293 if (GET_CODE (pat) == PARALLEL)
22294 for (i = 0; i < XVECLEN (pat, 0); i++)
22296 rtx sub = XVECEXP (pat, 0, i);
22297 if (GET_CODE (sub) == USE)
22299 sub = XEXP (sub, 0);
22300 if (GET_CODE (sub) == UNSPEC
22301 && XINT (sub, 1) == UNSPEC_TOC)
22302 return 1;
22306 return 0;
22308 #endif
22310 rtx
22311 create_TOC_reference (rtx symbol, rtx largetoc_reg)
22313 rtx tocrel, tocreg, hi;
22315 if (TARGET_DEBUG_ADDR)
22317 if (GET_CODE (symbol) == SYMBOL_REF)
22318 fprintf (stderr, "\ncreate_TOC_reference, (symbol_ref %s)\n",
22319 XSTR (symbol, 0));
22320 else
22322 fprintf (stderr, "\ncreate_TOC_reference, code %s:\n",
22323 GET_RTX_NAME (GET_CODE (symbol)));
22324 debug_rtx (symbol);
22328 if (!can_create_pseudo_p ())
22329 df_set_regs_ever_live (TOC_REGISTER, true);
22331 tocreg = gen_rtx_REG (Pmode, TOC_REGISTER);
22332 tocrel = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, symbol, tocreg), UNSPEC_TOCREL);
22333 if (TARGET_CMODEL == CMODEL_SMALL || can_create_pseudo_p ())
22334 return tocrel;
22336 hi = gen_rtx_HIGH (Pmode, copy_rtx (tocrel));
22337 if (largetoc_reg != NULL)
22339 emit_move_insn (largetoc_reg, hi);
22340 hi = largetoc_reg;
22342 return gen_rtx_LO_SUM (Pmode, hi, tocrel);
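For orientation, the two shapes built here correspond roughly to the following 64-bit assembly; the exact relocation spellings are the assembler's, not this function's:

/*   CMODEL_SMALL (single UNSPEC_TOCREL):
         ld    9,sym@toc(2)
     medium/large model (the HIGH/LO_SUM pair above):
         addis 9,2,sym@toc@ha
         ld    9,sym@toc@l(9)                                        */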
22345 /* Issue assembly directives that create a reference to the given DWARF
22346 FRAME_TABLE_LABEL from the current function section. */
22347 void
22348 rs6000_aix_asm_output_dwarf_table_ref (char * frame_table_label)
22350 fprintf (asm_out_file, "\t.ref %s\n",
22351 (* targetm.strip_name_encoding) (frame_table_label));
22354 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
22355 and the change to the stack pointer. */
22357 static void
22358 rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
22360 rtvec p;
22361 int i;
22362 rtx regs[3];
22364 i = 0;
22365 regs[i++] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22366 if (hard_frame_needed)
22367 regs[i++] = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
22368 if (!(REGNO (fp) == STACK_POINTER_REGNUM
22369 || (hard_frame_needed
22370 && REGNO (fp) == HARD_FRAME_POINTER_REGNUM)))
22371 regs[i++] = fp;
22373 p = rtvec_alloc (i);
22374 while (--i >= 0)
22376 rtx mem = gen_frame_mem (BLKmode, regs[i]);
22377 RTVEC_ELT (p, i) = gen_rtx_SET (VOIDmode, mem, const0_rtx);
22380 emit_insn (gen_stack_tie (gen_rtx_PARALLEL (VOIDmode, p)));
22383 /* Emit the correct code for allocating stack space, as insns.
22384 If COPY_REG, make sure a copy of the old frame is left there.
22385 The generated code may use hard register 0 as a temporary. */
22387 static void
22388 rs6000_emit_allocate_stack (HOST_WIDE_INT size, rtx copy_reg, int copy_off)
22390 rtx_insn *insn;
22391 rtx stack_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
22392 rtx tmp_reg = gen_rtx_REG (Pmode, 0);
22393 rtx todec = gen_int_mode (-size, Pmode);
22394 rtx par, set, mem;
22396 if (INTVAL (todec) != -size)
22398 warning (0, "stack frame too large");
22399 emit_insn (gen_trap ());
22400 return;
22403 if (crtl->limit_stack)
22405 if (REG_P (stack_limit_rtx)
22406 && REGNO (stack_limit_rtx) > 1
22407 && REGNO (stack_limit_rtx) <= 31)
22409 emit_insn (gen_add3_insn (tmp_reg, stack_limit_rtx, GEN_INT (size)));
22410 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22411 const0_rtx));
22413 else if (GET_CODE (stack_limit_rtx) == SYMBOL_REF
22414 && TARGET_32BIT
22415 && DEFAULT_ABI == ABI_V4)
22417 rtx toload = gen_rtx_CONST (VOIDmode,
22418 gen_rtx_PLUS (Pmode,
22419 stack_limit_rtx,
22420 GEN_INT (size)));
22422 emit_insn (gen_elf_high (tmp_reg, toload));
22423 emit_insn (gen_elf_low (tmp_reg, tmp_reg, toload));
22424 emit_insn (gen_cond_trap (LTU, stack_reg, tmp_reg,
22425 const0_rtx));
22427 else
22428 warning (0, "stack limit expression is not supported");
22431 if (copy_reg)
22433 if (copy_off != 0)
22434 emit_insn (gen_add3_insn (copy_reg, stack_reg, GEN_INT (copy_off)));
22435 else
22436 emit_move_insn (copy_reg, stack_reg);
22439 if (size > 32767)
22441 /* Need a note here so that try_split doesn't get confused. */
22442 if (get_last_insn () == NULL_RTX)
22443 emit_note (NOTE_INSN_DELETED);
22444 insn = emit_move_insn (tmp_reg, todec);
22445 try_split (PATTERN (insn), insn, 0);
22446 todec = tmp_reg;
22449 insn = emit_insn (TARGET_32BIT
22450 ? gen_movsi_update_stack (stack_reg, stack_reg,
22451 todec, stack_reg)
22452 : gen_movdi_di_update_stack (stack_reg, stack_reg,
22453 todec, stack_reg));
22454 /* Since we didn't use gen_frame_mem to generate the MEM, grab
22455 it now and set the alias set/attributes. The above gen_*_update
22456 calls will generate a PARALLEL with the MEM set being the first
22457 operation. */
22458 par = PATTERN (insn);
22459 gcc_assert (GET_CODE (par) == PARALLEL);
22460 set = XVECEXP (par, 0, 0);
22461 gcc_assert (GET_CODE (set) == SET);
22462 mem = SET_DEST (set);
22463 gcc_assert (MEM_P (mem));
22464 MEM_NOTRAP_P (mem) = 1;
22465 set_mem_alias_set (mem, get_frame_alias_set ());
22467 RTX_FRAME_RELATED_P (insn) = 1;
22468 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
22469 gen_rtx_SET (VOIDmode, stack_reg,
22470 gen_rtx_PLUS (Pmode, stack_reg,
22471 GEN_INT (-size))));
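The update-form store emitted above is the classic PowerPC frame allocation: one instruction writes the back chain at the new stack pointer and decrements r1 atomically. Roughly, assuming the usual output of the gen_mov*_update_stack patterns:

/*   size <= 32767:  stwu 1,-SIZE(1)  (32-bit) / stdu 1,-SIZE(1)  (64-bit)
     larger sizes:   -SIZE is first loaded into r0 (tmp_reg, split above),
                     then stwux/stdux 1,1,0 performs the indexed update.  */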
22474 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
22476 #if PROBE_INTERVAL > 32768
22477 #error Cannot use indexed addressing mode for stack probing
22478 #endif
22480 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
22481 inclusive. These are offsets from the current stack pointer. */
22483 static void
22484 rs6000_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
22486 /* See if we have a constant small number of probes to generate. If so,
22487 that's the easy case. */
22488 if (first + size <= 32768)
22490 HOST_WIDE_INT i;
22492 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 1 until
22493 it exceeds SIZE. If only one probe is needed, this will not
22494 generate any code. Then probe at FIRST + SIZE. */
22495 for (i = PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
22496 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22497 -(first + i)));
22499 emit_stack_probe (plus_constant (Pmode, stack_pointer_rtx,
22500 -(first + size)));
22503 /* Otherwise, do the same as above, but in a loop. Note that we must be
22504 extra careful with variables wrapping around because we might be at
22505 the very top (or the very bottom) of the address space and we have
22506 to be able to handle this case properly; in particular, we use an
22507 equality test for the loop condition. */
22508 else
22510 HOST_WIDE_INT rounded_size;
22511 rtx r12 = gen_rtx_REG (Pmode, 12);
22512 rtx r0 = gen_rtx_REG (Pmode, 0);
22514 /* Sanity check for the addressing mode we're going to use. */
22515 gcc_assert (first <= 32768);
22517 /* Step 1: round SIZE to the previous multiple of the interval. */
22519 rounded_size = size & -PROBE_INTERVAL;
22522 /* Step 2: compute initial and final value of the loop counter. */
22524 /* TEST_ADDR = SP + FIRST. */
22525 emit_insn (gen_rtx_SET (VOIDmode, r12,
22526 plus_constant (Pmode, stack_pointer_rtx,
22527 -first)));
22529 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
22530 if (rounded_size > 32768)
22532 emit_move_insn (r0, GEN_INT (-rounded_size));
22533 emit_insn (gen_rtx_SET (VOIDmode, r0,
22534 gen_rtx_PLUS (Pmode, r12, r0)));
22536 else
22537 emit_insn (gen_rtx_SET (VOIDmode, r0,
22538 plus_constant (Pmode, r12, -rounded_size)));
22541 /* Step 3: the loop
22543 while (TEST_ADDR != LAST_ADDR)
22545 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
22546 probe at TEST_ADDR
22549 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
22550 until it is equal to ROUNDED_SIZE. */
22552 if (TARGET_64BIT)
22553 emit_insn (gen_probe_stack_rangedi (r12, r12, r0));
22554 else
22555 emit_insn (gen_probe_stack_rangesi (r12, r12, r0));
22558 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
22559 that SIZE is equal to ROUNDED_SIZE. */
22561 if (size != rounded_size)
22562 emit_stack_probe (plus_constant (Pmode, r12, rounded_size - size));
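A worked instance of the small-constant branch, assuming the default PROBE_INTERVAL of 4096 with first == 16384 and size == 10000 (so first + size <= 32768): the loop probes sp-20480 and sp-24576, and the final probe lands at sp-26384. A standalone mirror of the placement:

#include <stdio.h>

int
main (void)
{
  const int first = 16384, size = 10000, interval = 4096;
  int i;
  for (i = interval; i < size; i += interval)
    printf ("probe at sp-%d\n", first + i);   /* 20480, 24576 */
  printf ("probe at sp-%d\n", first + size);  /* 26384 */
  return 0;
}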
22566 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
22567 absolute addresses. */
22569 const char *
22570 output_probe_stack_range (rtx reg1, rtx reg2)
22572 static int labelno = 0;
22573 char loop_lab[32], end_lab[32];
22574 rtx xops[2];
22576 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno);
22577 ASM_GENERATE_INTERNAL_LABEL (end_lab, "LPSRE", labelno++);
22579 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
22581 /* Jump to END_LAB if TEST_ADDR == LAST_ADDR. */
22582 xops[0] = reg1;
22583 xops[1] = reg2;
22584 if (TARGET_64BIT)
22585 output_asm_insn ("cmpd 0,%0,%1", xops);
22586 else
22587 output_asm_insn ("cmpw 0,%0,%1", xops);
22589 fputs ("\tbeq 0,", asm_out_file);
22590 assemble_name_raw (asm_out_file, end_lab);
22591 fputc ('\n', asm_out_file);
22593 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
22594 xops[1] = GEN_INT (-PROBE_INTERVAL);
22595 output_asm_insn ("addi %0,%0,%1", xops);
22597 /* Probe at TEST_ADDR and branch. */
22598 xops[1] = gen_rtx_REG (Pmode, 0);
22599 output_asm_insn ("stw %1,0(%0)", xops);
22600 fprintf (asm_out_file, "\tb ");
22601 assemble_name_raw (asm_out_file, loop_lab);
22602 fputc ('\n', asm_out_file);
22604 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, end_lab);
22606 return "";
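Assembled, the loop printed by this function looks like the following, assuming 64-bit code and PROBE_INTERVAL == 4096 (label numbers are per loop):

/*   .LPSRL0:
         cmpd  0,12,0          # TEST_ADDR == LAST_ADDR ?
         beq   0,.LPSRE0
         addi  12,12,-4096     # advance TEST_ADDR
         stw   0,0(12)         # the probe itself
         b     .LPSRL0
     .LPSRE0:                                                        */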
22609 /* Add to 'insn' a note which is PATTERN (INSN) but with REG replaced
22610 with (plus:P (reg 1) VAL), and with REG2 replaced with RREG if REG2
22611 is not NULL. It would be nice if dwarf2out_frame_debug_expr could
22612 deduce these equivalences by itself so it wasn't necessary to hold
22613 its hand so much. Don't be tempted to always supply d2_f_d_e with
22614 the actual CFA register, i.e. r31 when we are using a hard frame
22615 pointer. That fails when saving regs off r1, and sched moves the
22616 r31 setup past the reg saves. */
22618 static rtx
22619 rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val,
22620 rtx reg2, rtx rreg, rtx split_reg)
22622 rtx real, temp;
22624 if (REGNO (reg) == STACK_POINTER_REGNUM && reg2 == NULL_RTX)
22626 /* No need for any replacement. Just set RTX_FRAME_RELATED_P. */
22627 int i;
22629 gcc_checking_assert (val == 0);
22630 real = PATTERN (insn);
22631 if (GET_CODE (real) == PARALLEL)
22632 for (i = 0; i < XVECLEN (real, 0); i++)
22633 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22635 rtx set = XVECEXP (real, 0, i);
22637 RTX_FRAME_RELATED_P (set) = 1;
22639 RTX_FRAME_RELATED_P (insn) = 1;
22640 return insn;
22643 /* copy_rtx will not make unique copies of registers, so we need to
22644 ensure we don't have unwanted sharing here. */
22645 if (reg == reg2)
22646 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22648 if (reg == rreg)
22649 reg = gen_raw_REG (GET_MODE (reg), REGNO (reg));
22651 real = copy_rtx (PATTERN (insn));
22653 if (reg2 != NULL_RTX)
22654 real = replace_rtx (real, reg2, rreg);
22656 if (REGNO (reg) == STACK_POINTER_REGNUM)
22657 gcc_checking_assert (val == 0);
22658 else
22659 real = replace_rtx (real, reg,
22660 gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode,
22661 STACK_POINTER_REGNUM),
22662 GEN_INT (val)));
22664 /* We expect that 'real' is either a SET or a PARALLEL containing
22665 SETs (and possibly other stuff). In a PARALLEL, all the SETs
22666 are important so they all have to be marked RTX_FRAME_RELATED_P. */
22668 if (GET_CODE (real) == SET)
22670 rtx set = real;
22672 temp = simplify_rtx (SET_SRC (set));
22673 if (temp)
22674 SET_SRC (set) = temp;
22675 temp = simplify_rtx (SET_DEST (set));
22676 if (temp)
22677 SET_DEST (set) = temp;
22678 if (GET_CODE (SET_DEST (set)) == MEM)
22680 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22681 if (temp)
22682 XEXP (SET_DEST (set), 0) = temp;
22685 else
22687 int i;
22689 gcc_assert (GET_CODE (real) == PARALLEL);
22690 for (i = 0; i < XVECLEN (real, 0); i++)
22691 if (GET_CODE (XVECEXP (real, 0, i)) == SET)
22693 rtx set = XVECEXP (real, 0, i);
22695 temp = simplify_rtx (SET_SRC (set));
22696 if (temp)
22697 SET_SRC (set) = temp;
22698 temp = simplify_rtx (SET_DEST (set));
22699 if (temp)
22700 SET_DEST (set) = temp;
22701 if (GET_CODE (SET_DEST (set)) == MEM)
22703 temp = simplify_rtx (XEXP (SET_DEST (set), 0));
22704 if (temp)
22705 XEXP (SET_DEST (set), 0) = temp;
22707 RTX_FRAME_RELATED_P (set) = 1;
22711 /* If a store insn has been split into multiple insns, the
22712 true source register is given by split_reg. */
22713 if (split_reg != NULL_RTX)
22714 real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg);
22716 RTX_FRAME_RELATED_P (insn) = 1;
22717 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
22719 return insn;
22722 /* Returns an insn that has a vrsave set operation with the
22723 appropriate CLOBBERs. */
22725 static rtx
22726 generate_set_vrsave (rtx reg, rs6000_stack_t *info, int epiloguep)
22728 int nclobs, i;
22729 rtx insn, clobs[TOTAL_ALTIVEC_REGS + 1];
22730 rtx vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
22732 clobs[0]
22733 = gen_rtx_SET (VOIDmode,
22734 vrsave,
22735 gen_rtx_UNSPEC_VOLATILE (SImode,
22736 gen_rtvec (2, reg, vrsave),
22737 UNSPECV_SET_VRSAVE));
22739 nclobs = 1;
22741 /* We need to clobber the registers in the mask so the scheduler
22742 does not move sets to VRSAVE before sets of AltiVec registers.
22744 However, if the function receives nonlocal gotos, reload will set
22745 all call saved registers live. We will end up with:
22747 (set (reg 999) (mem))
22748 (parallel [ (set (reg vrsave) (unspec blah))
22749 (clobber (reg 999))])
22751 The clobber will cause the store into reg 999 to be dead, and
22752 flow will attempt to delete an epilogue insn. In this case, we
22753 need an unspec use/set of the register. */
22755 for (i = FIRST_ALTIVEC_REGNO; i <= LAST_ALTIVEC_REGNO; ++i)
22756 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
22758 if (!epiloguep || call_used_regs [i])
22759 clobs[nclobs++] = gen_rtx_CLOBBER (VOIDmode,
22760 gen_rtx_REG (V4SImode, i));
22761 else
22763 rtx reg = gen_rtx_REG (V4SImode, i);
22765 clobs[nclobs++]
22766 = gen_rtx_SET (VOIDmode,
22767 reg,
22768 gen_rtx_UNSPEC (V4SImode,
22769 gen_rtvec (1, reg), 27));
22773 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nclobs));
22775 for (i = 0; i < nclobs; ++i)
22776 XVECEXP (insn, 0, i) = clobs[i];
22778 return insn;
22781 static rtx
22782 gen_frame_set (rtx reg, rtx frame_reg, int offset, bool store)
22784 rtx addr, mem;
22786 addr = gen_rtx_PLUS (Pmode, frame_reg, GEN_INT (offset));
22787 mem = gen_frame_mem (GET_MODE (reg), addr);
22788 return gen_rtx_SET (VOIDmode, store ? mem : reg, store ? reg : mem);
22791 static rtx
22792 gen_frame_load (rtx reg, rtx frame_reg, int offset)
22794 return gen_frame_set (reg, frame_reg, offset, false);
22797 static rtx
22798 gen_frame_store (rtx reg, rtx frame_reg, int offset)
22800 return gen_frame_set (reg, frame_reg, offset, true);
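For orientation, the RTL these helpers build is a plain SET on a frame MEM; e.g. gen_frame_store (r31, r1, 8) on a 64-bit target yields roughly:

/*   (set (mem/c:DI (plus:DI (reg:DI 1) (const_int 8)))
          (reg:DI 31))
     with the frame alias set attached by gen_frame_mem.  */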
22803 /* Save a register into the frame, and emit RTX_FRAME_RELATED_P notes.
22804 Save REGNO into [FRAME_REG + OFFSET] in mode MODE. */
22806 static rtx
22807 emit_frame_save (rtx frame_reg, machine_mode mode,
22808 unsigned int regno, int offset, HOST_WIDE_INT frame_reg_to_sp)
22810 rtx reg, insn;
22812 /* Some cases that need register indexed addressing. */
22813 gcc_checking_assert (!((TARGET_ALTIVEC_ABI && ALTIVEC_VECTOR_MODE (mode))
22814 || (TARGET_VSX && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
22815 || (TARGET_E500_DOUBLE && mode == DFmode)
22816 || (TARGET_SPE_ABI
22817 && SPE_VECTOR_MODE (mode)
22818 && !SPE_CONST_OFFSET_OK (offset))));
22820 reg = gen_rtx_REG (mode, regno);
22821 insn = emit_insn (gen_frame_store (reg, frame_reg, offset));
22822 return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp,
22823 NULL_RTX, NULL_RTX, NULL_RTX);
22826 /* Emit an offset memory reference suitable for a frame store, while
22827 converting to a valid addressing mode. */
22829 static rtx
22830 gen_frame_mem_offset (machine_mode mode, rtx reg, int offset)
22832 rtx int_rtx, offset_rtx;
22834 int_rtx = GEN_INT (offset);
22836 if ((TARGET_SPE_ABI && SPE_VECTOR_MODE (mode) && !SPE_CONST_OFFSET_OK (offset))
22837 || (TARGET_E500_DOUBLE && mode == DFmode))
22839 offset_rtx = gen_rtx_REG (Pmode, FIXED_SCRATCH);
22840 emit_move_insn (offset_rtx, int_rtx);
22842 else
22843 offset_rtx = int_rtx;
22845 return gen_frame_mem (mode, gen_rtx_PLUS (Pmode, reg, offset_rtx));
22848 #ifndef TARGET_FIX_AND_CONTINUE
22849 #define TARGET_FIX_AND_CONTINUE 0
22850 #endif
22852 /* The first saved register is really GPR 13 or 14, FPR 14, or VR 20; we need the smallest of these. */
22853 #define FIRST_SAVRES_REGISTER FIRST_SAVED_GP_REGNO
22854 #define LAST_SAVRES_REGISTER 31
22855 #define N_SAVRES_REGISTERS (LAST_SAVRES_REGISTER - FIRST_SAVRES_REGISTER + 1)
22857 enum {
22858 SAVRES_LR = 0x1,
22859 SAVRES_SAVE = 0x2,
22860 SAVRES_REG = 0x0c,
22861 SAVRES_GPR = 0,
22862 SAVRES_FPR = 4,
22863 SAVRES_VR = 8
22866 static GTY(()) rtx savres_routine_syms[N_SAVRES_REGISTERS][12];
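The SEL values used below are small bitsets combining one SAVRES_REG class with the SAVE and LR flags; for example, an out-of-line FPR save that also stores the link register is requested as:

/* 0x2 | 0x4 | 0x1 == 0x7; (sel & SAVRES_REG) extracts the class.  */
int sel = SAVRES_SAVE | SAVRES_FPR | SAVRES_LR;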
22868 /* Temporary holding space for an out-of-line register save/restore
22869 routine name. */
22870 static char savres_routine_name[30];
22872 /* Return the name for an out-of-line register save/restore routine.
22873 SEL encodes the register class, save vs. restore, and LR handling. */
22875 static char *
22876 rs6000_savres_routine_name (rs6000_stack_t *info, int regno, int sel)
22878 const char *prefix = "";
22879 const char *suffix = "";
22881 /* Different targets are supposed to define
22882 {SAVE,RESTORE}_FP_{PREFIX,SUFFIX} with the idea that the needed
22883 routine name could be defined with:
22885 sprintf (name, "%s%d%s", SAVE_FP_PREFIX, regno, SAVE_FP_SUFFIX)
22887 This is a nice idea in theory, but in practice, things are
22888 complicated in several ways:
22890 - ELF targets have save/restore routines for GPRs.
22892 - SPE targets use different prefixes for 32/64-bit registers, and
22893 neither of them fit neatly in the FOO_{PREFIX,SUFFIX} regimen.
22895 - PPC64 ELF targets have routines for save/restore of GPRs that
22896 differ in what they do with the link register, so having a set
22897 prefix doesn't work. (We only use one of the save routines at
22898 the moment, though.)
22900 - PPC32 ELF targets have "exit" versions of the restore routines
22901 that restore the link register and can save some extra space.
22902 These require an extra suffix. (There are also "tail" versions
22903 of the restore routines and "GOT" versions of the save routines,
22904 but we don't generate those at present. Same problems apply,
22905 though.)
22907 We deal with all this by synthesizing our own prefix/suffix and
22908 using that for the simple sprintf call shown above. */
22909 if (TARGET_SPE)
22911 /* No floating point saves on the SPE. */
22912 gcc_assert ((sel & SAVRES_REG) == SAVRES_GPR);
22914 if ((sel & SAVRES_SAVE))
22915 prefix = info->spe_64bit_regs_used ? "_save64gpr_" : "_save32gpr_";
22916 else
22917 prefix = info->spe_64bit_regs_used ? "_rest64gpr_" : "_rest32gpr_";
22919 if ((sel & SAVRES_LR))
22920 suffix = "_x";
22922 else if (DEFAULT_ABI == ABI_V4)
22924 if (TARGET_64BIT)
22925 goto aix_names;
22927 if ((sel & SAVRES_REG) == SAVRES_GPR)
22928 prefix = (sel & SAVRES_SAVE) ? "_savegpr_" : "_restgpr_";
22929 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22930 prefix = (sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_";
22931 else if ((sel & SAVRES_REG) == SAVRES_VR)
22932 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
22933 else
22934 abort ();
22936 if ((sel & SAVRES_LR))
22937 suffix = "_x";
22939 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
22941 #if !defined (POWERPC_LINUX) && !defined (POWERPC_FREEBSD)
22942 /* No out-of-line save/restore routines for GPRs on AIX. */
22943 gcc_assert (!TARGET_AIX || (sel & SAVRES_REG) != SAVRES_GPR);
22944 #endif
22946 aix_names:
22947 if ((sel & SAVRES_REG) == SAVRES_GPR)
22948 prefix = ((sel & SAVRES_SAVE)
22949 ? ((sel & SAVRES_LR) ? "_savegpr0_" : "_savegpr1_")
22950 : ((sel & SAVRES_LR) ? "_restgpr0_" : "_restgpr1_"));
22951 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22953 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
22954 if ((sel & SAVRES_LR))
22955 prefix = ((sel & SAVRES_SAVE) ? "_savefpr_" : "_restfpr_");
22956 else
22957 #endif
22959 prefix = (sel & SAVRES_SAVE) ? SAVE_FP_PREFIX : RESTORE_FP_PREFIX;
22960 suffix = (sel & SAVRES_SAVE) ? SAVE_FP_SUFFIX : RESTORE_FP_SUFFIX;
22963 else if ((sel & SAVRES_REG) == SAVRES_VR)
22964 prefix = (sel & SAVRES_SAVE) ? "_savevr_" : "_restvr_";
22965 else
22966 abort ();
22969 if (DEFAULT_ABI == ABI_DARWIN)
22971 /* The Darwin approach is (slightly) different, in order to be
22972 compatible with code generated by the system toolchain. There is a
22973 single symbol for the start of the save sequence, and the code here
22974 embeds an offset into that code on the basis of the first register
22975 to be saved. */
22976 prefix = (sel & SAVRES_SAVE) ? "save" : "rest" ;
22977 if ((sel & SAVRES_REG) == SAVRES_GPR)
22978 sprintf (savres_routine_name, "*%sGPR%s%s%.0d ; %s r%d-r31", prefix,
22979 ((sel & SAVRES_LR) ? "x" : ""), (regno == 13 ? "" : "+"),
22980 (regno - 13) * 4, prefix, regno);
22981 else if ((sel & SAVRES_REG) == SAVRES_FPR)
22982 sprintf (savres_routine_name, "*%sFP%s%.0d ; %s f%d-f31", prefix,
22983 (regno == 14 ? "" : "+"), (regno - 14) * 4, prefix, regno);
22984 else if ((sel & SAVRES_REG) == SAVRES_VR)
22985 sprintf (savres_routine_name, "*%sVEC%s%.0d ; %s v%d-v31", prefix,
22986 (regno == 20 ? "" : "+"), (regno - 20) * 8, prefix, regno);
22987 else
22988 abort ();
22990 else
22991 sprintf (savres_routine_name, "%s%d%s", prefix, regno, suffix);
22993 return savres_routine_name;
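Some concrete names the sprintf calls above synthesize, for illustration:

/*   PPC32 ELF V.4, save GPRs from r29, LR variant:  "_savegpr_29_x"
     PPC64/AIX, restore GPRs from r28, LR variant:   "_restgpr0_28"
     SPE, 64-bit GPR save starting at r20:           "_save64gpr_20"  */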
22996 /* Return an RTL SYMBOL_REF for an out-of-line register save/restore routine.
22997 SEL encodes the register class, save vs. restore, and LR handling. */
22999 static rtx
23000 rs6000_savres_routine_sym (rs6000_stack_t *info, int sel)
23002 int regno = ((sel & SAVRES_REG) == SAVRES_GPR
23003 ? info->first_gp_reg_save
23004 : (sel & SAVRES_REG) == SAVRES_FPR
23005 ? info->first_fp_reg_save - 32
23006 : (sel & SAVRES_REG) == SAVRES_VR
23007 ? info->first_altivec_reg_save - FIRST_ALTIVEC_REGNO
23008 : -1);
23009 rtx sym;
23010 int select = sel;
23012 /* On the SPE, we never have any FPRs, but we do have 32/64-bit
23013 versions of the gpr routines. */
23014 if (TARGET_SPE_ABI && (sel & SAVRES_REG) == SAVRES_GPR
23015 && info->spe_64bit_regs_used)
23016 select ^= SAVRES_FPR ^ SAVRES_GPR;
23018 /* Don't generate bogus routine names. */
23019 gcc_assert (FIRST_SAVRES_REGISTER <= regno
23020 && regno <= LAST_SAVRES_REGISTER
23021 && select >= 0 && select <= 12);
23023 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select];
23025 if (sym == NULL)
23027 char *name;
23029 name = rs6000_savres_routine_name (info, regno, sel);
23031 sym = savres_routine_syms[regno-FIRST_SAVRES_REGISTER][select]
23032 = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
23033 SYMBOL_REF_FLAGS (sym) |= SYMBOL_FLAG_FUNCTION;
23036 return sym;
23039 /* Emit a sequence of insns, including a stack tie if needed, for
23040 resetting the stack pointer. If UPDT_REGNO is not 1, then don't
23041 reset the stack pointer, but move the base of the frame into
23042 reg UPDT_REGNO for use by out-of-line register restore routines. */
23044 static rtx
23045 rs6000_emit_stack_reset (rs6000_stack_t *info,
23046 rtx frame_reg_rtx, HOST_WIDE_INT frame_off,
23047 unsigned updt_regno)
23049 rtx updt_reg_rtx;
23051 /* This blockage is needed so that sched doesn't decide to move
23052 the sp change before the register restores. */
23053 if (DEFAULT_ABI == ABI_V4
23054 || (TARGET_SPE_ABI
23055 && info->spe_64bit_regs_used != 0
23056 && info->first_gp_reg_save != 32))
23057 rs6000_emit_stack_tie (frame_reg_rtx, frame_pointer_needed);
23059 /* If we are restoring registers out-of-line, we will be using the
23060 "exit" variants of the restore routines, which will reset the
23061 stack for us. But we do need to point updt_reg into the
23062 right place for those routines. */
23063 updt_reg_rtx = gen_rtx_REG (Pmode, updt_regno);
23065 if (frame_off != 0)
23066 return emit_insn (gen_add3_insn (updt_reg_rtx,
23067 frame_reg_rtx, GEN_INT (frame_off)));
23068 else if (REGNO (frame_reg_rtx) != updt_regno)
23069 return emit_move_insn (updt_reg_rtx, frame_reg_rtx);
23071 return NULL_RTX;
23074 /* Return the register number used as a pointer by out-of-line
23075 save/restore functions. */
23077 static inline unsigned
23078 ptr_regno_for_savres (int sel)
23080 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23081 return (sel & SAVRES_REG) == SAVRES_FPR || (sel & SAVRES_LR) ? 1 : 12;
23082 return DEFAULT_ABI == ABI_DARWIN && (sel & SAVRES_REG) == SAVRES_FPR ? 1 : 11;
23085 /* Construct a parallel rtx describing the effect of a call to an
23086 out-of-line register save/restore routine, and emit the insn
23087 or jump_insn as appropriate. */
23089 static rtx
23090 rs6000_emit_savres_rtx (rs6000_stack_t *info,
23091 rtx frame_reg_rtx, int save_area_offset, int lr_offset,
23092 machine_mode reg_mode, int sel)
23094 int i;
23095 int offset, start_reg, end_reg, n_regs, use_reg;
23096 int reg_size = GET_MODE_SIZE (reg_mode);
23097 rtx sym;
23098 rtvec p;
23099 rtx par, insn;
23101 offset = 0;
23102 start_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23103 ? info->first_gp_reg_save
23104 : (sel & SAVRES_REG) == SAVRES_FPR
23105 ? info->first_fp_reg_save
23106 : (sel & SAVRES_REG) == SAVRES_VR
23107 ? info->first_altivec_reg_save
23108 : -1);
23109 end_reg = ((sel & SAVRES_REG) == SAVRES_GPR
23110 ? 32
23111 : (sel & SAVRES_REG) == SAVRES_FPR
23112 ? 64
23113 : (sel & SAVRES_REG) == SAVRES_VR
23114 ? LAST_ALTIVEC_REGNO + 1
23115 : -1);
23116 n_regs = end_reg - start_reg;
23117 p = rtvec_alloc (3 + ((sel & SAVRES_LR) ? 1 : 0)
23118 + ((sel & SAVRES_REG) == SAVRES_VR ? 1 : 0)
23119 + n_regs);
23121 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23122 RTVEC_ELT (p, offset++) = ret_rtx;
23124 RTVEC_ELT (p, offset++)
23125 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
23127 sym = rs6000_savres_routine_sym (info, sel);
23128 RTVEC_ELT (p, offset++) = gen_rtx_USE (VOIDmode, sym);
23130 use_reg = ptr_regno_for_savres (sel);
23131 if ((sel & SAVRES_REG) == SAVRES_VR)
23133 /* Vector regs are saved/restored using [reg+reg] addressing. */
23134 RTVEC_ELT (p, offset++)
23135 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23136 RTVEC_ELT (p, offset++)
23137 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, 0));
23139 else
23140 RTVEC_ELT (p, offset++)
23141 = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, use_reg));
23143 for (i = 0; i < end_reg - start_reg; i++)
23144 RTVEC_ELT (p, i + offset)
23145 = gen_frame_set (gen_rtx_REG (reg_mode, start_reg + i),
23146 frame_reg_rtx, save_area_offset + reg_size * i,
23147 (sel & SAVRES_SAVE) != 0);
23149 if ((sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23150 RTVEC_ELT (p, i + offset)
23151 = gen_frame_store (gen_rtx_REG (Pmode, 0), frame_reg_rtx, lr_offset);
23153 par = gen_rtx_PARALLEL (VOIDmode, p);
23155 if (!(sel & SAVRES_SAVE) && (sel & SAVRES_LR))
23157 insn = emit_jump_insn (par);
23158 JUMP_LABEL (insn) = ret_rtx;
23160 else
23161 insn = emit_insn (par);
23162 return insn;
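The PARALLEL assembled here, for an exit-style 64-bit GPR restore with LR on AIX/ELFv2, has roughly this shape (element order as constructed; the pointer register follows ptr_regno_for_savres):

/*   (parallel [(return)
                (clobber (reg:DI LR_REGNO))
                (use (symbol_ref "_restgpr0_29"))
                (use (reg:DI 1))
                (set (reg:DI 29) (mem:DI (plus (reg:DI 1) ...)))
                (set (reg:DI 30) (mem:DI ...))
                (set (reg:DI 31) (mem:DI ...))])                     */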
23165 /* Emit code to store CR fields that need to be saved into REG. */
23167 static void
23168 rs6000_emit_move_from_cr (rtx reg)
23170 /* Only the ELFv2 ABI allows storing only selected fields. */
23171 if (DEFAULT_ABI == ABI_ELFv2 && TARGET_MFCRF)
23173 int i, cr_reg[8], count = 0;
23175 /* Collect CR fields that must be saved. */
23176 for (i = 0; i < 8; i++)
23177 if (save_reg_p (CR0_REGNO + i))
23178 cr_reg[count++] = i;
23180 /* If it's just a single one, use mfcrf. */
23181 if (count == 1)
23183 rtvec p = rtvec_alloc (1);
23184 rtvec r = rtvec_alloc (2);
23185 RTVEC_ELT (r, 0) = gen_rtx_REG (CCmode, CR0_REGNO + cr_reg[0]);
23186 RTVEC_ELT (r, 1) = GEN_INT (1 << (7 - cr_reg[0]));
23187 RTVEC_ELT (p, 0)
23188 = gen_rtx_SET (VOIDmode, reg,
23189 gen_rtx_UNSPEC (SImode, r, UNSPEC_MOVESI_FROM_CR));
23191 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23192 return;
23195 /* ??? It might be better to handle count == 2 / 3 cases here
23196 as well, using logical operations to combine the values. */
23199 emit_insn (gen_movesi_from_cr (reg));
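For the count == 1 case above, the generated instruction moves a single CR field; e.g. if only CR2 must be saved, the mask is 1 << (7 - 2) and the result is roughly:

/*   mfocrf 12,0x20      # copy just CR field 2 into the CR save reg  */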
23202 /* Determine whether GP register REG is really used. */
23204 static bool
23205 rs6000_reg_live_or_pic_offset_p (int reg)
23207 /* If the function calls eh_return, claim as used all the registers that
23208 would otherwise be checked for liveness. This is required for the PIC offset
23209 register with -mminimal-toc on AIX, as it is advertised as "fixed" for
23210 register allocation purposes in this case. */
23212 return (((crtl->calls_eh_return || df_regs_ever_live_p (reg))
23213 && (!call_used_regs[reg]
23214 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23215 && !TARGET_SINGLE_PIC_BASE
23216 && TARGET_TOC && TARGET_MINIMAL_TOC)))
23217 || (reg == RS6000_PIC_OFFSET_TABLE_REGNUM
23218 && !TARGET_SINGLE_PIC_BASE
23219 && ((DEFAULT_ABI == ABI_V4 && flag_pic != 0)
23220 || (DEFAULT_ABI == ABI_DARWIN && flag_pic))));
23223 /* Emit function prologue as insns. */
23225 void
23226 rs6000_emit_prologue (void)
23228 rs6000_stack_t *info = rs6000_stack_info ();
23229 machine_mode reg_mode = Pmode;
23230 int reg_size = TARGET_32BIT ? 4 : 8;
23231 rtx sp_reg_rtx = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
23232 rtx frame_reg_rtx = sp_reg_rtx;
23233 unsigned int cr_save_regno;
23234 rtx cr_save_rtx = NULL_RTX;
23235 rtx insn;
23236 int strategy;
23237 int using_static_chain_p = (cfun->static_chain_decl != NULL_TREE
23238 && df_regs_ever_live_p (STATIC_CHAIN_REGNUM)
23239 && call_used_regs[STATIC_CHAIN_REGNUM]);
23240 /* Offset to top of frame for frame_reg and sp respectively. */
23241 HOST_WIDE_INT frame_off = 0;
23242 HOST_WIDE_INT sp_off = 0;
23244 #ifdef ENABLE_CHECKING
23245 /* Track and check usage of r0, r11, r12. */
23246 int reg_inuse = using_static_chain_p ? 1 << 11 : 0;
23247 #define START_USE(R) do \
23249 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23250 reg_inuse |= 1 << (R); \
23251 } while (0)
23252 #define END_USE(R) do \
23254 gcc_assert ((reg_inuse & (1 << (R))) != 0); \
23255 reg_inuse &= ~(1 << (R)); \
23256 } while (0)
23257 #define NOT_INUSE(R) do \
23259 gcc_assert ((reg_inuse & (1 << (R))) == 0); \
23260 } while (0)
23261 #else
23262 #define START_USE(R) do {} while (0)
23263 #define END_USE(R) do {} while (0)
23264 #define NOT_INUSE(R) do {} while (0)
23265 #endif
23267 if (DEFAULT_ABI == ABI_ELFv2)
23269 cfun->machine->r2_setup_needed = df_regs_ever_live_p (TOC_REGNUM);
23271 /* With -mminimal-toc we may generate an extra use of r2 below. */
23272 if (!TARGET_SINGLE_PIC_BASE
23273 && TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
23274 cfun->machine->r2_setup_needed = true;
23278 if (flag_stack_usage_info)
23279 current_function_static_stack_size = info->total_size;
23281 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
23283 HOST_WIDE_INT size = info->total_size;
23285 if (crtl->is_leaf && !cfun->calls_alloca)
23287 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
23288 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT,
23289 size - STACK_CHECK_PROTECT);
23291 else if (size > 0)
23292 rs6000_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
23295 if (TARGET_FIX_AND_CONTINUE)
23297 /* gdb on darwin arranges to forward a function from the old
23298 address by modifying the first 5 instructions of the function
23299 to branch to the overriding function. This is necessary to
23300 permit function pointers that point to the old function to
23301 actually forward to the new function. */
23302 emit_insn (gen_nop ());
23303 emit_insn (gen_nop ());
23304 emit_insn (gen_nop ());
23305 emit_insn (gen_nop ());
23306 emit_insn (gen_nop ());
23309 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
23311 reg_mode = V2SImode;
23312 reg_size = 8;
23315 /* Handle world saves specially here. */
23316 if (WORLD_SAVE_P (info))
23318 int i, j, sz;
23319 rtx treg;
23320 rtvec p;
23321 rtx reg0;
23323 /* save_world expects lr in r0. */
23324 reg0 = gen_rtx_REG (Pmode, 0);
23325 if (info->lr_save_p)
23327 insn = emit_move_insn (reg0,
23328 gen_rtx_REG (Pmode, LR_REGNO));
23329 RTX_FRAME_RELATED_P (insn) = 1;
23332 /* The SAVE_WORLD and RESTORE_WORLD routines make a number of
23333 assumptions about the offsets of various bits of the stack
23334 frame. */
23335 gcc_assert (info->gp_save_offset == -220
23336 && info->fp_save_offset == -144
23337 && info->lr_save_offset == 8
23338 && info->cr_save_offset == 4
23339 && info->push_p
23340 && info->lr_save_p
23341 && (!crtl->calls_eh_return
23342 || info->ehrd_offset == -432)
23343 && info->vrsave_save_offset == -224
23344 && info->altivec_save_offset == -416);
23346 treg = gen_rtx_REG (SImode, 11);
23347 emit_move_insn (treg, GEN_INT (-info->total_size));
23349 /* SAVE_WORLD takes the caller's LR in R0 and the frame size
23350 in R11. It also clobbers R12, so beware! */
23352 /* Preserve CR2 for save_world prologues. */
23353 sz = 5;
23354 sz += 32 - info->first_gp_reg_save;
23355 sz += 64 - info->first_fp_reg_save;
23356 sz += LAST_ALTIVEC_REGNO - info->first_altivec_reg_save + 1;
23357 p = rtvec_alloc (sz);
23358 j = 0;
23359 RTVEC_ELT (p, j++) = gen_rtx_CLOBBER (VOIDmode,
23360 gen_rtx_REG (SImode,
23361 LR_REGNO));
23362 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
23363 gen_rtx_SYMBOL_REF (Pmode,
23364 "*save_world"));
23365 /* We do floats first so that the instruction pattern matches
23366 properly. */
23367 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23368 RTVEC_ELT (p, j++)
23369 = gen_frame_store (gen_rtx_REG (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23370 ? DFmode : SFmode,
23371 info->first_fp_reg_save + i),
23372 frame_reg_rtx,
23373 info->fp_save_offset + frame_off + 8 * i);
23374 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
23375 RTVEC_ELT (p, j++)
23376 = gen_frame_store (gen_rtx_REG (V4SImode,
23377 info->first_altivec_reg_save + i),
23378 frame_reg_rtx,
23379 info->altivec_save_offset + frame_off + 16 * i);
23380 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23381 RTVEC_ELT (p, j++)
23382 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23383 frame_reg_rtx,
23384 info->gp_save_offset + frame_off + reg_size * i);
23386 /* CR register traditionally saved as CR2. */
23387 RTVEC_ELT (p, j++)
23388 = gen_frame_store (gen_rtx_REG (SImode, CR2_REGNO),
23389 frame_reg_rtx, info->cr_save_offset + frame_off);
23390 /* Explain to the unwinder the use of R0, which holds LR. */
23391 if (info->lr_save_p)
23392 RTVEC_ELT (p, j++)
23393 = gen_frame_store (reg0,
23394 frame_reg_rtx, info->lr_save_offset + frame_off);
23395 /* Explain what happens to the stack pointer. */
23397 rtx newval = gen_rtx_PLUS (Pmode, sp_reg_rtx, treg);
23398 RTVEC_ELT (p, j++) = gen_rtx_SET (VOIDmode, sp_reg_rtx, newval);
23401 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23402 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23403 treg, GEN_INT (-info->total_size), NULL_RTX);
23404 sp_off = frame_off = info->total_size;
23407 strategy = info->savres_strategy;
23409 /* For V.4, update stack before we do any saving and set back pointer. */
23410 if (! WORLD_SAVE_P (info)
23411 && info->push_p
23412 && (DEFAULT_ABI == ABI_V4
23413 || crtl->calls_eh_return))
23415 bool need_r11 = (TARGET_SPE
23416 ? (!(strategy & SAVE_INLINE_GPRS)
23417 && info->spe_64bit_regs_used == 0)
23418 : (!(strategy & SAVE_INLINE_FPRS)
23419 || !(strategy & SAVE_INLINE_GPRS)
23420 || !(strategy & SAVE_INLINE_VRS)));
23421 int ptr_regno = -1;
23422 rtx ptr_reg = NULL_RTX;
23423 int ptr_off = 0;
23425 if (info->total_size < 32767)
23426 frame_off = info->total_size;
23427 else if (need_r11)
23428 ptr_regno = 11;
23429 else if (info->cr_save_p
23430 || info->lr_save_p
23431 || info->first_fp_reg_save < 64
23432 || info->first_gp_reg_save < 32
23433 || info->altivec_size != 0
23434 || info->vrsave_mask != 0
23435 || crtl->calls_eh_return)
23436 ptr_regno = 12;
23437 else
23439 /* The prologue won't be saving any regs so there is no need
23440 to set up a frame register to access any frame save area.
23441 We also won't be using frame_off anywhere below, but set
23442 the correct value anyway to protect against future
23443 changes to this function. */
23444 frame_off = info->total_size;
23446 if (ptr_regno != -1)
23448 /* Set up the frame offset to that needed by the first
23449 out-of-line save function. */
23450 START_USE (ptr_regno);
23451 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23452 frame_reg_rtx = ptr_reg;
23453 if (!(strategy & SAVE_INLINE_FPRS) && info->fp_size != 0)
23454 gcc_checking_assert (info->fp_save_offset + info->fp_size == 0);
23455 else if (!(strategy & SAVE_INLINE_GPRS) && info->first_gp_reg_save < 32)
23456 ptr_off = info->gp_save_offset + info->gp_size;
23457 else if (!(strategy & SAVE_INLINE_VRS) && info->altivec_size != 0)
23458 ptr_off = info->altivec_save_offset + info->altivec_size;
23459 frame_off = -ptr_off;
23461 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23462 sp_off = info->total_size;
23463 if (frame_reg_rtx != sp_reg_rtx)
23464 rs6000_emit_stack_tie (frame_reg_rtx, false);
23467 /* If we need to save the link register, get it into r0. */
23468 if (!WORLD_SAVE_P (info) && info->lr_save_p)
23470 rtx addr, reg, mem;
23472 reg = gen_rtx_REG (Pmode, 0);
23473 START_USE (0);
23474 insn = emit_move_insn (reg, gen_rtx_REG (Pmode, LR_REGNO));
23475 RTX_FRAME_RELATED_P (insn) = 1;
23477 if (!(strategy & (SAVE_NOINLINE_GPRS_SAVES_LR
23478 | SAVE_NOINLINE_FPRS_SAVES_LR)))
23480 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23481 GEN_INT (info->lr_save_offset + frame_off));
23482 mem = gen_rtx_MEM (Pmode, addr);
23483 /* This should not be of rs6000_sr_alias_set, because of
23484 __builtin_return_address. */
23486 insn = emit_move_insn (mem, reg);
23487 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23488 NULL_RTX, NULL_RTX, NULL_RTX);
23489 END_USE (0);
23493 /* If we need to save CR, put it into r12 or r11. Choose r12 except when
23494 r12 will be needed by the out-of-line GPR save. */
23495 cr_save_regno = ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
23496 && !(strategy & (SAVE_INLINE_GPRS
23497 | SAVE_NOINLINE_GPRS_SAVES_LR))
23498 ? 11 : 12);
23499 if (!WORLD_SAVE_P (info)
23500 && info->cr_save_p
23501 && REGNO (frame_reg_rtx) != cr_save_regno
23502 && !(using_static_chain_p && cr_save_regno == 11))
23504 cr_save_rtx = gen_rtx_REG (SImode, cr_save_regno);
23505 START_USE (cr_save_regno);
23506 rs6000_emit_move_from_cr (cr_save_rtx);
23509 /* Do any required saving of FPRs. If only one or two to save, do
23510 it ourselves. Otherwise, call an out-of-line save function. */
23511 if (!WORLD_SAVE_P (info) && (strategy & SAVE_INLINE_FPRS))
23513 int i;
23514 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
23515 if (save_reg_p (info->first_fp_reg_save + i))
23516 emit_frame_save (frame_reg_rtx,
23517 (TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
23518 ? DFmode : SFmode),
23519 info->first_fp_reg_save + i,
23520 info->fp_save_offset + frame_off + 8 * i,
23521 sp_off - frame_off);
23523 else if (!WORLD_SAVE_P (info) && info->first_fp_reg_save != 64)
23525 bool lr = (strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
23526 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
23527 unsigned ptr_regno = ptr_regno_for_savres (sel);
23528 rtx ptr_reg = frame_reg_rtx;
23530 if (REGNO (frame_reg_rtx) == ptr_regno)
23531 gcc_checking_assert (frame_off == 0);
23532 else
23534 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23535 NOT_INUSE (ptr_regno);
23536 emit_insn (gen_add3_insn (ptr_reg,
23537 frame_reg_rtx, GEN_INT (frame_off)));
23539 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23540 info->fp_save_offset,
23541 info->lr_save_offset,
23542 DFmode, sel);
23543 rs6000_frame_related (insn, ptr_reg, sp_off,
23544 NULL_RTX, NULL_RTX, NULL_RTX);
23545 if (lr)
23546 END_USE (0);
23549 /* Save GPRs. This is done as a PARALLEL if we are using
23550 the store-multiple instructions. */
23551 if (!WORLD_SAVE_P (info)
23552 && TARGET_SPE_ABI
23553 && info->spe_64bit_regs_used != 0
23554 && info->first_gp_reg_save != 32)
23556 int i;
23557 rtx spe_save_area_ptr;
23558 HOST_WIDE_INT save_off;
23559 int ool_adjust = 0;
23561 /* Determine whether we can address all of the registers that need
23562 to be saved with an offset from frame_reg_rtx that fits in
23563 the small const field for SPE memory instructions. */
23564 int spe_regs_addressable
23565 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
23566 + reg_size * (32 - info->first_gp_reg_save - 1))
23567 && (strategy & SAVE_INLINE_GPRS));
23569 if (spe_regs_addressable)
23571 spe_save_area_ptr = frame_reg_rtx;
23572 save_off = frame_off;
23574 else
23576 /* Make r11 point to the start of the SPE save area. We need
23577 to be careful here if r11 is holding the static chain. If
23578 it is, then temporarily save it in r0. */
23579 HOST_WIDE_INT offset;
23581 if (!(strategy & SAVE_INLINE_GPRS))
23582 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
23583 offset = info->spe_gp_save_offset + frame_off - ool_adjust;
23584 spe_save_area_ptr = gen_rtx_REG (Pmode, 11);
23585 save_off = frame_off - offset;
23587 if (using_static_chain_p)
23589 rtx r0 = gen_rtx_REG (Pmode, 0);
23591 START_USE (0);
23592 gcc_assert (info->first_gp_reg_save > 11);
23594 emit_move_insn (r0, spe_save_area_ptr);
23596 else if (REGNO (frame_reg_rtx) != 11)
23597 START_USE (11);
23599 emit_insn (gen_addsi3 (spe_save_area_ptr,
23600 frame_reg_rtx, GEN_INT (offset)));
23601 if (!using_static_chain_p && REGNO (frame_reg_rtx) == 11)
23602 frame_off = -info->spe_gp_save_offset + ool_adjust;
23605 if ((strategy & SAVE_INLINE_GPRS))
23607 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23608 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23609 emit_frame_save (spe_save_area_ptr, reg_mode,
23610 info->first_gp_reg_save + i,
23611 (info->spe_gp_save_offset + save_off
23612 + reg_size * i),
23613 sp_off - save_off);
23615 else
23617 insn = rs6000_emit_savres_rtx (info, spe_save_area_ptr,
23618 info->spe_gp_save_offset + save_off,
23619 0, reg_mode,
23620 SAVRES_SAVE | SAVRES_GPR);
23622 rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off,
23623 NULL_RTX, NULL_RTX, NULL_RTX);
23626 /* Move the static chain pointer back. */
23627 if (!spe_regs_addressable)
23629 if (using_static_chain_p)
23631 emit_move_insn (spe_save_area_ptr, gen_rtx_REG (Pmode, 0));
23632 END_USE (0);
23634 else if (REGNO (frame_reg_rtx) != 11)
23635 END_USE (11);
23638 else if (!WORLD_SAVE_P (info) && !(strategy & SAVE_INLINE_GPRS))
23640 bool lr = (strategy & SAVE_NOINLINE_GPRS_SAVES_LR) != 0;
23641 int sel = SAVRES_SAVE | SAVRES_GPR | (lr ? SAVRES_LR : 0);
23642 unsigned ptr_regno = ptr_regno_for_savres (sel);
23643 rtx ptr_reg = frame_reg_rtx;
23644 bool ptr_set_up = REGNO (ptr_reg) == ptr_regno;
23645 int end_save = info->gp_save_offset + info->gp_size;
23646 int ptr_off;
23648 if (!ptr_set_up)
23649 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23651 /* Need to adjust r11 (r12) if we saved any FPRs. */
23652 if (end_save + frame_off != 0)
23654 rtx offset = GEN_INT (end_save + frame_off);
23656 if (ptr_set_up)
23657 frame_off = -end_save;
23658 else
23659 NOT_INUSE (ptr_regno);
23660 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
23662 else if (!ptr_set_up)
23664 NOT_INUSE (ptr_regno);
23665 emit_move_insn (ptr_reg, frame_reg_rtx);
23667 ptr_off = -end_save;
23668 insn = rs6000_emit_savres_rtx (info, ptr_reg,
23669 info->gp_save_offset + ptr_off,
23670 info->lr_save_offset + ptr_off,
23671 reg_mode, sel);
23672 rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off,
23673 NULL_RTX, NULL_RTX, NULL_RTX);
23674 if (lr)
23675 END_USE (0);
23677 else if (!WORLD_SAVE_P (info) && (strategy & SAVRES_MULTIPLE))
23679 rtvec p;
23680 int i;
23681 p = rtvec_alloc (32 - info->first_gp_reg_save);
23682 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23683 RTVEC_ELT (p, i)
23684 = gen_frame_store (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
23685 frame_reg_rtx,
23686 info->gp_save_offset + frame_off + reg_size * i);
23687 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23688 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
23689 NULL_RTX, NULL_RTX, NULL_RTX);
23691 else if (!WORLD_SAVE_P (info))
23693 int i;
23694 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
23695 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
23696 emit_frame_save (frame_reg_rtx, reg_mode,
23697 info->first_gp_reg_save + i,
23698 info->gp_save_offset + frame_off + reg_size * i,
23699 sp_off - frame_off);
23702 if (crtl->calls_eh_return)
23704 unsigned int i;
23705 rtvec p;
23707 for (i = 0; ; ++i)
23709 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23710 if (regno == INVALID_REGNUM)
23711 break;
23714 p = rtvec_alloc (i);
23716 for (i = 0; ; ++i)
23718 unsigned int regno = EH_RETURN_DATA_REGNO (i);
23719 if (regno == INVALID_REGNUM)
23720 break;
23722 insn
23723 = gen_frame_store (gen_rtx_REG (reg_mode, regno),
23724 sp_reg_rtx,
23725 info->ehrd_offset + sp_off + reg_size * (int) i);
23726 RTVEC_ELT (p, i) = insn;
23727 RTX_FRAME_RELATED_P (insn) = 1;
23730 insn = emit_insn (gen_blockage ());
23731 RTX_FRAME_RELATED_P (insn) = 1;
23732 add_reg_note (insn, REG_FRAME_RELATED_EXPR, gen_rtx_PARALLEL (VOIDmode, p));
23735 /* In AIX ABI we need to make sure r2 is really saved. */
23736 if (TARGET_AIX && crtl->calls_eh_return)
23738 rtx tmp_reg, tmp_reg_si, hi, lo, compare_result, toc_save_done, jump;
23739 rtx save_insn, join_insn, note;
23740 long toc_restore_insn;
23742 tmp_reg = gen_rtx_REG (Pmode, 11);
23743 tmp_reg_si = gen_rtx_REG (SImode, 11);
23744 if (using_static_chain_p)
23746 START_USE (0);
23747 emit_move_insn (gen_rtx_REG (Pmode, 0), tmp_reg);
23749 else
23750 START_USE (11);
23751 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, LR_REGNO));
23752 /* Peek at instruction to which this function returns. If it's
23753 restoring r2, then we know we've already saved r2. We can't
23754 unconditionally save r2 because the value we have will already
23755 be updated if we arrived at this function via a plt call or
23756 toc adjusting stub. */
23757 emit_move_insn (tmp_reg_si, gen_rtx_MEM (SImode, tmp_reg));
23758 toc_restore_insn = ((TARGET_32BIT ? 0x80410000 : 0xE8410000)
23759 + RS6000_TOC_SAVE_SLOT);
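/* Illustration (editorial; values assumed rather than quoted from this
   port): for -m64 the insn being matched is "ld 2,N(1)", whose major
   opcode with RT=2 and RA=1 yields the 0xE8410000 base above, so with
   N = 40 the full word would be 0xE8410028.  Since a 16-bit immediate
   cannot hold the whole word, the match is done in two halves: the XOR
   below cancels the high half, then the compare tests the low half.  */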
23760 hi = gen_int_mode (toc_restore_insn & ~0xffff, SImode);
23761 emit_insn (gen_xorsi3 (tmp_reg_si, tmp_reg_si, hi));
23762 compare_result = gen_rtx_REG (CCUNSmode, CR0_REGNO);
23763 validate_condition_mode (EQ, CCUNSmode);
23764 lo = gen_int_mode (toc_restore_insn & 0xffff, SImode);
23765 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
23766 gen_rtx_COMPARE (CCUNSmode, tmp_reg_si, lo)));
23767 toc_save_done = gen_label_rtx ();
23768 jump = gen_rtx_IF_THEN_ELSE (VOIDmode,
23769 gen_rtx_EQ (VOIDmode, compare_result,
23770 const0_rtx),
23771 gen_rtx_LABEL_REF (VOIDmode, toc_save_done),
23772 pc_rtx);
23773 jump = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, jump));
23774 JUMP_LABEL (jump) = toc_save_done;
23775 LABEL_NUSES (toc_save_done) += 1;
23777 save_insn = emit_frame_save (frame_reg_rtx, reg_mode,
23778 TOC_REGNUM, frame_off + RS6000_TOC_SAVE_SLOT,
23779 sp_off - frame_off);
23781 emit_label (toc_save_done);
23783 /* ??? If we leave SAVE_INSN as marked as saving R2, then we'll
23784 have a CFG that has different saves along different paths.
23785 Move the note to a dummy blockage insn, which describes that
23786 R2 is unconditionally saved after the label. */
23787 /* ??? An alternate representation might be a special insn pattern
23788 containing both the branch and the store. That might allow the
23789 code that minimizes the number of DW_CFA_advance opcodes more
23790 freedom in placing the annotations. */
23791 note = find_reg_note (save_insn, REG_FRAME_RELATED_EXPR, NULL);
23792 if (note)
23793 remove_note (save_insn, note);
23794 else
23795 note = alloc_reg_note (REG_FRAME_RELATED_EXPR,
23796 copy_rtx (PATTERN (save_insn)), NULL_RTX);
23797 RTX_FRAME_RELATED_P (save_insn) = 0;
23799 join_insn = emit_insn (gen_blockage ());
23800 REG_NOTES (join_insn) = note;
23801 RTX_FRAME_RELATED_P (join_insn) = 1;
23803 if (using_static_chain_p)
23805 emit_move_insn (tmp_reg, gen_rtx_REG (Pmode, 0));
23806 END_USE (0);
23808 else
23809 END_USE (11);
23812 /* Save CR if we use any that must be preserved. */
23813 if (!WORLD_SAVE_P (info) && info->cr_save_p)
23815 rtx addr = gen_rtx_PLUS (Pmode, frame_reg_rtx,
23816 GEN_INT (info->cr_save_offset + frame_off));
23817 rtx mem = gen_frame_mem (SImode, addr);
23819 /* If we didn't copy cr before, do so now using r0. */
23820 if (cr_save_rtx == NULL_RTX)
23822 START_USE (0);
23823 cr_save_rtx = gen_rtx_REG (SImode, 0);
23824 rs6000_emit_move_from_cr (cr_save_rtx);
23827 /* Saving CR requires a two-instruction sequence: one instruction
23828 to move the CR to a general-purpose register, and a second
23829 instruction that stores the GPR to memory.
23831 We do not emit any DWARF CFI records for the first of these,
23832 because we cannot properly represent the fact that CR is saved in
23833 a register. One reason is that we cannot express that multiple
23834 CR fields are saved; another reason is that on 64-bit, the size
23835 of the CR register in DWARF (4 bytes) differs from the size of
23836 a general-purpose register.
23838 This means if any intervening instruction were to clobber one of
23839 the call-saved CR fields, we'd have incorrect CFI. To prevent
23840 this from happening, we mark the store to memory as a use of
23841 those CR fields, which prevents any such instruction from being
23842 scheduled between the two instructions. */
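/* A hedged sketch of the resulting machine code (register and offset
   illustrative only):
	mfcr  0              # copy all eight CR fields into r0
	stw   0,8(1)         # store r0 into the CR save slot
   The USEs built below ride along in the store's PARALLEL so that no
   insn writing a call-saved CR field can be scheduled between the
   two.  */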
23843 rtx crsave_v[9];
23844 int n_crsave = 0;
23845 int i;
23847 crsave_v[n_crsave++] = gen_rtx_SET (VOIDmode, mem, cr_save_rtx);
23848 for (i = 0; i < 8; i++)
23849 if (save_reg_p (CR0_REGNO + i))
23850 crsave_v[n_crsave++]
23851 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23853 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode,
23854 gen_rtvec_v (n_crsave, crsave_v)));
23855 END_USE (REGNO (cr_save_rtx));
23857 /* Now, there's no way that dwarf2out_frame_debug_expr is going to
23858 understand '(unspec:SI [(reg:CC 68) ...] UNSPEC_MOVESI_FROM_CR)',
23859 so we need to construct a frame expression manually. */
23860 RTX_FRAME_RELATED_P (insn) = 1;
23862 /* Update address to be stack-pointer relative, like
23863 rs6000_frame_related would do. */
23864 addr = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, STACK_POINTER_REGNUM),
23865 GEN_INT (info->cr_save_offset + sp_off));
23866 mem = gen_frame_mem (SImode, addr);
23868 if (DEFAULT_ABI == ABI_ELFv2)
23870 /* In the ELFv2 ABI we generate separate CFI records for each
23871 CR field that was actually saved. They all point to the
23872 same 32-bit stack slot. */
23873 rtx crframe[8];
23874 int n_crframe = 0;
23876 for (i = 0; i < 8; i++)
23877 if (save_reg_p (CR0_REGNO + i))
23879 crframe[n_crframe]
23880 = gen_rtx_SET (VOIDmode, mem,
23881 gen_rtx_REG (SImode, CR0_REGNO + i));
23883 RTX_FRAME_RELATED_P (crframe[n_crframe]) = 1;
23884 n_crframe++;
23887 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23888 gen_rtx_PARALLEL (VOIDmode,
23889 gen_rtvec_v (n_crframe, crframe)));
23891 else
23893 /* In other ABIs, by convention, we use a single CR regnum to
23894 represent the fact that all call-saved CR fields are saved.
23895 We use CR2_REGNO to be compatible with gcc-2.95 on Linux. */
23896 rtx set = gen_rtx_SET (VOIDmode, mem,
23897 gen_rtx_REG (SImode, CR2_REGNO));
23898 add_reg_note (insn, REG_FRAME_RELATED_EXPR, set);
23902 /* In the ELFv2 ABI we need to save all call-saved CR fields into
23903 *separate* slots if the routine calls __builtin_eh_return, so
23904 that they can be independently restored by the unwinder. */
23905 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
23907 int i, cr_off = info->ehcr_offset;
23908 rtx crsave;
23910 /* ??? We might get better performance by using multiple mfocrf
23911 instructions. */
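/* Editorial note: mfocrf rT,FXM copies only the CR field selected by
   the FXM mask, so one mfocrf per saved field could replace the single
   full mfcr emitted below; that is the alternative the ??? remark
   alludes to.  */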
23912 crsave = gen_rtx_REG (SImode, 0);
23913 emit_insn (gen_movesi_from_cr (crsave));
23915 for (i = 0; i < 8; i++)
23916 if (!call_used_regs[CR0_REGNO + i])
23918 rtvec p = rtvec_alloc (2);
23919 RTVEC_ELT (p, 0)
23920 = gen_frame_store (crsave, frame_reg_rtx, cr_off + frame_off);
23921 RTVEC_ELT (p, 1)
23922 = gen_rtx_USE (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i));
23924 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
23926 RTX_FRAME_RELATED_P (insn) = 1;
23927 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
23928 gen_frame_store (gen_rtx_REG (SImode, CR0_REGNO + i),
23929 sp_reg_rtx, cr_off + sp_off));
23931 cr_off += reg_size;
23935 /* Update stack and set back pointer unless this is V.4,
23936 for which it was done previously. */
23937 if (!WORLD_SAVE_P (info) && info->push_p
23938 && !(DEFAULT_ABI == ABI_V4 || crtl->calls_eh_return))
23940 rtx ptr_reg = NULL;
23941 int ptr_off = 0;
23943 /* If saving altivec regs we need to be able to address all save
23944 locations using a 16-bit offset. */
23945 if ((strategy & SAVE_INLINE_VRS) == 0
23946 || (info->altivec_size != 0
23947 && (info->altivec_save_offset + info->altivec_size - 16
23948 + info->total_size - frame_off) > 32767)
23949 || (info->vrsave_size != 0
23950 && (info->vrsave_save_offset
23951 + info->total_size - frame_off) > 32767))
23953 int sel = SAVRES_SAVE | SAVRES_VR;
23954 unsigned ptr_regno = ptr_regno_for_savres (sel);
23956 if (using_static_chain_p
23957 && ptr_regno == STATIC_CHAIN_REGNUM)
23958 ptr_regno = 12;
23959 if (REGNO (frame_reg_rtx) != ptr_regno)
23960 START_USE (ptr_regno);
23961 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
23962 frame_reg_rtx = ptr_reg;
23963 ptr_off = info->altivec_save_offset + info->altivec_size;
23964 frame_off = -ptr_off;
23966 else if (REGNO (frame_reg_rtx) == 1)
23967 frame_off = info->total_size;
23968 rs6000_emit_allocate_stack (info->total_size, ptr_reg, ptr_off);
23969 sp_off = info->total_size;
23970 if (frame_reg_rtx != sp_reg_rtx)
23971 rs6000_emit_stack_tie (frame_reg_rtx, false);
23974 /* Set frame pointer, if needed. */
23975 if (frame_pointer_needed)
23977 insn = emit_move_insn (gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM),
23978 sp_reg_rtx);
23979 RTX_FRAME_RELATED_P (insn) = 1;
23982 /* Save AltiVec registers if needed. Save here because the red zone does
23983 not always include AltiVec registers. */
23984 if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
23985 && info->altivec_size != 0 && (strategy & SAVE_INLINE_VRS) == 0)
23987 int end_save = info->altivec_save_offset + info->altivec_size;
23988 int ptr_off;
23989 /* Oddly, the vector save/restore functions point r0 at the end
23990 of the save area, then use r11 or r12 to load offsets for
23991 [reg+reg] addressing. */
23992 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
23993 int scratch_regno = ptr_regno_for_savres (SAVRES_SAVE | SAVRES_VR);
23994 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
23996 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
23997 NOT_INUSE (0);
23998 if (end_save + frame_off != 0)
24000 rtx offset = GEN_INT (end_save + frame_off);
24002 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24004 else
24005 emit_move_insn (ptr_reg, frame_reg_rtx);
24007 ptr_off = -end_save;
24008 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24009 info->altivec_save_offset + ptr_off,
24010 0, V4SImode, SAVRES_SAVE | SAVRES_VR);
24011 rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off,
24012 NULL_RTX, NULL_RTX, NULL_RTX);
24013 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24015 /* The oddity mentioned above clobbered our frame reg. */
24016 emit_move_insn (frame_reg_rtx, ptr_reg);
24017 frame_off = ptr_off;
24020 else if (!WORLD_SAVE_P (info) && TARGET_ALTIVEC_ABI
24021 && info->altivec_size != 0)
24023 int i;
24025 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24026 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24028 rtx areg, savereg, mem, split_reg;
24029 int offset;
24031 offset = (info->altivec_save_offset + frame_off
24032 + 16 * (i - info->first_altivec_reg_save));
24034 savereg = gen_rtx_REG (V4SImode, i);
24036 NOT_INUSE (0);
24037 areg = gen_rtx_REG (Pmode, 0);
24038 emit_move_insn (areg, GEN_INT (offset));
24040 /* AltiVec addressing mode is [reg+reg]. */
24041 mem = gen_frame_mem (V4SImode,
24042 gen_rtx_PLUS (Pmode, frame_reg_rtx, areg));
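/* stvx has no reg+displacement form, which is why the offset was
   materialized into r0 above; the store below amounts to something
   like "stvx vS,rFRAME,r0" (operand names illustrative).  */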
24044 insn = emit_move_insn (mem, savereg);
24046 /* When we split a VSX store into two insns, we need to make
24047 sure the DWARF info knows which register we are storing.
24048 Pass it in to be used on the appropriate note. */
24049 if (!BYTES_BIG_ENDIAN
24050 && GET_CODE (PATTERN (insn)) == SET
24051 && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT)
24052 split_reg = savereg;
24053 else
24054 split_reg = NULL_RTX;
24056 rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off,
24057 areg, GEN_INT (offset), split_reg);
24061 /* VRSAVE is a bit vector representing which AltiVec registers
24062 are used. The OS uses this to determine which vector
24063 registers to save on a context switch. We need to save
24064 VRSAVE on the stack frame, add whatever AltiVec registers we
24065 used in this function, and do the corresponding magic in the
24066 epilogue. */
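/* A rough, editorial sketch of what this block emits (register choice
   varies as described below):
	mfspr r12,256        # VRSAVE is SPR 256
	stw   r12,OFF(r1)    # save the caller's mask
	oris/ori r12,...     # OR in this function's vrsave_mask
	mtspr 256,r12        # install the combined mask
 */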
24068 if (!WORLD_SAVE_P (info)
24069 && TARGET_ALTIVEC
24070 && TARGET_ALTIVEC_VRSAVE
24071 && info->vrsave_mask != 0)
24073 rtx reg, vrsave;
24074 int offset;
24075 int save_regno;
24077 /* Get VRSAVE onto a GPR. Note that ABI_V4 and ABI_DARWIN might
24078 be using r12 as frame_reg_rtx and r11 as the static chain
24079 pointer for nested functions. */
24080 save_regno = 12;
24081 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
24082 && !using_static_chain_p)
24083 save_regno = 11;
24084 else if (REGNO (frame_reg_rtx) == 12)
24086 save_regno = 11;
24087 if (using_static_chain_p)
24088 save_regno = 0;
24091 NOT_INUSE (save_regno);
24092 reg = gen_rtx_REG (SImode, save_regno);
24093 vrsave = gen_rtx_REG (SImode, VRSAVE_REGNO);
24094 if (TARGET_MACHO)
24095 emit_insn (gen_get_vrsave_internal (reg));
24096 else
24097 emit_insn (gen_rtx_SET (VOIDmode, reg, vrsave));
24099 /* Save VRSAVE. */
24100 offset = info->vrsave_save_offset + frame_off;
24101 insn = emit_insn (gen_frame_store (reg, frame_reg_rtx, offset));
24103 /* Include the registers in the mask. */
24104 emit_insn (gen_iorsi3 (reg, reg, GEN_INT ((int) info->vrsave_mask)));
24106 insn = emit_insn (generate_set_vrsave (reg, info, 0));
24109 /* If we are using RS6000_PIC_OFFSET_TABLE_REGNUM, we need to set it up. */
24110 if (!TARGET_SINGLE_PIC_BASE
24111 && ((TARGET_TOC && TARGET_MINIMAL_TOC && get_pool_size () != 0)
24112 || (DEFAULT_ABI == ABI_V4
24113 && (flag_pic == 1 || (flag_pic && TARGET_SECURE_PLT))
24114 && df_regs_ever_live_p (RS6000_PIC_OFFSET_TABLE_REGNUM))))
24116 /* If emit_load_toc_table will use the link register, we need to save
24117 it. We use R12 for this purpose because emit_load_toc_table
24118 can use register 0. This allows us to use a plain 'blr' to return
24119 from the procedure more often. */
24120 int save_LR_around_toc_setup = (TARGET_ELF
24121 && DEFAULT_ABI == ABI_V4
24122 && flag_pic
24123 && ! info->lr_save_p
24124 && EDGE_COUNT (EXIT_BLOCK_PTR_FOR_FN (cfun)->preds) > 0);
24125 if (save_LR_around_toc_setup)
24127 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24128 rtx tmp = gen_rtx_REG (Pmode, 12);
24130 insn = emit_move_insn (tmp, lr);
24131 RTX_FRAME_RELATED_P (insn) = 1;
24133 rs6000_emit_load_toc_table (TRUE);
24135 insn = emit_move_insn (lr, tmp);
24136 add_reg_note (insn, REG_CFA_RESTORE, lr);
24137 RTX_FRAME_RELATED_P (insn) = 1;
24139 else
24140 rs6000_emit_load_toc_table (TRUE);
24143 #if TARGET_MACHO
24144 if (!TARGET_SINGLE_PIC_BASE
24145 && DEFAULT_ABI == ABI_DARWIN
24146 && flag_pic && crtl->uses_pic_offset_table)
24148 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24149 rtx src = gen_rtx_SYMBOL_REF (Pmode, MACHOPIC_FUNCTION_BASE_NAME);
24151 /* Save and restore LR locally around this call (in R0). */
24152 if (!info->lr_save_p)
24153 emit_move_insn (gen_rtx_REG (Pmode, 0), lr);
24155 emit_insn (gen_load_macho_picbase (src));
24157 emit_move_insn (gen_rtx_REG (Pmode,
24158 RS6000_PIC_OFFSET_TABLE_REGNUM),
24159 lr);
24161 if (!info->lr_save_p)
24162 emit_move_insn (lr, gen_rtx_REG (Pmode, 0));
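/* Editorial note: the picbase load is typically a "bcl 20,31,Lpb" /
   "Lpb: mflr rPIC" pair (a sketch; the exact insns come from the
   load_macho_picbase pattern), which clobbers LR and is why LR is
   parked in r0 above when it has no stack slot.  */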
24164 #endif
24166 /* If we need to, save the TOC register after doing the stack setup.
24167 Do not emit eh frame info for this save. The unwinder wants info,
24168 conceptually attached to instructions in this function, about
24169 register values in the caller of this function. This R2 may have
24170 already been changed from the value in the caller.
24171 We don't attempt to write accurate DWARF EH frame info for R2
24172 because code emitted by gcc for a (non-pointer) function call
24173 doesn't save and restore R2. Instead, R2 is managed out-of-line
24174 by a linker generated plt call stub when the function resides in
24175 a shared library. This behaviour is costly to describe in DWARF,
24176 both in terms of the size of DWARF info and the time taken in the
24177 unwinder to interpret it. R2 changes, apart from the
24178 calls_eh_return case earlier in this function, are handled by
24179 linux-unwind.h frob_update_context. */
24180 if (rs6000_save_toc_in_prologue_p ())
24182 rtx reg = gen_rtx_REG (reg_mode, TOC_REGNUM);
24183 emit_insn (gen_frame_store (reg, sp_reg_rtx, RS6000_TOC_SAVE_SLOT));
24187 /* Write function prologue. */
24189 static void
24190 rs6000_output_function_prologue (FILE *file,
24191 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
24193 rs6000_stack_t *info = rs6000_stack_info ();
24195 if (TARGET_DEBUG_STACK)
24196 debug_stack_info (info);
24198 /* Write .extern for any function we will call to save and restore
24199 fp values. */
24200 if (info->first_fp_reg_save < 64
24201 && !TARGET_MACHO
24202 && !TARGET_ELF)
24204 char *name;
24205 int regno = info->first_fp_reg_save - 32;
24207 if ((info->savres_strategy & SAVE_INLINE_FPRS) == 0)
24209 bool lr = (info->savres_strategy & SAVE_NOINLINE_FPRS_SAVES_LR) != 0;
24210 int sel = SAVRES_SAVE | SAVRES_FPR | (lr ? SAVRES_LR : 0);
24211 name = rs6000_savres_routine_name (info, regno, sel);
24212 fprintf (file, "\t.extern %s\n", name);
24214 if ((info->savres_strategy & REST_INLINE_FPRS) == 0)
24216 bool lr = (info->savres_strategy
24217 & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
24218 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24219 name = rs6000_savres_routine_name (info, regno, sel);
24220 fprintf (file, "\t.extern %s\n", name);
24224 /* ELFv2 ABI r2 setup code and local entry point. This must follow
24225 immediately after the global entry point label. */
24226 if (DEFAULT_ABI == ABI_ELFv2 && cfun->machine->r2_setup_needed)
24228 const char *name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
24230 fprintf (file, "0:\taddis 2,12,.TOC.-0b@ha\n");
24231 fprintf (file, "\taddi 2,2,.TOC.-0b@l\n");
24233 fputs ("\t.localentry\t", file);
24234 assemble_name (file, name);
24235 fputs (",.-", file);
24236 assemble_name (file, name);
24237 fputs ("\n", file);
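/* For a hypothetical function "foo" the lines above produce:
	0:	addis 2,12,.TOC.-0b@ha
		addi 2,2,.TOC.-0b@l
		.localentry foo,.-foo
   relying on the ELFv2 convention that r12 holds the global entry
   address on entry, so r2 can be derived from it.  */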
24240 /* Output -mprofile-kernel code. This needs to be done here instead of
24241 in output_function_profile since it must go after the ELFv2 ABI
24242 local entry point. */
24243 if (TARGET_PROFILE_KERNEL && crtl->profile)
24245 gcc_assert (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2);
24246 gcc_assert (!TARGET_32BIT);
24248 asm_fprintf (file, "\tmflr %s\n", reg_names[0]);
24249 asm_fprintf (file, "\tstd %s,16(%s)\n", reg_names[0], reg_names[1]);
24251 /* In the ELFv2 ABI we have no compiler stack word. It must be
24252 the responsibility of _mcount to preserve the static chain
24253 register if required. */
24254 if (DEFAULT_ABI != ABI_ELFv2
24255 && cfun->static_chain_decl != NULL)
24257 asm_fprintf (file, "\tstd %s,24(%s)\n",
24258 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24259 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
24260 asm_fprintf (file, "\tld %s,24(%s)\n",
24261 reg_names[STATIC_CHAIN_REGNUM], reg_names[1]);
24263 else
24264 fprintf (file, "\tbl %s\n", RS6000_MCOUNT);
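/* Putting the pieces together, a function with a static chain on a
   non-ELFv2 target gets roughly (editorial sketch; the mcount name is
   target-dependent):
	mflr 0
	std 0,16(1)
	std 11,24(1)
	bl _mcount
	ld 11,24(1)
 */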
24267 rs6000_pic_labelno++;
24270 /* Non-zero if VMX regs are restored before the frame pop, zero if
24271 we restore after the pop when possible. */
24272 #define ALWAYS_RESTORE_ALTIVEC_BEFORE_POP 0
24274 /* Restoring cr is a two-step process: loading a reg from the frame
24275 save, then moving the reg to cr. For ABI_V4 we must let the
24276 unwinder know that the stack location is no longer valid at or
24277 before the stack deallocation, but we can't emit a cfa_restore for
24278 cr at the stack deallocation like we do for other registers.
24279 The trouble is that it is possible for the move to cr to be
24280 scheduled after the stack deallocation. So say exactly where cr
24281 is located on each of the two insns. */
24283 static rtx
24284 load_cr_save (int regno, rtx frame_reg_rtx, int offset, bool exit_func)
24286 rtx mem = gen_frame_mem_offset (SImode, frame_reg_rtx, offset);
24287 rtx reg = gen_rtx_REG (SImode, regno);
24288 rtx_insn *insn = emit_move_insn (reg, mem);
24290 if (!exit_func && DEFAULT_ABI == ABI_V4)
24292 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24293 rtx set = gen_rtx_SET (VOIDmode, reg, cr);
24295 add_reg_note (insn, REG_CFA_REGISTER, set);
24296 RTX_FRAME_RELATED_P (insn) = 1;
24298 return reg;
24301 /* Reload CR from REG. */
24303 static void
24304 restore_saved_cr (rtx reg, int using_mfcr_multiple, bool exit_func)
24306 int count = 0;
24307 int i;
24309 if (using_mfcr_multiple)
24311 for (i = 0; i < 8; i++)
24312 if (save_reg_p (CR0_REGNO + i))
24313 count++;
24314 gcc_assert (count);
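/* Editorial sketch of the restore shape (operands illustrative): the
   image was loaded by load_cr_save as, say, "lwz 12,OFF(1)", and is
   moved back either with a single "mtcrf MASK,12" naming all saved
   fields (the multiple case below) or with one mtcrf per field.  */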
24317 if (using_mfcr_multiple && count > 1)
24319 rtx_insn *insn;
24320 rtvec p;
24321 int ndx;
24323 p = rtvec_alloc (count);
24325 ndx = 0;
24326 for (i = 0; i < 8; i++)
24327 if (save_reg_p (CR0_REGNO + i))
24329 rtvec r = rtvec_alloc (2);
24330 RTVEC_ELT (r, 0) = reg;
24331 RTVEC_ELT (r, 1) = GEN_INT (1 << (7-i));
24332 RTVEC_ELT (p, ndx) =
24333 gen_rtx_SET (VOIDmode, gen_rtx_REG (CCmode, CR0_REGNO + i),
24334 gen_rtx_UNSPEC (CCmode, r, UNSPEC_MOVESI_TO_CR));
24335 ndx++;
24337 insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
24338 gcc_assert (ndx == count);
24340 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24341 CR field separately. */
24342 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24344 for (i = 0; i < 8; i++)
24345 if (save_reg_p (CR0_REGNO + i))
24346 add_reg_note (insn, REG_CFA_RESTORE,
24347 gen_rtx_REG (SImode, CR0_REGNO + i));
24349 RTX_FRAME_RELATED_P (insn) = 1;
24352 else
24353 for (i = 0; i < 8; i++)
24354 if (save_reg_p (CR0_REGNO + i))
24356 rtx insn = emit_insn (gen_movsi_to_cr_one
24357 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24359 /* For the ELFv2 ABI we generate a CFA_RESTORE for each
24360 CR field separately, attached to the insn that in fact
24361 restores this particular CR field. */
24362 if (!exit_func && DEFAULT_ABI == ABI_ELFv2 && flag_shrink_wrap)
24364 add_reg_note (insn, REG_CFA_RESTORE,
24365 gen_rtx_REG (SImode, CR0_REGNO + i));
24367 RTX_FRAME_RELATED_P (insn) = 1;
24371 /* For other ABIs, we just generate a single CFA_RESTORE for CR2. */
24372 if (!exit_func && DEFAULT_ABI != ABI_ELFv2
24373 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24375 rtx_insn *insn = get_last_insn ();
24376 rtx cr = gen_rtx_REG (SImode, CR2_REGNO);
24378 add_reg_note (insn, REG_CFA_RESTORE, cr);
24379 RTX_FRAME_RELATED_P (insn) = 1;
24383 /* Like cr, the move to lr instruction can be scheduled after the
24384 stack deallocation, but unlike cr, its stack frame save is still
24385 valid. So we only need to emit the cfa_restore on the correct
24386 instruction. */
24388 static void
24389 load_lr_save (int regno, rtx frame_reg_rtx, int offset)
24391 rtx mem = gen_frame_mem_offset (Pmode, frame_reg_rtx, offset);
24392 rtx reg = gen_rtx_REG (Pmode, regno);
24394 emit_move_insn (reg, mem);
24397 static void
24398 restore_saved_lr (int regno, bool exit_func)
24400 rtx reg = gen_rtx_REG (Pmode, regno);
24401 rtx lr = gen_rtx_REG (Pmode, LR_REGNO);
24402 rtx_insn *insn = emit_move_insn (lr, reg);
24404 if (!exit_func && flag_shrink_wrap)
24406 add_reg_note (insn, REG_CFA_RESTORE, lr);
24407 RTX_FRAME_RELATED_P (insn) = 1;
24411 static rtx
24412 add_crlr_cfa_restore (const rs6000_stack_t *info, rtx cfa_restores)
24414 if (DEFAULT_ABI == ABI_ELFv2)
24416 int i;
24417 for (i = 0; i < 8; i++)
24418 if (save_reg_p (CR0_REGNO + i))
24420 rtx cr = gen_rtx_REG (SImode, CR0_REGNO + i);
24421 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, cr,
24422 cfa_restores);
24425 else if (info->cr_save_p)
24426 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24427 gen_rtx_REG (SImode, CR2_REGNO),
24428 cfa_restores);
24430 if (info->lr_save_p)
24431 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24432 gen_rtx_REG (Pmode, LR_REGNO),
24433 cfa_restores);
24434 return cfa_restores;
24437 /* Return true if OFFSET from the stack pointer can be clobbered by signals.
24438 V.4 doesn't have any stack cushion; the AIX ABIs keep 220 or 288 bytes
24439 below the stack pointer that are not clobbered by signals. */
24441 static inline bool
24442 offset_below_red_zone_p (HOST_WIDE_INT offset)
24444 return offset < (DEFAULT_ABI == ABI_V4
24445 ? 0
24446 : TARGET_32BIT ? -220 : -288);
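/* Worked example (editorial): under the 64-bit AIX-style ABIs a save
   slot at sp-200 lies inside the 288-byte red zone and survives a
   signal, while one at sp-300 is exposed; under V.4 any offset below
   the stack pointer may be clobbered.  */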
24449 /* Append CFA_RESTORES to any existing REG_NOTES on the last insn. */
24451 static void
24452 emit_cfa_restores (rtx cfa_restores)
24454 rtx_insn *insn = get_last_insn ();
24455 rtx *loc = &REG_NOTES (insn);
24457 while (*loc)
24458 loc = &XEXP (*loc, 1);
24459 *loc = cfa_restores;
24460 RTX_FRAME_RELATED_P (insn) = 1;
24463 /* Emit function epilogue as insns. */
24465 void
24466 rs6000_emit_epilogue (int sibcall)
24468 rs6000_stack_t *info;
24469 int restoring_GPRs_inline;
24470 int restoring_FPRs_inline;
24471 int using_load_multiple;
24472 int using_mtcr_multiple;
24473 int use_backchain_to_restore_sp;
24474 int restore_lr;
24475 int strategy;
24476 HOST_WIDE_INT frame_off = 0;
24477 rtx sp_reg_rtx = gen_rtx_REG (Pmode, 1);
24478 rtx frame_reg_rtx = sp_reg_rtx;
24479 rtx cfa_restores = NULL_RTX;
24480 rtx insn;
24481 rtx cr_save_reg = NULL_RTX;
24482 machine_mode reg_mode = Pmode;
24483 int reg_size = TARGET_32BIT ? 4 : 8;
24484 int i;
24485 bool exit_func;
24486 unsigned ptr_regno;
24488 info = rs6000_stack_info ();
24490 if (TARGET_SPE_ABI && info->spe_64bit_regs_used != 0)
24492 reg_mode = V2SImode;
24493 reg_size = 8;
24496 strategy = info->savres_strategy;
24497 using_load_multiple = strategy & SAVRES_MULTIPLE;
24498 restoring_FPRs_inline = sibcall || (strategy & REST_INLINE_FPRS);
24499 restoring_GPRs_inline = sibcall || (strategy & REST_INLINE_GPRS);
24500 using_mtcr_multiple = (rs6000_cpu == PROCESSOR_PPC601
24501 || rs6000_cpu == PROCESSOR_PPC603
24502 || rs6000_cpu == PROCESSOR_PPC750
24503 || optimize_size);
24504 /* Restore via the backchain when we have a large frame, since this
24505 is more efficient than an addis, addi pair. The second condition
24506 here will not trigger at the moment; we don't actually need a
24507 frame pointer for alloca, but the generic parts of the compiler
24508 give us one anyway. */
24509 use_backchain_to_restore_sp = (info->total_size > 32767 - info->lr_save_offset
24510 || (cfun->calls_alloca
24511 && !frame_pointer_needed));
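/* Editorial illustration: the back-chain word lives at offset 0 of
   every frame, so this restore is a single "ld 11,0(1)" (lwz for
   -m32) rather than an addis/addi pair rebuilding a frame size that
   does not fit in 16 bits.  */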
24512 restore_lr = (info->lr_save_p
24513 && (restoring_FPRs_inline
24514 || (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR))
24515 && (restoring_GPRs_inline
24516 || info->first_fp_reg_save < 64));
24518 if (WORLD_SAVE_P (info))
24520 int i, j;
24521 char rname[30];
24522 const char *alloc_rname;
24523 rtvec p;
24525 /* eh_rest_world_r10 will return to the location saved in the LR
24526 stack slot (which is not likely to be our caller).
24527 Input: R10 -- stack adjustment. Clobbers R0, R11, R12, R7, R8.
24528 rest_world is similar, except any R10 parameter is ignored.
24529 The exception-handling stuff that was here in 2.95 is no
24530 longer necessary. */
24532 p = rtvec_alloc (9
24533 + 1
24534 + 32 - info->first_gp_reg_save
24535 + LAST_ALTIVEC_REGNO + 1 - info->first_altivec_reg_save
24536 + 63 + 1 - info->first_fp_reg_save);
24538 strcpy (rname, ((crtl->calls_eh_return) ?
24539 "*eh_rest_world_r10" : "*rest_world"));
24540 alloc_rname = ggc_strdup (rname);
24542 j = 0;
24543 RTVEC_ELT (p, j++) = ret_rtx;
24544 RTVEC_ELT (p, j++) = gen_rtx_USE (VOIDmode,
24545 gen_rtx_REG (Pmode,
24546 LR_REGNO));
24547 RTVEC_ELT (p, j++)
24548 = gen_rtx_USE (VOIDmode, gen_rtx_SYMBOL_REF (Pmode, alloc_rname));
24549 /* The instruction pattern requires a clobber here;
24550 it is shared with the restVEC helper. */
24551 RTVEC_ELT (p, j++)
24552 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 11));
24555 /* CR register traditionally saved as CR2. */
24556 rtx reg = gen_rtx_REG (SImode, CR2_REGNO);
24557 RTVEC_ELT (p, j++)
24558 = gen_frame_load (reg, frame_reg_rtx, info->cr_save_offset);
24559 if (flag_shrink_wrap)
24561 cfa_restores = alloc_reg_note (REG_CFA_RESTORE,
24562 gen_rtx_REG (Pmode, LR_REGNO),
24563 cfa_restores);
24564 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24568 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
24570 rtx reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
24571 RTVEC_ELT (p, j++)
24572 = gen_frame_load (reg,
24573 frame_reg_rtx, info->gp_save_offset + reg_size * i);
24574 if (flag_shrink_wrap)
24575 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24577 for (i = 0; info->first_altivec_reg_save + i <= LAST_ALTIVEC_REGNO; i++)
24579 rtx reg = gen_rtx_REG (V4SImode, info->first_altivec_reg_save + i);
24580 RTVEC_ELT (p, j++)
24581 = gen_frame_load (reg,
24582 frame_reg_rtx, info->altivec_save_offset + 16 * i);
24583 if (flag_shrink_wrap)
24584 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24586 for (i = 0; info->first_fp_reg_save + i <= 63; i++)
24588 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
24589 ? DFmode : SFmode),
24590 info->first_fp_reg_save + i);
24591 RTVEC_ELT (p, j++)
24592 = gen_frame_load (reg, frame_reg_rtx, info->fp_save_offset + 8 * i);
24593 if (flag_shrink_wrap)
24594 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24596 RTVEC_ELT (p, j++)
24597 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, 0));
24598 RTVEC_ELT (p, j++)
24599 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 12));
24600 RTVEC_ELT (p, j++)
24601 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 7));
24602 RTVEC_ELT (p, j++)
24603 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, 8));
24604 RTVEC_ELT (p, j++)
24605 = gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, 10));
24606 insn = emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
24608 if (flag_shrink_wrap)
24610 REG_NOTES (insn) = cfa_restores;
24611 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24612 RTX_FRAME_RELATED_P (insn) = 1;
24614 return;
24617 /* frame_reg_rtx + frame_off points to the top of this stack frame. */
24618 if (info->push_p)
24619 frame_off = info->total_size;
24621 /* Restore AltiVec registers if we must do so before adjusting the
24622 stack. */
24623 if (TARGET_ALTIVEC_ABI
24624 && info->altivec_size != 0
24625 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24626 || (DEFAULT_ABI != ABI_V4
24627 && offset_below_red_zone_p (info->altivec_save_offset))))
24629 int i;
24630 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24632 gcc_checking_assert (scratch_regno == 11 || scratch_regno == 12);
24633 if (use_backchain_to_restore_sp)
24635 int frame_regno = 11;
24637 if ((strategy & REST_INLINE_VRS) == 0)
24639 /* Of r11 and r12, select the one not clobbered by an
24640 out-of-line restore function for the frame register. */
24641 frame_regno = 11 + 12 - scratch_regno;
24643 frame_reg_rtx = gen_rtx_REG (Pmode, frame_regno);
24644 emit_move_insn (frame_reg_rtx,
24645 gen_rtx_MEM (Pmode, sp_reg_rtx));
24646 frame_off = 0;
24648 else if (frame_pointer_needed)
24649 frame_reg_rtx = hard_frame_pointer_rtx;
24651 if ((strategy & REST_INLINE_VRS) == 0)
24653 int end_save = info->altivec_save_offset + info->altivec_size;
24654 int ptr_off;
24655 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24656 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24658 if (end_save + frame_off != 0)
24660 rtx offset = GEN_INT (end_save + frame_off);
24662 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24664 else
24665 emit_move_insn (ptr_reg, frame_reg_rtx);
24667 ptr_off = -end_save;
24668 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24669 info->altivec_save_offset + ptr_off,
24670 0, V4SImode, SAVRES_VR);
24672 else
24674 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24675 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24677 rtx addr, areg, mem, reg;
24679 areg = gen_rtx_REG (Pmode, 0);
24680 emit_move_insn
24681 (areg, GEN_INT (info->altivec_save_offset
24682 + frame_off
24683 + 16 * (i - info->first_altivec_reg_save)));
24685 /* AltiVec addressing mode is [reg+reg]. */
24686 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24687 mem = gen_frame_mem (V4SImode, addr);
24689 reg = gen_rtx_REG (V4SImode, i);
24690 emit_move_insn (reg, mem);
24694 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24695 if (((strategy & REST_INLINE_VRS) == 0
24696 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24697 && (flag_shrink_wrap
24698 || (offset_below_red_zone_p
24699 (info->altivec_save_offset
24700 + 16 * (i - info->first_altivec_reg_save)))))
24702 rtx reg = gen_rtx_REG (V4SImode, i);
24703 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24707 /* Restore VRSAVE if we must do so before adjusting the stack. */
24708 if (TARGET_ALTIVEC
24709 && TARGET_ALTIVEC_VRSAVE
24710 && info->vrsave_mask != 0
24711 && (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24712 || (DEFAULT_ABI != ABI_V4
24713 && offset_below_red_zone_p (info->vrsave_save_offset))))
24715 rtx reg;
24717 if (frame_reg_rtx == sp_reg_rtx)
24719 if (use_backchain_to_restore_sp)
24721 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24722 emit_move_insn (frame_reg_rtx,
24723 gen_rtx_MEM (Pmode, sp_reg_rtx));
24724 frame_off = 0;
24726 else if (frame_pointer_needed)
24727 frame_reg_rtx = hard_frame_pointer_rtx;
24730 reg = gen_rtx_REG (SImode, 12);
24731 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24732 info->vrsave_save_offset + frame_off));
24734 emit_insn (generate_set_vrsave (reg, info, 1));
24737 insn = NULL_RTX;
24738 /* If we have a large stack frame, restore the old stack pointer
24739 using the backchain. */
24740 if (use_backchain_to_restore_sp)
24742 if (frame_reg_rtx == sp_reg_rtx)
24744 /* Under V.4, don't reset the stack pointer until after we're done
24745 loading the saved registers. */
24746 if (DEFAULT_ABI == ABI_V4)
24747 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24749 insn = emit_move_insn (frame_reg_rtx,
24750 gen_rtx_MEM (Pmode, sp_reg_rtx));
24751 frame_off = 0;
24753 else if (ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24754 && DEFAULT_ABI == ABI_V4)
24755 /* frame_reg_rtx has been set up by the altivec restore. */
24756 ;
24757 else
24759 insn = emit_move_insn (sp_reg_rtx, frame_reg_rtx);
24760 frame_reg_rtx = sp_reg_rtx;
24763 /* If we have a frame pointer, we can restore the old stack pointer
24764 from it. */
24765 else if (frame_pointer_needed)
24767 frame_reg_rtx = sp_reg_rtx;
24768 if (DEFAULT_ABI == ABI_V4)
24769 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
24770 /* Prevent reordering memory accesses against stack pointer restore. */
24771 else if (cfun->calls_alloca
24772 || offset_below_red_zone_p (-info->total_size))
24773 rs6000_emit_stack_tie (frame_reg_rtx, true);
24775 insn = emit_insn (gen_add3_insn (frame_reg_rtx, hard_frame_pointer_rtx,
24776 GEN_INT (info->total_size)));
24777 frame_off = 0;
24779 else if (info->push_p
24780 && DEFAULT_ABI != ABI_V4
24781 && !crtl->calls_eh_return)
24783 /* Prevent reordering memory accesses against stack pointer restore. */
24784 if (cfun->calls_alloca
24785 || offset_below_red_zone_p (-info->total_size))
24786 rs6000_emit_stack_tie (frame_reg_rtx, false);
24787 insn = emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx,
24788 GEN_INT (info->total_size)));
24789 frame_off = 0;
24791 if (insn && frame_reg_rtx == sp_reg_rtx)
24793 if (cfa_restores)
24795 REG_NOTES (insn) = cfa_restores;
24796 cfa_restores = NULL_RTX;
24798 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
24799 RTX_FRAME_RELATED_P (insn) = 1;
24802 /* Restore AltiVec registers if we have not done so already. */
24803 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24804 && TARGET_ALTIVEC_ABI
24805 && info->altivec_size != 0
24806 && (DEFAULT_ABI == ABI_V4
24807 || !offset_below_red_zone_p (info->altivec_save_offset)))
24809 int i;
24811 if ((strategy & REST_INLINE_VRS) == 0)
24813 int end_save = info->altivec_save_offset + info->altivec_size;
24814 int ptr_off;
24815 rtx ptr_reg = gen_rtx_REG (Pmode, 0);
24816 int scratch_regno = ptr_regno_for_savres (SAVRES_VR);
24817 rtx scratch_reg = gen_rtx_REG (Pmode, scratch_regno);
24819 if (end_save + frame_off != 0)
24821 rtx offset = GEN_INT (end_save + frame_off);
24823 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx, offset));
24825 else
24826 emit_move_insn (ptr_reg, frame_reg_rtx);
24828 ptr_off = -end_save;
24829 insn = rs6000_emit_savres_rtx (info, scratch_reg,
24830 info->altivec_save_offset + ptr_off,
24831 0, V4SImode, SAVRES_VR);
24832 if (REGNO (frame_reg_rtx) == REGNO (scratch_reg))
24834 /* Frame reg was clobbered by out-of-line save. Restore it
24835 from ptr_reg, and if we are calling out-of-line gpr or
24836 fpr restore set up the correct pointer and offset. */
24837 unsigned newptr_regno = 1;
24838 if (!restoring_GPRs_inline)
24840 bool lr = info->gp_save_offset + info->gp_size == 0;
24841 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24842 newptr_regno = ptr_regno_for_savres (sel);
24843 end_save = info->gp_save_offset + info->gp_size;
24845 else if (!restoring_FPRs_inline)
24847 bool lr = !(strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR);
24848 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
24849 newptr_regno = ptr_regno_for_savres (sel);
24850 end_save = info->gp_save_offset + info->gp_size;
24853 if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
24854 frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
24856 if (end_save + ptr_off != 0)
24858 rtx offset = GEN_INT (end_save + ptr_off);
24860 frame_off = -end_save;
24861 emit_insn (gen_add3_insn (frame_reg_rtx, ptr_reg, offset));
24863 else
24865 frame_off = ptr_off;
24866 emit_move_insn (frame_reg_rtx, ptr_reg);
24870 else
24872 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24873 if (info->vrsave_mask & ALTIVEC_REG_BIT (i))
24875 rtx addr, areg, mem, reg;
24877 areg = gen_rtx_REG (Pmode, 0);
24878 emit_move_insn
24879 (areg, GEN_INT (info->altivec_save_offset
24880 + frame_off
24881 + 16 * (i - info->first_altivec_reg_save)));
24883 /* AltiVec addressing mode is [reg+reg]. */
24884 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, areg);
24885 mem = gen_frame_mem (V4SImode, addr);
24887 reg = gen_rtx_REG (V4SImode, i);
24888 emit_move_insn (reg, mem);
24892 for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i)
24893 if (((strategy & REST_INLINE_VRS) == 0
24894 || (info->vrsave_mask & ALTIVEC_REG_BIT (i)) != 0)
24895 && (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap))
24897 rtx reg = gen_rtx_REG (V4SImode, i);
24898 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
24902 /* Restore VRSAVE if we have not done so already. */
24903 if (!ALWAYS_RESTORE_ALTIVEC_BEFORE_POP
24904 && TARGET_ALTIVEC
24905 && TARGET_ALTIVEC_VRSAVE
24906 && info->vrsave_mask != 0
24907 && (DEFAULT_ABI == ABI_V4
24908 || !offset_below_red_zone_p (info->vrsave_save_offset)))
24910 rtx reg;
24912 reg = gen_rtx_REG (SImode, 12);
24913 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24914 info->vrsave_save_offset + frame_off));
24916 emit_insn (generate_set_vrsave (reg, info, 1));
24919 /* If we exit by an out-of-line restore function on ABI_V4 then that
24920 function will deallocate the stack, so we don't need to worry
24921 about the unwinder restoring cr from an invalid stack frame
24922 location. */
24923 exit_func = (!restoring_FPRs_inline
24924 || (!restoring_GPRs_inline
24925 && info->first_fp_reg_save == 64));
24927 /* In the ELFv2 ABI we need to restore all call-saved CR fields from
24928 *separate* slots if the routine calls __builtin_eh_return, so
24929 that they can be independently restored by the unwinder. */
24930 if (DEFAULT_ABI == ABI_ELFv2 && crtl->calls_eh_return)
24932 int i, cr_off = info->ehcr_offset;
24934 for (i = 0; i < 8; i++)
24935 if (!call_used_regs[CR0_REGNO + i])
24937 rtx reg = gen_rtx_REG (SImode, 0);
24938 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24939 cr_off + frame_off));
24941 insn = emit_insn (gen_movsi_to_cr_one
24942 (gen_rtx_REG (CCmode, CR0_REGNO + i), reg));
24944 if (!exit_func && flag_shrink_wrap)
24946 add_reg_note (insn, REG_CFA_RESTORE,
24947 gen_rtx_REG (SImode, CR0_REGNO + i));
24949 RTX_FRAME_RELATED_P (insn) = 1;
24952 cr_off += reg_size;
24956 /* Get the old lr if we saved it. If we are restoring registers
24957 out-of-line, then the out-of-line routines can do this for us. */
24958 if (restore_lr && restoring_GPRs_inline)
24959 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
24961 /* Get the old cr if we saved it. */
24962 if (info->cr_save_p)
24964 unsigned cr_save_regno = 12;
24966 if (!restoring_GPRs_inline)
24968 /* Ensure we don't use the register used by the out-of-line
24969 gpr register restore below. */
24970 bool lr = info->gp_save_offset + info->gp_size == 0;
24971 int sel = SAVRES_GPR | (lr ? SAVRES_LR : 0);
24972 int gpr_ptr_regno = ptr_regno_for_savres (sel);
24974 if (gpr_ptr_regno == 12)
24975 cr_save_regno = 11;
24976 gcc_checking_assert (REGNO (frame_reg_rtx) != cr_save_regno);
24978 else if (REGNO (frame_reg_rtx) == 12)
24979 cr_save_regno = 11;
24981 cr_save_reg = load_cr_save (cr_save_regno, frame_reg_rtx,
24982 info->cr_save_offset + frame_off,
24983 exit_func);
24986 /* Set LR here to try to overlap restores below. */
24987 if (restore_lr && restoring_GPRs_inline)
24988 restore_saved_lr (0, exit_func);
24990 /* Load exception handler data registers, if needed. */
24991 if (crtl->calls_eh_return)
24993 unsigned int i, regno;
24995 if (TARGET_AIX)
24997 rtx reg = gen_rtx_REG (reg_mode, 2);
24998 emit_insn (gen_frame_load (reg, frame_reg_rtx,
24999 frame_off + RS6000_TOC_SAVE_SLOT));
25002 for (i = 0; ; ++i)
25004 rtx mem;
25006 regno = EH_RETURN_DATA_REGNO (i);
25007 if (regno == INVALID_REGNUM)
25008 break;
25010 /* Note: possible use of r0 here to address SPE regs. */
25011 mem = gen_frame_mem_offset (reg_mode, frame_reg_rtx,
25012 info->ehrd_offset + frame_off
25013 + reg_size * (int) i);
25015 emit_move_insn (gen_rtx_REG (reg_mode, regno), mem);
25019 /* Restore GPRs. This is done as a PARALLEL if we are using
25020 the load-multiple instructions. */
25021 if (TARGET_SPE_ABI
25022 && info->spe_64bit_regs_used
25023 && info->first_gp_reg_save != 32)
25025 /* Determine whether we can address all of the registers that need
25026 to be saved with an offset from frame_reg_rtx that fits in
25027 the small const field for SPE memory instructions. */
25028 int spe_regs_addressable
25029 = (SPE_CONST_OFFSET_OK (info->spe_gp_save_offset + frame_off
25030 + reg_size * (32 - info->first_gp_reg_save - 1))
25031 && restoring_GPRs_inline);
25033 if (!spe_regs_addressable)
25035 int ool_adjust = 0;
25036 rtx old_frame_reg_rtx = frame_reg_rtx;
25037 /* Make r11 point to the start of the SPE save area. We worried about
25038 not clobbering it when we were saving registers in the prologue.
25039 There's no need to worry here because the static chain is passed
25040 anew to every function. */
25042 if (!restoring_GPRs_inline)
25043 ool_adjust = 8 * (info->first_gp_reg_save - FIRST_SAVED_GP_REGNO);
25044 frame_reg_rtx = gen_rtx_REG (Pmode, 11);
25045 emit_insn (gen_addsi3 (frame_reg_rtx, old_frame_reg_rtx,
25046 GEN_INT (info->spe_gp_save_offset
25047 + frame_off
25048 - ool_adjust)));
25049 /* Keep the invariant that frame_reg_rtx + frame_off points
25050 at the top of the stack frame. */
25051 frame_off = -info->spe_gp_save_offset + ool_adjust;
25054 if (restoring_GPRs_inline)
25056 HOST_WIDE_INT spe_offset = info->spe_gp_save_offset + frame_off;
25058 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25059 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25061 rtx offset, addr, mem, reg;
25063 /* We're doing all this to ensure that the immediate offset
25064 fits into the immediate field of 'evldd'. */
25065 gcc_assert (SPE_CONST_OFFSET_OK (spe_offset + reg_size * i));
25067 offset = GEN_INT (spe_offset + reg_size * i);
25068 addr = gen_rtx_PLUS (Pmode, frame_reg_rtx, offset);
25069 mem = gen_rtx_MEM (V2SImode, addr);
25070 reg = gen_rtx_REG (reg_mode, info->first_gp_reg_save + i);
25072 emit_move_insn (reg, mem);
25075 else
25076 rs6000_emit_savres_rtx (info, frame_reg_rtx,
25077 info->spe_gp_save_offset + frame_off,
25078 info->lr_save_offset + frame_off,
25079 reg_mode,
25080 SAVRES_GPR | SAVRES_LR);
25082 else if (!restoring_GPRs_inline)
25084 /* We are jumping to an out-of-line function. */
25085 rtx ptr_reg;
25086 int end_save = info->gp_save_offset + info->gp_size;
25087 bool can_use_exit = end_save == 0;
25088 int sel = SAVRES_GPR | (can_use_exit ? SAVRES_LR : 0);
25089 int ptr_off;
25091 /* Emit stack reset code if we need it. */
25092 ptr_regno = ptr_regno_for_savres (sel);
25093 ptr_reg = gen_rtx_REG (Pmode, ptr_regno);
25094 if (can_use_exit)
25095 rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25096 else if (end_save + frame_off != 0)
25097 emit_insn (gen_add3_insn (ptr_reg, frame_reg_rtx,
25098 GEN_INT (end_save + frame_off)));
25099 else if (REGNO (frame_reg_rtx) != ptr_regno)
25100 emit_move_insn (ptr_reg, frame_reg_rtx);
25101 if (REGNO (frame_reg_rtx) == ptr_regno)
25102 frame_off = -end_save;
25104 if (can_use_exit && info->cr_save_p)
25105 restore_saved_cr (cr_save_reg, using_mtcr_multiple, true);
25107 ptr_off = -end_save;
25108 rs6000_emit_savres_rtx (info, ptr_reg,
25109 info->gp_save_offset + ptr_off,
25110 info->lr_save_offset + ptr_off,
25111 reg_mode, sel);
25113 else if (using_load_multiple)
25115 rtvec p;
25116 p = rtvec_alloc (32 - info->first_gp_reg_save);
25117 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25118 RTVEC_ELT (p, i)
25119 = gen_frame_load (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25120 frame_reg_rtx,
25121 info->gp_save_offset + frame_off + reg_size * i);
25122 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
25124 else
25126 for (i = 0; i < 32 - info->first_gp_reg_save; i++)
25127 if (rs6000_reg_live_or_pic_offset_p (info->first_gp_reg_save + i))
25128 emit_insn (gen_frame_load
25129 (gen_rtx_REG (reg_mode, info->first_gp_reg_save + i),
25130 frame_reg_rtx,
25131 info->gp_save_offset + frame_off + reg_size * i));
25134 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25136 /* If the frame pointer was used then we can't delay emitting
25137 a REG_CFA_DEF_CFA note. This must happen on the insn that
25138 restores the frame pointer, r31. We may have already emitted
25139 a REG_CFA_DEF_CFA note, but that's OK; a duplicate is
25140 discarded by dwarf2cfi.c/dwarf2out.c, and in any case would
25141 be harmless if emitted. */
25142 if (frame_pointer_needed)
25144 insn = get_last_insn ();
25145 add_reg_note (insn, REG_CFA_DEF_CFA,
25146 plus_constant (Pmode, frame_reg_rtx, frame_off));
25147 RTX_FRAME_RELATED_P (insn) = 1;
25150 /* Set up cfa_restores. We always need these when
25151 shrink-wrapping. If not shrink-wrapping then we only need
25152 the cfa_restore when the stack location is no longer valid.
25153 The cfa_restores must be emitted on or before the insn that
25154 invalidates the stack, and of course must not be emitted
25155 before the insn that actually does the restore. The latter
25156 is why it is a bad idea to emit the cfa_restores as a group
25157 on the last instruction here that actually does a restore:
25158 That insn may be reordered with respect to others doing
25159 restores. */
25160 if (flag_shrink_wrap
25161 && !restoring_GPRs_inline
25162 && info->first_fp_reg_save == 64)
25163 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25165 for (i = info->first_gp_reg_save; i < 32; i++)
25166 if (!restoring_GPRs_inline
25167 || using_load_multiple
25168 || rs6000_reg_live_or_pic_offset_p (i))
25170 rtx reg = gen_rtx_REG (reg_mode, i);
25172 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25176 if (!restoring_GPRs_inline
25177 && info->first_fp_reg_save == 64)
25179 /* We are jumping to an out-of-line function. */
25180 if (cfa_restores)
25181 emit_cfa_restores (cfa_restores);
25182 return;
25185 if (restore_lr && !restoring_GPRs_inline)
25187 load_lr_save (0, frame_reg_rtx, info->lr_save_offset + frame_off);
25188 restore_saved_lr (0, exit_func);
25191 /* Restore fpr's if we need to do it without calling a function. */
25192 if (restoring_FPRs_inline)
25193 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25194 if (save_reg_p (info->first_fp_reg_save + i))
25196 rtx reg = gen_rtx_REG ((TARGET_HARD_FLOAT && TARGET_DOUBLE_FLOAT
25197 ? DFmode : SFmode),
25198 info->first_fp_reg_save + i);
25199 emit_insn (gen_frame_load (reg, frame_reg_rtx,
25200 info->fp_save_offset + frame_off + 8 * i));
25201 if (DEFAULT_ABI == ABI_V4 || flag_shrink_wrap)
25202 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg, cfa_restores);
25205 /* If we saved cr, restore it here. Just those that were used. */
25206 if (info->cr_save_p)
25207 restore_saved_cr (cr_save_reg, using_mtcr_multiple, exit_func);
25209 /* If this is V.4, unwind the stack pointer after all of the loads
25210 have been done, or set up r11 if we are restoring fp out of line. */
25211 ptr_regno = 1;
25212 if (!restoring_FPRs_inline)
25214 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25215 int sel = SAVRES_FPR | (lr ? SAVRES_LR : 0);
25216 ptr_regno = ptr_regno_for_savres (sel);
25219 insn = rs6000_emit_stack_reset (info, frame_reg_rtx, frame_off, ptr_regno);
25220 if (REGNO (frame_reg_rtx) == ptr_regno)
25221 frame_off = 0;
25223 if (insn && restoring_FPRs_inline)
25225 if (cfa_restores)
25227 REG_NOTES (insn) = cfa_restores;
25228 cfa_restores = NULL_RTX;
25230 add_reg_note (insn, REG_CFA_DEF_CFA, sp_reg_rtx);
25231 RTX_FRAME_RELATED_P (insn) = 1;
25234 if (crtl->calls_eh_return)
25236 rtx sa = EH_RETURN_STACKADJ_RTX;
25237 emit_insn (gen_add3_insn (sp_reg_rtx, sp_reg_rtx, sa));
25240 if (!sibcall)
25242 rtvec p;
25243 bool lr = (strategy & REST_NOINLINE_FPRS_DOESNT_RESTORE_LR) == 0;
25244 if (! restoring_FPRs_inline)
25246 p = rtvec_alloc (4 + 64 - info->first_fp_reg_save);
25247 RTVEC_ELT (p, 0) = ret_rtx;
25249 else
25251 if (cfa_restores)
25253 /* We can't hang the cfa_restores off a simple return,
25254 since the shrink-wrap code sometimes uses an existing
25255 return. This means there might be a path from
25256 pre-prologue code to this return, and dwarf2cfi code
25257 wants the eh_frame unwinder state to be the same on
25258 all paths to any point. So we need to emit the
25259 cfa_restores before the return. For -m64 we really
25260 don't need epilogue cfa_restores at all, except for
25261 this irritating dwarf2cfi-with-shrink-wrap
25262 requirement; the stack red-zone means eh_frame info
25263 from the prologue telling the unwinder to restore
25264 from the stack is perfectly good right to the end of
25265 the function. */
25266 emit_insn (gen_blockage ());
25267 emit_cfa_restores (cfa_restores);
25268 cfa_restores = NULL_RTX;
25270 p = rtvec_alloc (2);
25271 RTVEC_ELT (p, 0) = simple_return_rtx;
25274 RTVEC_ELT (p, 1) = ((restoring_FPRs_inline || !lr)
25275 ? gen_rtx_USE (VOIDmode,
25276 gen_rtx_REG (Pmode, LR_REGNO))
25277 : gen_rtx_CLOBBER (VOIDmode,
25278 gen_rtx_REG (Pmode, LR_REGNO)));
25280 /* If we have to restore more than two FP registers, branch to the
25281 restore function. It will return to our caller. */
25282 if (! restoring_FPRs_inline)
25284 int i;
25285 int reg;
25286 rtx sym;
25288 if (flag_shrink_wrap)
25289 cfa_restores = add_crlr_cfa_restore (info, cfa_restores);
25291 sym = rs6000_savres_routine_sym (info,
25292 SAVRES_FPR | (lr ? SAVRES_LR : 0));
25293 RTVEC_ELT (p, 2) = gen_rtx_USE (VOIDmode, sym);
25294 reg = (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)? 1 : 11;
25295 RTVEC_ELT (p, 3) = gen_rtx_USE (VOIDmode, gen_rtx_REG (Pmode, reg));
25297 for (i = 0; i < 64 - info->first_fp_reg_save; i++)
25299 rtx reg = gen_rtx_REG (DFmode, info->first_fp_reg_save + i);
25301 RTVEC_ELT (p, i + 4)
25302 = gen_frame_load (reg, sp_reg_rtx, info->fp_save_offset + 8 * i);
25303 if (flag_shrink_wrap)
25304 cfa_restores = alloc_reg_note (REG_CFA_RESTORE, reg,
25305 cfa_restores);
25309 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
25312 if (cfa_restores)
25314 if (sibcall)
25315 /* Ensure the cfa_restores are hung off an insn that won't
25316 be reordered above other restores. */
25317 emit_insn (gen_blockage ());
25319 emit_cfa_restores (cfa_restores);
25323 /* Write function epilogue. */
25325 static void
25326 rs6000_output_function_epilogue (FILE *file,
25327 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
25329 #if TARGET_MACHO
25330 macho_branch_islands ();
25331 /* Mach-O doesn't support labels at the end of objects, so if
25332 it looks like we might want one, insert a NOP. */
25334 rtx_insn *insn = get_last_insn ();
25335 rtx_insn *deleted_debug_label = NULL;
25336 while (insn
25337 && NOTE_P (insn)
25338 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
25340 /* For NOTE_INSN_DELETED_DEBUG_LABEL notes, don't insert a nop;
25341 instead set their CODE_LABEL_NUMBER to -1, since otherwise
25342 there would be code generation differences
25343 between -g and -g0. */
25344 if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25345 deleted_debug_label = insn;
25346 insn = PREV_INSN (insn);
25348 if (insn
25349 && (LABEL_P (insn)
25350 || (NOTE_P (insn)
25351 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
25352 fputs ("\tnop\n", file);
25353 else if (deleted_debug_label)
25354 for (insn = deleted_debug_label; insn; insn = NEXT_INSN (insn))
25355 if (NOTE_KIND (insn) == NOTE_INSN_DELETED_DEBUG_LABEL)
25356 CODE_LABEL_NUMBER (insn) = -1;
25358 #endif
25360 /* Output a traceback table here. See /usr/include/sys/debug.h for info
25361 on its format.
25363 We don't output a traceback table if -finhibit-size-directive was
25364 used. The documentation for -finhibit-size-directive reads
25365 ``don't output a @code{.size} assembler directive, or anything
25366 else that would cause trouble if the function is split in the
25367 middle, and the two halves are placed at locations far apart in
25368 memory.'' The traceback table has this property, since it
25369 includes the offset from the start of the function to the
25370 traceback table itself.
25372 System V.4 PowerPC targets (and the embedded ABI derived from them)
25373 use a different traceback table. */
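/* Illustrative sketch only (actual values come from rs6000_stack_info):
   for a C function that pushes a frame and saves the link register plus
   two FPRs, with the optional fields enabled, the code below emits a
   header along the lines of

	.long 0			# all-zero word marking the tbtab start
	.byte 0,0,34,65,130,0,...

   i.e. format type 0, language C (0), then the packed flag bytes
   (0x22, 0x41, 0x82, ...) described field by field below.  */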
25374 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
25375 && ! flag_inhibit_size_directive
25376 && rs6000_traceback != traceback_none && !cfun->is_thunk)
25378 const char *fname = NULL;
25379 const char *language_string = lang_hooks.name;
25380 int fixed_parms = 0, float_parms = 0, parm_info = 0;
25381 int i;
25382 int optional_tbtab;
25383 rs6000_stack_t *info = rs6000_stack_info ();
25385 if (rs6000_traceback == traceback_full)
25386 optional_tbtab = 1;
25387 else if (rs6000_traceback == traceback_part)
25388 optional_tbtab = 0;
25389 else
25390 optional_tbtab = !optimize_size && !TARGET_ELF;
25392 if (optional_tbtab)
25394 fname = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25395 while (*fname == '.') /* V.4 encodes . in the name */
25396 fname++;
25398 /* Need label immediately before tbtab, so we can compute
25399 its offset from the function start. */
25400 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25401 ASM_OUTPUT_LABEL (file, fname);
25404 /* The .tbtab pseudo-op can only be used for the first eight
25405 expressions, since it can't handle the possibly variable
25406 length fields that follow. However, if you omit the optional
25407 fields, the assembler outputs zeros for all optional fields
25408 anyway, giving each variable-length field its minimum length
25409 (as defined in sys/debug.h). Thus we cannot use the .tbtab
25410 pseudo-op at all. */
25412 /* An all-zero word flags the start of the tbtab, for debuggers
25413 that have to find it by searching forward from the entry
25414 point or from the current pc. */
25415 fputs ("\t.long 0\n", file);
25417 /* Tbtab format type. Use format type 0. */
25418 fputs ("\t.byte 0,", file);
25420 /* Language type. Unfortunately, there does not seem to be any
25421 official way to discover the language being compiled, so we
25422 use language_string.
25423 C is 0. Fortran is 1. Pascal is 2. Ada is 3. C++ is 9.
25424 Java is 13. Objective-C is 14. Objective-C++ isn't assigned
25425 a number, so for now use 9. LTO and Go aren't assigned numbers
25426 either, so for now use 0. */
25427 if (lang_GNU_C ()
25428 || ! strcmp (language_string, "GNU GIMPLE")
25429 || ! strcmp (language_string, "GNU Go"))
25430 i = 0;
25431 else if (! strcmp (language_string, "GNU F77")
25432 || ! strcmp (language_string, "GNU Fortran"))
25433 i = 1;
25434 else if (! strcmp (language_string, "GNU Pascal"))
25435 i = 2;
25436 else if (! strcmp (language_string, "GNU Ada"))
25437 i = 3;
25438 else if (lang_GNU_CXX ()
25439 || ! strcmp (language_string, "GNU Objective-C++"))
25440 i = 9;
25441 else if (! strcmp (language_string, "GNU Java"))
25442 i = 13;
25443 else if (! strcmp (language_string, "GNU Objective-C"))
25444 i = 14;
25445 else
25446 gcc_unreachable ();
25447 fprintf (file, "%d,", i);
25449 /* 8 single bit fields: global linkage (not set for C extern linkage,
25450 apparently a PL/I convention?), out-of-line epilogue/prologue, offset
25451 from start of procedure stored in tbtab, internal function, function
25452 has controlled storage, function has no toc, function uses fp,
25453 function logs/aborts fp operations. */
25454 /* Assume that fp operations are used if any fp reg must be saved. */
25455 fprintf (file, "%d,",
25456 (optional_tbtab << 5) | ((info->first_fp_reg_save != 64) << 1));
25458 /* 6 bitfields: function is interrupt handler, name present in
25459 proc table, function calls alloca, on condition directives
25460 (controls stack walks, 3 bits), saves condition reg, saves
25461 link reg. */
25462 /* The `function calls alloca' bit seems to be set whenever reg 31 is
25463 set up as a frame pointer, even when there is no alloca call. */
25464 fprintf (file, "%d,",
25465 ((optional_tbtab << 6)
25466 | ((optional_tbtab & frame_pointer_needed) << 5)
25467 | (info->cr_save_p << 1)
25468 | (info->lr_save_p)));
25470 /* 3 bitfields: saves backchain, fixup code, number of fpr saved
25471 (6 bits). */
25472 fprintf (file, "%d,",
25473 (info->push_p << 7) | (64 - info->first_fp_reg_save));
25475 /* 2 bitfields: spare bits (2 bits), number of gpr saved (6 bits). */
25476 fprintf (file, "%d,", (32 - first_reg_to_save ()));
25478 if (optional_tbtab)
25480 /* Compute the parameter info from the function decl argument
25481 list. */
25482 tree decl;
25483 int next_parm_info_bit = 31;
25485 for (decl = DECL_ARGUMENTS (current_function_decl);
25486 decl; decl = DECL_CHAIN (decl))
25488 rtx parameter = DECL_INCOMING_RTL (decl);
25489 machine_mode mode = GET_MODE (parameter);
25491 if (GET_CODE (parameter) == REG)
25493 if (SCALAR_FLOAT_MODE_P (mode))
25495 int bits;
25497 float_parms++;
25499 switch (mode)
25501 case SFmode:
25502 case SDmode:
25503 bits = 0x2;
25504 break;
25506 case DFmode:
25507 case DDmode:
25508 case TFmode:
25509 case TDmode:
25510 bits = 0x3;
25511 break;
25513 default:
25514 gcc_unreachable ();
25517 /* If only one bit will fit, don't or in this entry. */
25518 if (next_parm_info_bit > 0)
25519 parm_info |= (bits << (next_parm_info_bit - 1));
25520 next_parm_info_bit -= 2;
25522 else
25524 fixed_parms += ((GET_MODE_SIZE (mode)
25525 + (UNITS_PER_WORD - 1))
25526 / UNITS_PER_WORD);
25527 next_parm_info_bit -= 1;
25533 /* Number of fixed point parameters. */
25534 /* This is actually the number of words of fixed point parameters; thus
25535 an 8 byte struct counts as 2; and thus the maximum value is 8. */
25536 fprintf (file, "%d,", fixed_parms);
25538 /* 2 bitfields: number of floating point parameters (7 bits), parameters
25539 all on stack. */
25540 /* This is actually the number of fp registers that hold parameters;
25541 and thus the maximum value is 13. */
25542 /* Set parameters on stack bit if parameters are not in their original
25543 registers, regardless of whether they are on the stack? Xlc
25544 seems to set the bit when not optimizing. */
25545 fprintf (file, "%d\n", ((float_parms << 1) | (! optimize)));
25547 if (! optional_tbtab)
25548 return;
25550 /* Optional fields follow. Some are variable length. */
25552 /* Parameter types, left adjusted bit fields: 0 fixed, 10 single float,
25553 11 double float. */
25554 /* There is an entry for each parameter in a register, in the order that
25555 they occur in the parameter list. Any intervening arguments on the
25556 stack are ignored. If the list overflows a long (max possible length
25557 34 bits) then completely leave off all elements that don't fit. */
25558 /* Only emit this long if there was at least one parameter. */
25559 if (fixed_parms || float_parms)
25560 fprintf (file, "\t.long %d\n", parm_info);
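/* Worked example (illustrative): for double f (int i, double d) with
   both arguments arriving in registers, the loop above leaves bit 31
   clear for the fixed-point I and ORs 0b11 into bits 30-29 for the
   double D, so the word emitted here is 0x60000000.  */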
25562 /* Offset from start of code to tb table. */
25563 fputs ("\t.long ", file);
25564 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LT");
25565 RS6000_OUTPUT_BASENAME (file, fname);
25566 putc ('-', file);
25567 rs6000_output_function_entry (file, fname);
25568 putc ('\n', file);
25570 /* Interrupt handler mask. */
25571 /* Omit this long, since we never set the interrupt handler bit
25572 above. */
25574 /* Number of CTL (controlled storage) anchors. */
25575 /* Omit this long, since the has_ctl bit is never set above. */
25577 /* Displacement into stack of each CTL anchor. */
25578 /* Omit this list of longs, because there are no CTL anchors. */
25580 /* Length of function name. */
25581 if (*fname == '*')
25582 ++fname;
25583 fprintf (file, "\t.short %d\n", (int) strlen (fname));
25585 /* Function name. */
25586 assemble_string (fname, strlen (fname));
25588 /* Register for alloca automatic storage; this is always reg 31.
25589 Only emit this if the alloca bit was set above. */
25590 if (frame_pointer_needed)
25591 fputs ("\t.byte 31\n", file);
25593 fputs ("\t.align 2\n", file);
25597 /* A C compound statement that outputs the assembler code for a thunk
25598 function, used to implement C++ virtual function calls with
25599 multiple inheritance. The thunk acts as a wrapper around a virtual
25600 function, adjusting the implicit object parameter before handing
25601 control off to the real function.
25603 First, emit code to add the integer DELTA to the location that
25604 contains the incoming first argument. Assume that this argument
25605 contains a pointer, and is the one used to pass the `this' pointer
25606 in C++. This is the incoming argument *before* the function
25607 prologue, e.g. `%o0' on a sparc. The addition must preserve the
25608 values of all other incoming arguments.
25610 After the addition, emit code to jump to FUNCTION, which is a
25611 `FUNCTION_DECL'. This is a direct pure jump, not a call, and does
25612 not touch the return address. Hence returning from FUNCTION will
25613 return to whoever called the current `thunk'.
25615 The effect must be as if FUNCTION had been called directly with the
25616 adjusted first argument. This macro is responsible for emitting
25617 all of the code for a thunk function; output_function_prologue()
25618 and output_function_epilogue() are not invoked.
25620 The THUNK_FNDECL is redundant. (DELTA and FUNCTION have already
25621 been extracted from it.) It might possibly be useful on some
25622 targets, but probably not.
25624 If you do not define this macro, the target-independent code in the
25625 C++ frontend will generate a less efficient heavyweight thunk that
25626 calls FUNCTION instead of jumping to it. The generic approach does
25627 not support varargs. */
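/* In C-like pseudocode (a sketch of the RTL emitted below, not code
   that is literally generated), the thunk behaves as:

	this += delta;
	if (vcall_offset)
	  this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
	goto function;

   with r12 serving as the scratch register for the vtable loads.  */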
25629 static void
25630 rs6000_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
25631 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
25632 tree function)
25634 rtx this_rtx, funexp;
25635 rtx_insn *insn;
25637 reload_completed = 1;
25638 epilogue_completed = 1;
25640 /* Mark the end of the (empty) prologue. */
25641 emit_note (NOTE_INSN_PROLOGUE_END);
25643 /* Find the "this" pointer. If the function returns a structure,
25644 the structure return pointer is in r3. */
25645 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
25646 this_rtx = gen_rtx_REG (Pmode, 4);
25647 else
25648 this_rtx = gen_rtx_REG (Pmode, 3);
25650 /* Apply the constant offset, if required. */
25651 if (delta)
25652 emit_insn (gen_add3_insn (this_rtx, this_rtx, GEN_INT (delta)));
25654 /* Apply the offset from the vtable, if required. */
25655 if (vcall_offset)
25657 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
25658 rtx tmp = gen_rtx_REG (Pmode, 12);
25660 emit_move_insn (tmp, gen_rtx_MEM (Pmode, this_rtx));
25661 if (((unsigned HOST_WIDE_INT) vcall_offset) + 0x8000 >= 0x10000)
25663 emit_insn (gen_add3_insn (tmp, tmp, vcall_offset_rtx));
25664 emit_move_insn (tmp, gen_rtx_MEM (Pmode, tmp));
25666 else
25668 rtx loc = gen_rtx_PLUS (Pmode, tmp, vcall_offset_rtx);
25670 emit_move_insn (tmp, gen_rtx_MEM (Pmode, loc));
25672 emit_insn (gen_add3_insn (this_rtx, this_rtx, tmp));
25675 /* Generate a tail call to the target function. */
25676 if (!TREE_USED (function))
25678 assemble_external (function);
25679 TREE_USED (function) = 1;
25681 funexp = XEXP (DECL_RTL (function), 0);
25682 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
25684 #if TARGET_MACHO
25685 if (MACHOPIC_INDIRECT)
25686 funexp = machopic_indirect_call_target (funexp);
25687 #endif
25689 /* gen_sibcall expects reload to convert scratch pseudo to LR so we must
25690 generate sibcall RTL explicitly. */
25691 insn = emit_call_insn (
25692 gen_rtx_PARALLEL (VOIDmode,
25693 gen_rtvec (4,
25694 gen_rtx_CALL (VOIDmode,
25695 funexp, const0_rtx),
25696 gen_rtx_USE (VOIDmode, const0_rtx),
25697 gen_rtx_USE (VOIDmode,
25698 gen_rtx_REG (SImode,
25699 LR_REGNO)),
25700 simple_return_rtx)));
25701 SIBLING_CALL_P (insn) = 1;
25702 emit_barrier ();
25704 /* Ensure we have a global entry point for the thunk. ??? We could
25705 avoid that if the target routine doesn't need a global entry point,
25706 but we do not know whether this is the case at this point. */
25707 if (DEFAULT_ABI == ABI_ELFv2)
25708 cfun->machine->r2_setup_needed = true;
25710 /* Run just enough of rest_of_compilation to get the insns emitted.
25711 There's not really enough bulk here to make other passes such as
25712 instruction scheduling worth while. Note that use_thunk calls
25713 assemble_start_function and assemble_end_function. */
25714 insn = get_insns ();
25715 shorten_branches (insn);
25716 final_start_function (insn, file, 1);
25717 final (insn, file, 1);
25718 final_end_function ();
25720 reload_completed = 0;
25721 epilogue_completed = 0;
25724 /* A quick summary of the various types of 'constant-pool tables'
25725 under PowerPC:
25727    Target       Flags           Name             One table per
25728    AIX          (none)          AIX TOC          object file
25729    AIX          -mfull-toc      AIX TOC          object file
25730    AIX          -mminimal-toc   AIX minimal TOC  translation unit
25731    SVR4/EABI    (none)          SVR4 SDATA       object file
25732    SVR4/EABI    -fpic           SVR4 pic         object file
25733    SVR4/EABI    -fPIC           SVR4 PIC         translation unit
25734    SVR4/EABI    -mrelocatable   EABI TOC         function
25735    SVR4/EABI    -maix           AIX TOC          object file
25736    SVR4/EABI    -maix -mminimal-toc
25737                                 AIX minimal TOC  translation unit
25739    Name              Reg.  Set by  entries  contains:
25740                                    made by  addrs?   fp?      sum?
25742    AIX TOC           2     crt0    as       Y        option   option
25743    AIX minimal TOC   30    prolog  gcc      Y        Y        option
25744    SVR4 SDATA        13    crt0    gcc      N        Y        N
25745    SVR4 pic          30    prolog  ld       Y        not yet  N
25746    SVR4 PIC          30    prolog  gcc      Y        option   option
25747    EABI TOC          30    prolog  gcc      Y        option   option
25751 /* Hash functions for the hash table. */
25753 static unsigned
25754 rs6000_hash_constant (rtx k)
25756 enum rtx_code code = GET_CODE (k);
25757 machine_mode mode = GET_MODE (k);
25758 unsigned result = (code << 3) ^ mode;
25759 const char *format;
25760 int flen, fidx;
25762 format = GET_RTX_FORMAT (code);
25763 flen = strlen (format);
25764 fidx = 0;
25766 switch (code)
25768 case LABEL_REF:
25769 return result * 1231 + (unsigned) INSN_UID (XEXP (k, 0));
25771 case CONST_WIDE_INT:
25773 int i;
25774 flen = CONST_WIDE_INT_NUNITS (k);
25775 for (i = 0; i < flen; i++)
25776 result = result * 613 + CONST_WIDE_INT_ELT (k, i);
25777 return result;
25780 case CONST_DOUBLE:
25781 if (mode != VOIDmode)
25782 return real_hash (CONST_DOUBLE_REAL_VALUE (k)) * result;
25783 flen = 2;
25784 break;
25786 case CODE_LABEL:
25787 fidx = 3;
25788 break;
25790 default:
25791 break;
25794 for (; fidx < flen; fidx++)
25795 switch (format[fidx])
25797 case 's':
25799 unsigned i, len;
25800 const char *str = XSTR (k, fidx);
25801 len = strlen (str);
25802 result = result * 613 + len;
25803 for (i = 0; i < len; i++)
25804 result = result * 613 + (unsigned) str[i];
25805 break;
25807 case 'u':
25808 case 'e':
25809 result = result * 1231 + rs6000_hash_constant (XEXP (k, fidx));
25810 break;
25811 case 'i':
25812 case 'n':
25813 result = result * 613 + (unsigned) XINT (k, fidx);
25814 break;
25815 case 'w':
25816 if (sizeof (unsigned) >= sizeof (HOST_WIDE_INT))
25817 result = result * 613 + (unsigned) XWINT (k, fidx);
25818 else
25820 size_t i;
25821 for (i = 0; i < sizeof (HOST_WIDE_INT) / sizeof (unsigned); i++)
25822 result = result * 613 + (unsigned) (XWINT (k, fidx)
25823 >> CHAR_BIT * i);
25825 break;
25826 case '0':
25827 break;
25828 default:
25829 gcc_unreachable ();
25832 return result;
25835 hashval_t
25836 toc_hasher::hash (toc_hash_struct *thc)
25838 return rs6000_hash_constant (thc->key) ^ thc->key_mode;
25841 /* Compare H1 and H2 for equivalence. */
25843 bool
25844 toc_hasher::equal (toc_hash_struct *h1, toc_hash_struct *h2)
25846 rtx r1 = h1->key;
25847 rtx r2 = h2->key;
25849 if (h1->key_mode != h2->key_mode)
25850 return 0;
25852 return rtx_equal_p (r1, r2);
25855 /* These are the names given by the C++ front-end to vtables, and
25856 vtable-like objects. Ideally, this logic should not be here;
25857 instead, there should be some programmatic way of inquiring as
25858 to whether or not an object is a vtable. */
25860 #define VTABLE_NAME_P(NAME) \
25861 (strncmp ("_vt.", name, strlen ("_vt.")) == 0 \
25862 || strncmp ("_ZTV", name, strlen ("_ZTV")) == 0 \
25863 || strncmp ("_ZTT", name, strlen ("_ZTT")) == 0 \
25864 || strncmp ("_ZTI", name, strlen ("_ZTI")) == 0 \
25865 || strncmp ("_ZTC", name, strlen ("_ZTC")) == 0)
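/* For example, "_ZTV4Base" (the Itanium C++ ABI mangling of Base's
   vtable) and "_ZTI4Base" (its typeinfo) both match; "_vt." covers the
   old GNU v2 vtable mangling.  */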
25867 #ifdef NO_DOLLAR_IN_LABEL
25868 /* Return a GGC-allocated character string translating dollar signs in
25869 input NAME to underscores. Used by XCOFF ASM_OUTPUT_LABELREF. */
25871 const char *
25872 rs6000_xcoff_strip_dollar (const char *name)
25874 char *strip, *p;
25875 const char *q;
25876 size_t len;
25878 q = (const char *) strchr (name, '$');
25880 if (q == 0 || q == name)
25881 return name;
25883 len = strlen (name);
25884 strip = XALLOCAVEC (char, len + 1);
25885 strcpy (strip, name);
25886 p = strip + (q - name);
25887 while (p)
25889 *p = '_';
25890 p = strchr (p + 1, '$');
25893 return ggc_alloc_string (strip, len);
25895 #endif
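/* E.g. (illustrative) rs6000_xcoff_strip_dollar ("foo$bar$1") returns
   "foo_bar_1".  A name whose first character is '$' is returned
   unchanged, since the q == name guard above fires.  */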
25897 void
25898 rs6000_output_symbol_ref (FILE *file, rtx x)
25900 /* Currently C++ toc references to vtables can be emitted before it
25901 is decided whether the vtable is public or private. If this is
25902 the case, then the linker will eventually complain that there is
25903 a reference to an unknown section. Thus, for vtables only,
25904 we emit the TOC reference to reference the symbol and not the
25905 section. */
25906 const char *name = XSTR (x, 0);
25908 if (VTABLE_NAME_P (name))
25910 RS6000_OUTPUT_BASENAME (file, name);
25912 else
25913 assemble_name (file, name);
25916 /* Output a TOC entry. We derive the entry name from what is being
25917 written. */
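/* For example (illustrative; the label number is arbitrary), on 64-bit
   AIX with a full TOC the DFmode constant 1.0 produces roughly

	LC..42:
		.tc FD_3ff00000_0[TC],0x3ff0000000000000

   while 64-bit ELF or -mminimal-toc emits just the label and the value
   via DOUBLE_INT_ASM_OP.  */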
25919 void
25920 output_toc (FILE *file, rtx x, int labelno, machine_mode mode)
25922 char buf[256];
25923 const char *name = buf;
25924 rtx base = x;
25925 HOST_WIDE_INT offset = 0;
25927 gcc_assert (!TARGET_NO_TOC);
25929 /* When the linker won't eliminate them, don't output duplicate
25930 TOC entries (this happens on AIX if there is any kind of TOC,
25931 and on SVR4 under -fPIC or -mrelocatable). Don't do this for
25932 CODE_LABELs. */
25933 if (TARGET_TOC && GET_CODE (x) != LABEL_REF)
25935 struct toc_hash_struct *h;
25937 /* Create toc_hash_table. This can't be done at TARGET_OPTION_OVERRIDE
25938 time because GGC is not initialized at that point. */
25939 if (toc_hash_table == NULL)
25940 toc_hash_table = hash_table<toc_hasher>::create_ggc (1021);
25942 h = ggc_alloc<toc_hash_struct> ();
25943 h->key = x;
25944 h->key_mode = mode;
25945 h->labelno = labelno;
25947 toc_hash_struct **found = toc_hash_table->find_slot (h, INSERT);
25948 if (*found == NULL)
25949 *found = h;
25950 else /* This is indeed a duplicate.
25951 Set this label equal to that label. */
25953 fputs ("\t.set ", file);
25954 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
25955 fprintf (file, "%d,", labelno);
25956 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LC");
25957 fprintf (file, "%d\n", ((*found)->labelno));
25959 #ifdef HAVE_AS_TLS
25960 if (TARGET_XCOFF && GET_CODE (x) == SYMBOL_REF
25961 && (SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_GLOBAL_DYNAMIC
25962 || SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC))
25964 fputs ("\t.set ", file);
25965 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
25966 fprintf (file, "%d,", labelno);
25967 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (file, "LCM");
25968 fprintf (file, "%d\n", ((*found)->labelno));
25970 #endif
25971 return;
25975 /* If we're going to put a double constant in the TOC, make sure it's
25976 aligned properly when strict alignment is on. */
25977 if ((CONST_DOUBLE_P (x) || CONST_WIDE_INT_P (x))
25978 && STRICT_ALIGNMENT
25979 && GET_MODE_BITSIZE (mode) >= 64
25980 && ! (TARGET_NO_FP_IN_TOC && ! TARGET_MINIMAL_TOC)) {
25981 ASM_OUTPUT_ALIGN (file, 3);
25984 (*targetm.asm_out.internal_label) (file, "LC", labelno);
25986 /* Handle FP constants specially. Note that if we have a minimal
25987 TOC, things we put here aren't actually in the TOC, so we can allow
25988 FP constants. */
25989 if (GET_CODE (x) == CONST_DOUBLE
25990 && (GET_MODE (x) == TFmode || GET_MODE (x) == TDmode))
25992 REAL_VALUE_TYPE rv;
25993 long k[4];
25995 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
25996 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
25997 REAL_VALUE_TO_TARGET_DECIMAL128 (rv, k);
25998 else
25999 REAL_VALUE_TO_TARGET_LONG_DOUBLE (rv, k);
26001 if (TARGET_64BIT)
26003 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26004 fputs (DOUBLE_INT_ASM_OP, file);
26005 else
26006 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26007 k[0] & 0xffffffff, k[1] & 0xffffffff,
26008 k[2] & 0xffffffff, k[3] & 0xffffffff);
26009 fprintf (file, "0x%lx%08lx,0x%lx%08lx\n",
26010 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26011 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff,
26012 k[WORDS_BIG_ENDIAN ? 2 : 3] & 0xffffffff,
26013 k[WORDS_BIG_ENDIAN ? 3 : 2] & 0xffffffff);
26014 return;
26016 else
26018 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26019 fputs ("\t.long ", file);
26020 else
26021 fprintf (file, "\t.tc FT_%lx_%lx_%lx_%lx[TC],",
26022 k[0] & 0xffffffff, k[1] & 0xffffffff,
26023 k[2] & 0xffffffff, k[3] & 0xffffffff);
26024 fprintf (file, "0x%lx,0x%lx,0x%lx,0x%lx\n",
26025 k[0] & 0xffffffff, k[1] & 0xffffffff,
26026 k[2] & 0xffffffff, k[3] & 0xffffffff);
26027 return;
26030 else if (GET_CODE (x) == CONST_DOUBLE
26031 && (GET_MODE (x) == DFmode || GET_MODE (x) == DDmode))
26033 REAL_VALUE_TYPE rv;
26034 long k[2];
26036 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26038 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26039 REAL_VALUE_TO_TARGET_DECIMAL64 (rv, k);
26040 else
26041 REAL_VALUE_TO_TARGET_DOUBLE (rv, k);
26043 if (TARGET_64BIT)
26045 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26046 fputs (DOUBLE_INT_ASM_OP, file);
26047 else
26048 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26049 k[0] & 0xffffffff, k[1] & 0xffffffff);
26050 fprintf (file, "0x%lx%08lx\n",
26051 k[WORDS_BIG_ENDIAN ? 0 : 1] & 0xffffffff,
26052 k[WORDS_BIG_ENDIAN ? 1 : 0] & 0xffffffff);
26053 return;
26055 else
26057 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26058 fputs ("\t.long ", file);
26059 else
26060 fprintf (file, "\t.tc FD_%lx_%lx[TC],",
26061 k[0] & 0xffffffff, k[1] & 0xffffffff);
26062 fprintf (file, "0x%lx,0x%lx\n",
26063 k[0] & 0xffffffff, k[1] & 0xffffffff);
26064 return;
26067 else if (GET_CODE (x) == CONST_DOUBLE
26068 && (GET_MODE (x) == SFmode || GET_MODE (x) == SDmode))
26070 REAL_VALUE_TYPE rv;
26071 long l;
26073 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
26074 if (DECIMAL_FLOAT_MODE_P (GET_MODE (x)))
26075 REAL_VALUE_TO_TARGET_DECIMAL32 (rv, l);
26076 else
26077 REAL_VALUE_TO_TARGET_SINGLE (rv, l);
26079 if (TARGET_64BIT)
26081 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26082 fputs (DOUBLE_INT_ASM_OP, file);
26083 else
26084 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26085 if (WORDS_BIG_ENDIAN)
26086 fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
26087 else
26088 fprintf (file, "0x%lx\n", l & 0xffffffff);
26089 return;
26091 else
26093 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26094 fputs ("\t.long ", file);
26095 else
26096 fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
26097 fprintf (file, "0x%lx\n", l & 0xffffffff);
26098 return;
26101 else if (GET_MODE (x) == VOIDmode && GET_CODE (x) == CONST_INT)
26103 unsigned HOST_WIDE_INT low;
26104 HOST_WIDE_INT high;
26106 low = INTVAL (x) & 0xffffffff;
26107 high = (HOST_WIDE_INT) INTVAL (x) >> 32;
26109 /* TOC entries are always Pmode-sized, so when big-endian
26110 smaller integer constants in the TOC need to be padded.
26111 (This is still a win over putting the constants in
26112 a separate constant pool, because then we'd have
26113 to have both a TOC entry _and_ the actual constant.)
26115 For a 32-bit target, CONST_INT values are loaded and shifted
26116 entirely within `low' and can be stored in one TOC entry. */
26118 /* It would be easy to make this work, but it doesn't now. */
26119 gcc_assert (!TARGET_64BIT || POINTER_SIZE >= GET_MODE_BITSIZE (mode));
26121 if (WORDS_BIG_ENDIAN && POINTER_SIZE > GET_MODE_BITSIZE (mode))
26123 low |= high << 32;
26124 low <<= POINTER_SIZE - GET_MODE_BITSIZE (mode);
26125 high = (HOST_WIDE_INT) low >> 32;
26126 low &= 0xffffffff;
26129 if (TARGET_64BIT)
26131 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26132 fputs (DOUBLE_INT_ASM_OP, file);
26133 else
26134 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26135 (long) high & 0xffffffff, (long) low & 0xffffffff);
26136 fprintf (file, "0x%lx%08lx\n",
26137 (long) high & 0xffffffff, (long) low & 0xffffffff);
26138 return;
26140 else
26142 if (POINTER_SIZE < GET_MODE_BITSIZE (mode))
26144 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26145 fputs ("\t.long ", file);
26146 else
26147 fprintf (file, "\t.tc ID_%lx_%lx[TC],",
26148 (long) high & 0xffffffff, (long) low & 0xffffffff);
26149 fprintf (file, "0x%lx,0x%lx\n",
26150 (long) high & 0xffffffff, (long) low & 0xffffffff);
26152 else
26154 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26155 fputs ("\t.long ", file);
26156 else
26157 fprintf (file, "\t.tc IS_%lx[TC],", (long) low & 0xffffffff);
26158 fprintf (file, "0x%lx\n", (long) low & 0xffffffff);
26160 return;
26164 if (GET_CODE (x) == CONST)
26166 gcc_assert (GET_CODE (XEXP (x, 0)) == PLUS
26167 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT);
26169 base = XEXP (XEXP (x, 0), 0);
26170 offset = INTVAL (XEXP (XEXP (x, 0), 1));
26173 switch (GET_CODE (base))
26175 case SYMBOL_REF:
26176 name = XSTR (base, 0);
26177 break;
26179 case LABEL_REF:
26180 ASM_GENERATE_INTERNAL_LABEL (buf, "L",
26181 CODE_LABEL_NUMBER (XEXP (base, 0)));
26182 break;
26184 case CODE_LABEL:
26185 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (base));
26186 break;
26188 default:
26189 gcc_unreachable ();
26192 if (TARGET_ELF || TARGET_MINIMAL_TOC)
26193 fputs (TARGET_32BIT ? "\t.long " : DOUBLE_INT_ASM_OP, file);
26194 else
26196 fputs ("\t.tc ", file);
26197 RS6000_OUTPUT_BASENAME (file, name);
26199 if (offset < 0)
26200 fprintf (file, ".N" HOST_WIDE_INT_PRINT_UNSIGNED, - offset);
26201 else if (offset)
26202 fprintf (file, ".P" HOST_WIDE_INT_PRINT_UNSIGNED, offset);
26204 /* Mark large TOC symbols on AIX with [TE] so they are mapped
26205 after other TOC symbols, reducing overflow of small TOC access
26206 to [TC] symbols. */
26207 fputs (TARGET_XCOFF && TARGET_CMODEL != CMODEL_SMALL
26208 ? "[TE]," : "[TC],", file);
26211 /* Currently C++ toc references to vtables can be emitted before it
26212 is decided whether the vtable is public or private. If this is
26213 the case, then the linker will eventually complain that there is
26214 a TOC reference to an unknown section. Thus, for vtables only,
26215 we emit the TOC reference to reference the symbol and not the
26216 section. */
26217 if (VTABLE_NAME_P (name))
26219 RS6000_OUTPUT_BASENAME (file, name);
26220 if (offset < 0)
26221 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset);
26222 else if (offset > 0)
26223 fprintf (file, "+" HOST_WIDE_INT_PRINT_DEC, offset);
26225 else
26226 output_addr_const (file, x);
26228 #if HAVE_AS_TLS
26229 if (TARGET_XCOFF && GET_CODE (base) == SYMBOL_REF
26230 && SYMBOL_REF_TLS_MODEL (base) != 0)
26232 if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_EXEC)
26233 fputs ("@le", file);
26234 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_INITIAL_EXEC)
26235 fputs ("@ie", file);
26236 /* Use global-dynamic for local-dynamic. */
26237 else if (SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_GLOBAL_DYNAMIC
26238 || SYMBOL_REF_TLS_MODEL (base) == TLS_MODEL_LOCAL_DYNAMIC)
26240 putc ('\n', file);
26241 (*targetm.asm_out.internal_label) (file, "LCM", labelno);
26242 fputs ("\t.tc .", file);
26243 RS6000_OUTPUT_BASENAME (file, name);
26244 fputs ("[TC],", file);
26245 output_addr_const (file, x);
26246 fputs ("@m", file);
26249 #endif
26251 putc ('\n', file);
26254 /* Output an assembler pseudo-op to write an ASCII string of N characters
26255 starting at P to FILE.
26257 On the RS/6000, we have to do this using the .byte operation and
26258 write out special characters outside the quoted string.
26259 Also, the assembler is broken; very long strings are truncated,
26260 so we must artificially break them up early. */
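/* For instance, output_ascii (file, "Hi\n", 3) writes

	.byte "Hi"
	.byte 10

   quoting the printable run and emitting the newline numerically.  */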
26262 void
26263 output_ascii (FILE *file, const char *p, int n)
26265 char c;
26266 int i, count_string;
26267 const char *for_string = "\t.byte \"";
26268 const char *for_decimal = "\t.byte ";
26269 const char *to_close = NULL;
26271 count_string = 0;
26272 for (i = 0; i < n; i++)
26274 c = *p++;
26275 if (c >= ' ' && c < 0177)
26277 if (for_string)
26278 fputs (for_string, file);
26279 putc (c, file);
26281 /* Write two quotes to get one. */
26282 if (c == '"')
26284 putc (c, file);
26285 ++count_string;
26288 for_string = NULL;
26289 for_decimal = "\"\n\t.byte ";
26290 to_close = "\"\n";
26291 ++count_string;
26293 if (count_string >= 512)
26295 fputs (to_close, file);
26297 for_string = "\t.byte \"";
26298 for_decimal = "\t.byte ";
26299 to_close = NULL;
26300 count_string = 0;
26303 else
26305 if (for_decimal)
26306 fputs (for_decimal, file);
26307 fprintf (file, "%d", c);
26309 for_string = "\n\t.byte \"";
26310 for_decimal = ", ";
26311 to_close = "\n";
26312 count_string = 0;
26316 /* Now close the string if we have written one. Then end the line. */
26317 if (to_close)
26318 fputs (to_close, file);
26321 /* Generate a unique section name for FILENAME for a section type
26322 represented by SECTION_DESC. Output goes into BUF.
26324 SECTION_DESC can be any string, as long as it is different for each
26325 possible section type.
26327 We name the section in the same manner as xlc. The name begins with an
26328 underscore followed by the filename (after stripping any leading directory
26329 names) with the last period replaced by the string SECTION_DESC. If
26330 FILENAME does not contain a period, SECTION_DESC is appended to the end of
26331 the name. */
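/* For example, rs6000_gen_section_name (&buf, "src/foo.c", "bss_")
   yields "_foobss_": the directory prefix is dropped, non-alphanumeric
   characters are skipped, and the final ".c" is replaced by the
   descriptor.  */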
26333 void
26334 rs6000_gen_section_name (char **buf, const char *filename,
26335 const char *section_desc)
26337 const char *q, *after_last_slash, *last_period = 0;
26338 char *p;
26339 int len;
26341 after_last_slash = filename;
26342 for (q = filename; *q; q++)
26344 if (*q == '/')
26345 after_last_slash = q + 1;
26346 else if (*q == '.')
26347 last_period = q;
26350 len = strlen (after_last_slash) + strlen (section_desc) + 2;
26351 *buf = (char *) xmalloc (len);
26353 p = *buf;
26354 *p++ = '_';
26356 for (q = after_last_slash; *q; q++)
26358 if (q == last_period)
26360 strcpy (p, section_desc);
26361 p += strlen (section_desc);
26362 break;
26365 else if (ISALNUM (*q))
26366 *p++ = *q;
26369 if (last_period == 0)
26370 strcpy (p, section_desc);
26371 else
26372 *p = '\0';
26375 /* Emit profile function. */
26377 void
26378 output_profile_hook (int labelno ATTRIBUTE_UNUSED)
26380 /* Non-standard profiling for kernels, which just saves LR then calls
26381 _mcount without worrying about arg saves. The idea is to change
26382 the function prologue as little as possible as it isn't easy to
26383 account for arg save/restore code added just for _mcount. */
26384 if (TARGET_PROFILE_KERNEL)
26385 return;
26387 if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
26389 #ifndef NO_PROFILE_COUNTERS
26390 # define NO_PROFILE_COUNTERS 0
26391 #endif
26392 if (NO_PROFILE_COUNTERS)
26393 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26394 LCT_NORMAL, VOIDmode, 0);
26395 else
26397 char buf[30];
26398 const char *label_name;
26399 rtx fun;
26401 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26402 label_name = ggc_strdup ((*targetm.strip_name_encoding) (buf));
26403 fun = gen_rtx_SYMBOL_REF (Pmode, label_name);
26405 emit_library_call (init_one_libfunc (RS6000_MCOUNT),
26406 LCT_NORMAL, VOIDmode, 1, fun, Pmode);
26409 else if (DEFAULT_ABI == ABI_DARWIN)
26411 const char *mcount_name = RS6000_MCOUNT;
26412 int caller_addr_regno = LR_REGNO;
26414 /* Be conservative and always set this, at least for now. */
26415 crtl->uses_pic_offset_table = 1;
26417 #if TARGET_MACHO
26418 /* For PIC code, set up a stub and collect the caller's address
26419 from r0, which is where the prologue puts it. */
26420 if (MACHOPIC_INDIRECT
26421 && crtl->uses_pic_offset_table)
26422 caller_addr_regno = 0;
26423 #endif
26424 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, mcount_name),
26425 LCT_NORMAL, VOIDmode, 1,
26426 gen_rtx_REG (Pmode, caller_addr_regno), Pmode);
26430 /* Write function profiler code. */
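/* Illustrative sketch (exact registers and label numbers vary): for
   32-bit SVR4 without PIC, the code below emits a sequence along the
   lines of

	mflr 0
	lis 12,.LP3@ha
	stw 0,4(1)
	la 0,.LP3@l(12)
	bl _mcount

   i.e. the saved LR goes to 4(r1) and the address of the per-function
   counter label is passed in r0 before calling _mcount.  */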
26432 void
26433 output_function_profiler (FILE *file, int labelno)
26435 char buf[100];
26437 switch (DEFAULT_ABI)
26439 default:
26440 gcc_unreachable ();
26442 case ABI_V4:
26443 if (!TARGET_32BIT)
26445 warning (0, "no profiling of 64-bit code for this ABI");
26446 return;
26448 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
26449 fprintf (file, "\tmflr %s\n", reg_names[0]);
26450 if (NO_PROFILE_COUNTERS)
26452 asm_fprintf (file, "\tstw %s,4(%s)\n",
26453 reg_names[0], reg_names[1]);
26455 else if (TARGET_SECURE_PLT && flag_pic)
26457 if (TARGET_LINK_STACK)
26459 char name[32];
26460 get_ppc476_thunk_name (name);
26461 asm_fprintf (file, "\tbl %s\n", name);
26463 else
26464 asm_fprintf (file, "\tbcl 20,31,1f\n1:\n");
26465 asm_fprintf (file, "\tstw %s,4(%s)\n",
26466 reg_names[0], reg_names[1]);
26467 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26468 asm_fprintf (file, "\taddis %s,%s,",
26469 reg_names[12], reg_names[12]);
26470 assemble_name (file, buf);
26471 asm_fprintf (file, "-1b@ha\n\tla %s,", reg_names[0]);
26472 assemble_name (file, buf);
26473 asm_fprintf (file, "-1b@l(%s)\n", reg_names[12]);
26475 else if (flag_pic == 1)
26477 fputs ("\tbl _GLOBAL_OFFSET_TABLE_@local-4\n", file);
26478 asm_fprintf (file, "\tstw %s,4(%s)\n",
26479 reg_names[0], reg_names[1]);
26480 asm_fprintf (file, "\tmflr %s\n", reg_names[12]);
26481 asm_fprintf (file, "\tlwz %s,", reg_names[0]);
26482 assemble_name (file, buf);
26483 asm_fprintf (file, "@got(%s)\n", reg_names[12]);
26485 else if (flag_pic > 1)
26487 asm_fprintf (file, "\tstw %s,4(%s)\n",
26488 reg_names[0], reg_names[1]);
26489 /* Now, we need to get the address of the label. */
26490 if (TARGET_LINK_STACK)
26492 char name[32];
26493 get_ppc476_thunk_name (name);
26494 asm_fprintf (file, "\tbl %s\n\tb 1f\n\t.long ", name);
26495 assemble_name (file, buf);
26496 fputs ("-.\n1:", file);
26497 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26498 asm_fprintf (file, "\taddi %s,%s,4\n",
26499 reg_names[11], reg_names[11]);
26501 else
26503 fputs ("\tbcl 20,31,1f\n\t.long ", file);
26504 assemble_name (file, buf);
26505 fputs ("-.\n1:", file);
26506 asm_fprintf (file, "\tmflr %s\n", reg_names[11]);
26508 asm_fprintf (file, "\tlwz %s,0(%s)\n",
26509 reg_names[0], reg_names[11]);
26510 asm_fprintf (file, "\tadd %s,%s,%s\n",
26511 reg_names[0], reg_names[0], reg_names[11]);
26513 else
26515 asm_fprintf (file, "\tlis %s,", reg_names[12]);
26516 assemble_name (file, buf);
26517 fputs ("@ha\n", file);
26518 asm_fprintf (file, "\tstw %s,4(%s)\n",
26519 reg_names[0], reg_names[1]);
26520 asm_fprintf (file, "\tla %s,", reg_names[0]);
26521 assemble_name (file, buf);
26522 asm_fprintf (file, "@l(%s)\n", reg_names[12]);
26525 /* ABI_V4 saves the static chain reg with ASM_OUTPUT_REG_PUSH. */
26526 fprintf (file, "\tbl %s%s\n",
26527 RS6000_MCOUNT, flag_pic ? "@plt" : "");
26528 break;
26530 case ABI_AIX:
26531 case ABI_ELFv2:
26532 case ABI_DARWIN:
26533 /* Don't do anything, done in output_profile_hook (). */
26534 break;
26540 /* The following variable value is the last issued insn. */
26542 static rtx last_scheduled_insn;
26544 /* The following variable helps to balance issuing of load and
26545 store instructions */
26547 static int load_store_pendulum;
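/* Per its use in rs6000_adjust_priority below: a value of -2 means the
   scheduler just issued two stores (so a ready load gets its priority
   boosted) and +2 means two loads in a row (so a ready store gets
   boosted).  */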
26549 /* Power4 load update and store update instructions are cracked into a
26550 load or store and an integer insn which are executed in the same cycle.
26551 Branches have their own dispatch slot which does not count against the
26552 GCC issue rate, but it changes the program flow so there are no other
26553 instructions to issue in this cycle. */
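/* E.g. with MORE == 4 issue slots left, a cracked insn consumes two
   slots (leaving 2) while a microcoded insn ends the dispatch group
   (leaving 0); see rs6000_variable_issue_1 below.  */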
26555 static int
26556 rs6000_variable_issue_1 (rtx_insn *insn, int more)
26558 last_scheduled_insn = insn;
26559 if (GET_CODE (PATTERN (insn)) == USE
26560 || GET_CODE (PATTERN (insn)) == CLOBBER)
26562 cached_can_issue_more = more;
26563 return cached_can_issue_more;
26566 if (insn_terminates_group_p (insn, current_group))
26568 cached_can_issue_more = 0;
26569 return cached_can_issue_more;
26572 /* If the insn has no reservation but we reach here, return MORE unchanged. */
26573 if (recog_memoized (insn) < 0)
26574 return more;
26576 if (rs6000_sched_groups)
26578 if (is_microcoded_insn (insn))
26579 cached_can_issue_more = 0;
26580 else if (is_cracked_insn (insn))
26581 cached_can_issue_more = more > 2 ? more - 2 : 0;
26582 else
26583 cached_can_issue_more = more - 1;
26585 return cached_can_issue_more;
26588 if (rs6000_cpu_attr == CPU_CELL && is_nonpipeline_insn (insn))
26589 return 0;
26591 cached_can_issue_more = more - 1;
26592 return cached_can_issue_more;
26595 static int
26596 rs6000_variable_issue (FILE *stream, int verbose, rtx_insn *insn, int more)
26598 int r = rs6000_variable_issue_1 (insn, more);
26599 if (verbose)
26600 fprintf (stream, "// rs6000_variable_issue (more = %d) = %d\n", more, r);
26601 return r;
26604 /* Adjust the cost of a scheduling dependency. Return the new cost of
26605 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
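/* For example (illustrative): on POWER6, if DEP_INSN is a load whose
   result feeds the address of a dependent store INSN and the value
   cannot use the store-data bypass, the TYPE_STORE case below returns
   4, or 6 when the load sign-extends its result.  */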
26607 static int
26608 rs6000_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
26610 enum attr_type attr_type;
26612 if (! recog_memoized (insn))
26613 return 0;
26615 switch (REG_NOTE_KIND (link))
26617 case REG_DEP_TRUE:
26619 /* Data dependency; DEP_INSN writes a register that INSN reads
26620 some cycles later. */
26622 /* Separate a load from a narrower, dependent store. */
26623 if (rs6000_sched_groups
26624 && GET_CODE (PATTERN (insn)) == SET
26625 && GET_CODE (PATTERN (dep_insn)) == SET
26626 && GET_CODE (XEXP (PATTERN (insn), 1)) == MEM
26627 && GET_CODE (XEXP (PATTERN (dep_insn), 0)) == MEM
26628 && (GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (insn), 1)))
26629 > GET_MODE_SIZE (GET_MODE (XEXP (PATTERN (dep_insn), 0)))))
26630 return cost + 14;
26632 attr_type = get_attr_type (insn);
26634 switch (attr_type)
26636 case TYPE_JMPREG:
26637 /* Tell the first scheduling pass about the latency between
26638 a mtctr and bctr (and mtlr and br/blr). The first
26639 scheduling pass will not know about this latency since
26640 the mtctr instruction, which has the latency associated
26641 to it, will be generated by reload. */
26642 return 4;
26643 case TYPE_BRANCH:
26644 /* Leave some extra cycles between a compare and its
26645 dependent branch, to inhibit expensive mispredicts. */
26646 if ((rs6000_cpu_attr == CPU_PPC603
26647 || rs6000_cpu_attr == CPU_PPC604
26648 || rs6000_cpu_attr == CPU_PPC604E
26649 || rs6000_cpu_attr == CPU_PPC620
26650 || rs6000_cpu_attr == CPU_PPC630
26651 || rs6000_cpu_attr == CPU_PPC750
26652 || rs6000_cpu_attr == CPU_PPC7400
26653 || rs6000_cpu_attr == CPU_PPC7450
26654 || rs6000_cpu_attr == CPU_PPCE5500
26655 || rs6000_cpu_attr == CPU_PPCE6500
26656 || rs6000_cpu_attr == CPU_POWER4
26657 || rs6000_cpu_attr == CPU_POWER5
26658 || rs6000_cpu_attr == CPU_POWER7
26659 || rs6000_cpu_attr == CPU_POWER8
26660 || rs6000_cpu_attr == CPU_CELL)
26661 && recog_memoized (dep_insn)
26662 && (INSN_CODE (dep_insn) >= 0))
26664 switch (get_attr_type (dep_insn))
26666 case TYPE_CMP:
26667 case TYPE_COMPARE:
26668 case TYPE_FPCOMPARE:
26669 case TYPE_CR_LOGICAL:
26670 case TYPE_DELAYED_CR:
26671 return cost + 2;
26672 case TYPE_EXTS:
26673 case TYPE_MUL:
26674 if (get_attr_dot (dep_insn) == DOT_YES)
26675 return cost + 2;
26676 else
26677 break;
26678 case TYPE_SHIFT:
26679 if (get_attr_dot (dep_insn) == DOT_YES
26680 && get_attr_var_shift (dep_insn) == VAR_SHIFT_NO)
26681 return cost + 2;
26682 else
26683 break;
26684 default:
26685 break;
26687 break;
26689 case TYPE_STORE:
26690 case TYPE_FPSTORE:
26691 if ((rs6000_cpu == PROCESSOR_POWER6)
26692 && recog_memoized (dep_insn)
26693 && (INSN_CODE (dep_insn) >= 0))
26696 if (GET_CODE (PATTERN (insn)) != SET)
26697 /* If this happens, we have to extend this to schedule
26698 optimally. Return default for now. */
26699 return cost;
26701 /* Adjust the cost for the case where the value written
26702 by a fixed point operation is used as the address
26703 gen value on a store. */
26704 switch (get_attr_type (dep_insn))
26706 case TYPE_LOAD:
26707 case TYPE_CNTLZ:
26709 if (! store_data_bypass_p (dep_insn, insn))
26710 return get_attr_sign_extend (dep_insn)
26711 == SIGN_EXTEND_YES ? 6 : 4;
26712 break;
26714 case TYPE_SHIFT:
26716 if (! store_data_bypass_p (dep_insn, insn))
26717 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26718 6 : 3;
26719 break;
26721 case TYPE_INTEGER:
26722 case TYPE_ADD:
26723 case TYPE_LOGICAL:
26724 case TYPE_COMPARE:
26725 case TYPE_EXTS:
26726 case TYPE_INSERT:
26728 if (! store_data_bypass_p (dep_insn, insn))
26729 return 3;
26730 break;
26732 case TYPE_STORE:
26733 case TYPE_FPLOAD:
26734 case TYPE_FPSTORE:
26736 if (get_attr_update (dep_insn) == UPDATE_YES
26737 && ! store_data_bypass_p (dep_insn, insn))
26738 return 3;
26739 break;
26741 case TYPE_MUL:
26743 if (! store_data_bypass_p (dep_insn, insn))
26744 return 17;
26745 break;
26747 case TYPE_DIV:
26749 if (! store_data_bypass_p (dep_insn, insn))
26750 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26751 break;
26753 default:
26754 break;
26757 break;
26759 case TYPE_LOAD:
26760 if ((rs6000_cpu == PROCESSOR_POWER6)
26761 && recog_memoized (dep_insn)
26762 && (INSN_CODE (dep_insn) >= 0))
26765 /* Adjust the cost for the case where the value written
26766 by a fixed point instruction is used within the address
26767 gen portion of a subsequent load(u)(x). */
26768 switch (get_attr_type (dep_insn))
26770 case TYPE_LOAD:
26771 case TYPE_CNTLZ:
26773 if (set_to_load_agen (dep_insn, insn))
26774 return get_attr_sign_extend (dep_insn)
26775 == SIGN_EXTEND_YES ? 6 : 4;
26776 break;
26778 case TYPE_SHIFT:
26780 if (set_to_load_agen (dep_insn, insn))
26781 return get_attr_var_shift (dep_insn) == VAR_SHIFT_YES ?
26782 6 : 3;
26783 break;
26785 case TYPE_INTEGER:
26786 case TYPE_ADD:
26787 case TYPE_LOGICAL:
26788 case TYPE_COMPARE:
26789 case TYPE_EXTS:
26790 case TYPE_INSERT:
26792 if (set_to_load_agen (dep_insn, insn))
26793 return 3;
26794 break;
26796 case TYPE_STORE:
26797 case TYPE_FPLOAD:
26798 case TYPE_FPSTORE:
26800 if (get_attr_update (dep_insn) == UPDATE_YES
26801 && set_to_load_agen (dep_insn, insn))
26802 return 3;
26803 break;
26805 case TYPE_MUL:
26807 if (set_to_load_agen (dep_insn, insn))
26808 return 17;
26809 break;
26811 case TYPE_DIV:
26813 if (set_to_load_agen (dep_insn, insn))
26814 return get_attr_size (dep_insn) == SIZE_32 ? 45 : 57;
26815 break;
26817 default:
26818 break;
26821 break;
26823 case TYPE_FPLOAD:
26824 if ((rs6000_cpu == PROCESSOR_POWER6)
26825 && get_attr_update (insn) == UPDATE_NO
26826 && recog_memoized (dep_insn)
26827 && (INSN_CODE (dep_insn) >= 0)
26828 && (get_attr_type (dep_insn) == TYPE_MFFGPR))
26829 return 2;
26831 default:
26832 break;
26835 /* Fall out to return default cost. */
26837 break;
26839 case REG_DEP_OUTPUT:
26840 /* Output dependency; DEP_INSN writes a register that INSN writes some
26841 cycles later. */
26842 if ((rs6000_cpu == PROCESSOR_POWER6)
26843 && recog_memoized (dep_insn)
26844 && (INSN_CODE (dep_insn) >= 0))
26846 attr_type = get_attr_type (insn);
26848 switch (attr_type)
26850 case TYPE_FP:
26851 if (get_attr_type (dep_insn) == TYPE_FP)
26852 return 1;
26853 break;
26854 case TYPE_FPLOAD:
26855 if (get_attr_update (insn) == UPDATE_NO
26856 && get_attr_type (dep_insn) == TYPE_MFFGPR)
26857 return 2;
26858 break;
26859 default:
26860 break;
26863 case REG_DEP_ANTI:
26864 /* Anti dependency; DEP_INSN reads a register that INSN writes some
26865 cycles later. */
26866 return 0;
26868 default:
26869 gcc_unreachable ();
26872 return cost;
26875 /* Debug version of rs6000_adjust_cost. */
26877 static int
26878 rs6000_debug_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn,
26879 int cost)
26881 int ret = rs6000_adjust_cost (insn, link, dep_insn, cost);
26883 if (ret != cost)
26885 const char *dep;
26887 switch (REG_NOTE_KIND (link))
26889 default: dep = "unknown dependency"; break;
26890 case REG_DEP_TRUE: dep = "data dependency"; break;
26891 case REG_DEP_OUTPUT: dep = "output dependency"; break;
26892 case REG_DEP_ANTI: dep = "anti dependency"; break;
26895 fprintf (stderr,
26896 "\nrs6000_adjust_cost, final cost = %d, orig cost = %d, "
26897 "%s, insn:\n", ret, cost, dep);
26899 debug_rtx (insn);
26902 return ret;
26905 /* The function returns true if INSN is microcoded.
26906 Return false otherwise. */
26908 static bool
26909 is_microcoded_insn (rtx_insn *insn)
26911 if (!insn || !NONDEBUG_INSN_P (insn)
26912 || GET_CODE (PATTERN (insn)) == USE
26913 || GET_CODE (PATTERN (insn)) == CLOBBER)
26914 return false;
26916 if (rs6000_cpu_attr == CPU_CELL)
26917 return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
26919 if (rs6000_sched_groups
26920 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
26922 enum attr_type type = get_attr_type (insn);
26923 if ((type == TYPE_LOAD
26924 && get_attr_update (insn) == UPDATE_YES
26925 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES)
26926 || ((type == TYPE_LOAD || type == TYPE_STORE)
26927 && get_attr_update (insn) == UPDATE_YES
26928 && get_attr_indexed (insn) == INDEXED_YES)
26929 || type == TYPE_MFCR)
26930 return true;
26933 return false;
26936 /* The function returns true if INSN is cracked into 2 instructions
26937 by the processor (and therefore occupies 2 issue slots). */
26939 static bool
26940 is_cracked_insn (rtx_insn *insn)
26942 if (!insn || !NONDEBUG_INSN_P (insn)
26943 || GET_CODE (PATTERN (insn)) == USE
26944 || GET_CODE (PATTERN (insn)) == CLOBBER)
26945 return false;
26947 if (rs6000_sched_groups
26948 && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
26950 enum attr_type type = get_attr_type (insn);
26951 if ((type == TYPE_LOAD
26952 && get_attr_sign_extend (insn) == SIGN_EXTEND_YES
26953 && get_attr_update (insn) == UPDATE_NO)
26954 || (type == TYPE_LOAD
26955 && get_attr_sign_extend (insn) == SIGN_EXTEND_NO
26956 && get_attr_update (insn) == UPDATE_YES
26957 && get_attr_indexed (insn) == INDEXED_NO)
26958 || (type == TYPE_STORE
26959 && get_attr_update (insn) == UPDATE_YES
26960 && get_attr_indexed (insn) == INDEXED_NO)
26961 || ((type == TYPE_FPLOAD || type == TYPE_FPSTORE)
26962 && get_attr_update (insn) == UPDATE_YES)
26963 || type == TYPE_DELAYED_CR
26964 || type == TYPE_COMPARE
26965 || (type == TYPE_EXTS
26966 && get_attr_dot (insn) == DOT_YES)
26967 || (type == TYPE_SHIFT
26968 && get_attr_dot (insn) == DOT_YES
26969 && get_attr_var_shift (insn) == VAR_SHIFT_NO)
26970 || (type == TYPE_MUL
26971 && get_attr_dot (insn) == DOT_YES)
26972 || type == TYPE_DIV
26973 || (type == TYPE_INSERT
26974 && get_attr_size (insn) == SIZE_32))
26975 return true;
26978 return false;
26981 /* The function returns true if INSN can be issued only from
26982 the branch slot. */
26984 static bool
26985 is_branch_slot_insn (rtx_insn *insn)
26987 if (!insn || !NONDEBUG_INSN_P (insn)
26988 || GET_CODE (PATTERN (insn)) == USE
26989 || GET_CODE (PATTERN (insn)) == CLOBBER)
26990 return false;
26992 if (rs6000_sched_groups)
26994 enum attr_type type = get_attr_type (insn);
26995 if (type == TYPE_BRANCH || type == TYPE_JMPREG)
26996 return true;
26997 return false;
27000 return false;
27003 /* The function returns true if OUT_INSN sets a value that is
27004 used in the address generation computation of IN_INSN. */
27005 static bool
27006 set_to_load_agen (rtx_insn *out_insn, rtx_insn *in_insn)
27008 rtx out_set, in_set;
27010 /* For performance reasons, only handle the simple case where
27011 both loads are a single_set. */
27012 out_set = single_set (out_insn);
27013 if (out_set)
27015 in_set = single_set (in_insn);
27016 if (in_set)
27017 return reg_mentioned_p (SET_DEST (out_set), SET_SRC (in_set));
27020 return false;
27023 /* Try to determine base/offset/size parts of the given MEM.
27024 Return true if successful, false if all the values couldn't
27025 be determined.
27027 This function only looks for REG or REG+CONST address forms.
27028 REG+REG address form will return false. */
27030 static bool
27031 get_memref_parts (rtx mem, rtx *base, HOST_WIDE_INT *offset,
27032 HOST_WIDE_INT *size)
27034 rtx addr_rtx;
27035 if (MEM_SIZE_KNOWN_P (mem))
27036 *size = MEM_SIZE (mem);
27037 else
27038 return false;
27040 if (GET_CODE (XEXP (mem, 0)) == PRE_MODIFY)
27041 addr_rtx = XEXP (XEXP (mem, 0), 1);
27042 else
27043 addr_rtx = (XEXP (mem, 0));
27045 if (GET_CODE (addr_rtx) == REG)
27047 *base = addr_rtx;
27048 *offset = 0;
27050 else if (GET_CODE (addr_rtx) == PLUS
27051 && CONST_INT_P (XEXP (addr_rtx, 1)))
27053 *base = XEXP (addr_rtx, 0);
27054 *offset = INTVAL (XEXP (addr_rtx, 1));
27056 else
27057 return false;
27059 return true;
27062 /* Return true if the target storage location of MEM1 is adjacent
27063 to the target storage location of MEM2. */
27066 static bool
27067 adjacent_mem_locations (rtx mem1, rtx mem2)
27069 rtx reg1, reg2;
27070 HOST_WIDE_INT off1, size1, off2, size2;
27072 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27073 && get_memref_parts (mem2, &reg2, &off2, &size2))
27074 return ((REGNO (reg1) == REGNO (reg2))
27075 && ((off1 + size1 == off2)
27076 || (off2 + size2 == off1)));
27078 return false;
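/* Worked example (illustrative): two 8-byte accesses at 16(r9) and
   24(r9) satisfy off1 + size1 == off2 with the same base register, so
   adjacent_mem_locations returns true, while mem_locations_overlap
   below returns false because the ranges only touch.  */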
27081 /* This function returns true if it can be determined that the two MEM
27082 locations overlap by at least 1 byte based on base reg/offset/size. */
27084 static bool
27085 mem_locations_overlap (rtx mem1, rtx mem2)
27087 rtx reg1, reg2;
27088 HOST_WIDE_INT off1, size1, off2, size2;
27090 if (get_memref_parts (mem1, &reg1, &off1, &size1)
27091 && get_memref_parts (mem2, &reg2, &off2, &size2))
27092 return ((REGNO (reg1) == REGNO (reg2))
27093 && (((off1 <= off2) && (off1 + size1 > off2))
27094 || ((off2 <= off1) && (off2 + size2 > off1))));
27096 return false;
27099 /* A C statement (sans semicolon) to update the integer scheduling
27100 priority INSN_PRIORITY (INSN). Increase the priority to execute the
27101 INSN earlier, reduce the priority to execute INSN later. Do not
27102 define this macro if you do not need to adjust the scheduling
27103 priorities of insns. */
27105 static int
27106 rs6000_adjust_priority (rtx_insn *insn ATTRIBUTE_UNUSED, int priority)
27108 rtx load_mem, str_mem;
27109 /* On machines (like the 750) which have asymmetric integer units,
27110 where one integer unit can do multiply and divides and the other
27111 can't, reduce the priority of multiply/divide so it is scheduled
27112 before other integer operations. */
27114 #if 0
27115 if (! INSN_P (insn))
27116 return priority;
27118 if (GET_CODE (PATTERN (insn)) == USE)
27119 return priority;
27121 switch (rs6000_cpu_attr) {
27122 case CPU_PPC750:
27123 switch (get_attr_type (insn))
27125 default:
27126 break;
27128 case TYPE_MUL:
27129 case TYPE_DIV:
27130 fprintf (stderr, "priority was %#x (%d) before adjustment\n",
27131 priority, priority);
27132 if (priority >= 0 && priority < 0x01000000)
27133 priority >>= 3;
27134 break;
27137 #endif
27139 if (insn_must_be_first_in_group (insn)
27140 && reload_completed
27141 && current_sched_info->sched_max_insns_priority
27142 && rs6000_sched_restricted_insns_priority)
27145 /* Prioritize insns that can be dispatched only in the first
27146 dispatch slot. */
27147 if (rs6000_sched_restricted_insns_priority == 1)
27148 /* Attach highest priority to insn. This means that in
27149 haifa-sched.c:ready_sort(), dispatch-slot restriction considerations
27150 precede 'priority' (critical path) considerations. */
27151 return current_sched_info->sched_max_insns_priority;
27152 else if (rs6000_sched_restricted_insns_priority == 2)
27153 /* Increase priority of insn by a minimal amount. This means that in
27154 haifa-sched.c:ready_sort(), only 'priority' (critical path)
27155 considerations precede dispatch-slot restriction considerations. */
27156 return (priority + 1);
27159 if (rs6000_cpu == PROCESSOR_POWER6
27160 && ((load_store_pendulum == -2 && is_load_insn (insn, &load_mem))
27161 || (load_store_pendulum == 2 && is_store_insn (insn, &str_mem))))
27162 /* Attach highest priority to insn if the scheduler has just issued two
27163 stores and this instruction is a load, or two loads and this instruction
27164 is a store. Power6 wants loads and stores scheduled alternately
27165 when possible. */
27166 return current_sched_info->sched_max_insns_priority;
27168 return priority;
27171 /* Return true if the instruction is nonpipelined on the Cell. */
27172 static bool
27173 is_nonpipeline_insn (rtx_insn *insn)
27175 enum attr_type type;
27176 if (!insn || !NONDEBUG_INSN_P (insn)
27177 || GET_CODE (PATTERN (insn)) == USE
27178 || GET_CODE (PATTERN (insn)) == CLOBBER)
27179 return false;
27181 type = get_attr_type (insn);
27182 if (type == TYPE_MUL
27183 || type == TYPE_DIV
27184 || type == TYPE_SDIV
27185 || type == TYPE_DDIV
27186 || type == TYPE_SSQRT
27187 || type == TYPE_DSQRT
27188 || type == TYPE_MFCR
27189 || type == TYPE_MFCRF
27190 || type == TYPE_MFJMPR)
27192 return true;
27194 return false;
27198 /* Return how many instructions the machine can issue per cycle. */
27200 static int
27201 rs6000_issue_rate (void)
27203 /* Unless scheduling for register pressure, use issue rate of 1 for
27204 first scheduling pass to decrease degradation. */
27205 if (!reload_completed && !flag_sched_pressure)
27206 return 1;
27208 switch (rs6000_cpu_attr) {
27209 case CPU_RS64A:
27210 case CPU_PPC601: /* ? */
27211 case CPU_PPC7450:
27212 return 3;
27213 case CPU_PPC440:
27214 case CPU_PPC603:
27215 case CPU_PPC750:
27216 case CPU_PPC7400:
27217 case CPU_PPC8540:
27218 case CPU_PPC8548:
27219 case CPU_CELL:
27220 case CPU_PPCE300C2:
27221 case CPU_PPCE300C3:
27222 case CPU_PPCE500MC:
27223 case CPU_PPCE500MC64:
27224 case CPU_PPCE5500:
27225 case CPU_PPCE6500:
27226 case CPU_TITAN:
27227 return 2;
27228 case CPU_PPC476:
27229 case CPU_PPC604:
27230 case CPU_PPC604E:
27231 case CPU_PPC620:
27232 case CPU_PPC630:
27233 return 4;
27234 case CPU_POWER4:
27235 case CPU_POWER5:
27236 case CPU_POWER6:
27237 case CPU_POWER7:
27238 return 5;
27239 case CPU_POWER8:
27240 return 7;
27241 default:
27242 return 1;
27246 /* Return how many instructions to look ahead for better insn
27247 scheduling. */
27249 static int
27250 rs6000_use_sched_lookahead (void)
27252 switch (rs6000_cpu_attr)
27254 case CPU_PPC8540:
27255 case CPU_PPC8548:
27256 return 4;
27258 case CPU_CELL:
27259 return (reload_completed ? 8 : 0);
27261 default:
27262 return 0;
27266 /* We are choosing insn from the ready queue. Return zero if INSN can be
27267 chosen. */
27268 static int
27269 rs6000_use_sched_lookahead_guard (rtx_insn *insn, int ready_index)
27271 if (ready_index == 0)
27272 return 0;
27274 if (rs6000_cpu_attr != CPU_CELL)
27275 return 0;
27277 gcc_assert (insn != NULL_RTX && INSN_P (insn));
27279 if (!reload_completed
27280 || is_nonpipeline_insn (insn)
27281 || is_microcoded_insn (insn))
27282 return 1;
27284 return 0;
27287 /* Determine if PAT refers to memory. If so, set MEM_REF to the MEM rtx
27288 and return true. */
27290 static bool
27291 find_mem_ref (rtx pat, rtx *mem_ref)
27293 const char * fmt;
27294 int i, j;
27296 /* stack_tie does not produce any real memory traffic. */
27297 if (tie_operand (pat, VOIDmode))
27298 return false;
27300 if (GET_CODE (pat) == MEM)
27302 *mem_ref = pat;
27303 return true;
27306 /* Recursively process the pattern. */
27307 fmt = GET_RTX_FORMAT (GET_CODE (pat));
27309 for (i = GET_RTX_LENGTH (GET_CODE (pat)) - 1; i >= 0; i--)
27311 if (fmt[i] == 'e')
27313 if (find_mem_ref (XEXP (pat, i), mem_ref))
27314 return true;
27316 else if (fmt[i] == 'E')
27317 for (j = XVECLEN (pat, i) - 1; j >= 0; j--)
27319 if (find_mem_ref (XVECEXP (pat, i, j), mem_ref))
27320 return true;
27324 return false;
27327 /* Determine if PAT is a PATTERN of a load insn. */
27329 static bool
27330 is_load_insn1 (rtx pat, rtx *load_mem)
27332 if (pat == NULL_RTX)
27333 return false;
27335 if (GET_CODE (pat) == SET)
27336 return find_mem_ref (SET_SRC (pat), load_mem);
27338 if (GET_CODE (pat) == PARALLEL)
27340 int i;
27342 for (i = 0; i < XVECLEN (pat, 0); i++)
27343 if (is_load_insn1 (XVECEXP (pat, 0, i), load_mem))
27344 return true;
27347 return false;
27350 /* Determine if INSN loads from memory. */
27352 static bool
27353 is_load_insn (rtx insn, rtx *load_mem)
27355 if (!insn || !INSN_P (insn))
27356 return false;
27358 if (CALL_P (insn))
27359 return false;
27361 return is_load_insn1 (PATTERN (insn), load_mem);
27364 /* Determine if PAT is a PATTERN of a store insn. */
27366 static bool
27367 is_store_insn1 (rtx pat, rtx *str_mem)
27369 if (!pat)
27370 return false;
27372 if (GET_CODE (pat) == SET)
27373 return find_mem_ref (SET_DEST (pat), str_mem);
27375 if (GET_CODE (pat) == PARALLEL)
27377 int i;
27379 for (i = 0; i < XVECLEN (pat, 0); i++)
27380 if (is_store_insn1 (XVECEXP (pat, 0, i), str_mem))
27381 return true;
27384 return false;
27387 /* Determine if INSN stores to memory. */
27389 static bool
27390 is_store_insn (rtx insn, rtx *str_mem)
27392 if (!insn || !INSN_P (insn))
27393 return false;
27395 return is_store_insn1 (PATTERN (insn), str_mem);
27398 /* Returns whether the dependence between INSN and NEXT is considered
27399 costly by the given target. */
27401 static bool
27402 rs6000_is_costly_dependence (dep_t dep, int cost, int distance)
27404 rtx insn;
27405 rtx next;
27406 rtx load_mem, str_mem;
27408 /* If the flag is not enabled - no dependence is considered costly;
27409 allow all dependent insns in the same group.
27410 This is the most aggressive option. */
27411 if (rs6000_sched_costly_dep == no_dep_costly)
27412 return false;
27414 /* If the flag is set to 1 - a dependence is always considered costly;
27415 do not allow dependent instructions in the same group.
27416 This is the most conservative option. */
27417 if (rs6000_sched_costly_dep == all_deps_costly)
27418 return true;
27420 insn = DEP_PRO (dep);
27421 next = DEP_CON (dep);
27423 if (rs6000_sched_costly_dep == store_to_load_dep_costly
27424 && is_load_insn (next, &load_mem)
27425 && is_store_insn (insn, &str_mem))
27426 /* Prevent load after store in the same group. */
27427 return true;
27429 if (rs6000_sched_costly_dep == true_store_to_load_dep_costly
27430 && is_load_insn (next, &load_mem)
27431 && is_store_insn (insn, &str_mem)
27432 && DEP_TYPE (dep) == REG_DEP_TRUE
27433 && mem_locations_overlap(str_mem, load_mem))
27434 /* Prevent load after store in the same group if it is a true
27435 dependence. */
27436 return true;
27438 /* The flag is set to X; dependences with latency >= X are considered costly,
27439 and will not be scheduled in the same group. */
27440 if (rs6000_sched_costly_dep <= max_dep_latency
27441 && ((cost - distance) >= (int)rs6000_sched_costly_dep))
27442 return true;
27444 return false;
27447 /* Return the next insn after INSN that is found before TAIL is reached,
27448 skipping any "non-active" insns - insns that will not actually occupy
27449 an issue slot. Return NULL_RTX if such an insn is not found. */
27451 static rtx_insn *
27452 get_next_active_insn (rtx_insn *insn, rtx_insn *tail)
27454 if (insn == NULL_RTX || insn == tail)
27455 return NULL;
27457 while (1)
27459 insn = NEXT_INSN (insn);
27460 if (insn == NULL_RTX || insn == tail)
27461 return NULL;
27463 if (CALL_P (insn)
27464 || JUMP_P (insn) || JUMP_TABLE_DATA_P (insn)
27465 || (NONJUMP_INSN_P (insn)
27466 && GET_CODE (PATTERN (insn)) != USE
27467 && GET_CODE (PATTERN (insn)) != CLOBBER
27468 && INSN_CODE (insn) != CODE_FOR_stack_tie))
27469 break;
27471 return insn;
27474 /* We are about to begin issuing insns for this clock cycle. */
27476 static int
27477 rs6000_sched_reorder (FILE *dump ATTRIBUTE_UNUSED, int sched_verbose,
27478 rtx_insn **ready ATTRIBUTE_UNUSED,
27479 int *pn_ready ATTRIBUTE_UNUSED,
27480 int clock_var ATTRIBUTE_UNUSED)
27482 int n_ready = *pn_ready;
27484 if (sched_verbose)
27485 fprintf (dump, "// rs6000_sched_reorder :\n");
27487 /* Reorder the ready list if the second to last ready insn
27488 is a nonpipeline insn. */
27489 if (rs6000_cpu_attr == CPU_CELL && n_ready > 1)
27491 if (is_nonpipeline_insn (ready[n_ready - 1])
27492 && (recog_memoized (ready[n_ready - 2]) > 0))
27493 /* Simply swap first two insns. */
27495 rtx_insn *tmp = ready[n_ready - 1];
27496 ready[n_ready - 1] = ready[n_ready - 2];
27497 ready[n_ready - 2] = tmp;
27501 if (rs6000_cpu == PROCESSOR_POWER6)
27502 load_store_pendulum = 0;
27504 return rs6000_issue_rate ();
27507 /* Like rs6000_sched_reorder, but called after issuing each insn. */
27509 static int
27510 rs6000_sched_reorder2 (FILE *dump, int sched_verbose, rtx_insn **ready,
27511 int *pn_ready, int clock_var ATTRIBUTE_UNUSED)
27513 if (sched_verbose)
27514 fprintf (dump, "// rs6000_sched_reorder2 :\n");
27516 /* For Power6, we need to handle some special cases to try and keep the
27517 store queue from overflowing and triggering expensive flushes.
27519 This code monitors how load and store instructions are being issued
27520 and skews the ready list one way or the other to increase the likelihood
27521 that a desired instruction is issued at the proper time.
27523 A couple of things are done. First, we maintain a "load_store_pendulum"
27524 to track the current state of load/store issue.
27526 - If the pendulum is at zero, then no loads or stores have been
27527 issued in the current cycle so we do nothing.
27529 - If the pendulum is 1, then a single load has been issued in this
27530 cycle and we attempt to locate another load in the ready list to
27531 issue with it.
27533 - If the pendulum is -2, then two stores have already been
27534 issued in this cycle, so we increase the priority of the first load
27535 in the ready list to increase its likelihood of being chosen first
27536 in the next cycle.
27538 - If the pendulum is -1, then a single store has been issued in this
27539 cycle and we attempt to locate another store in the ready list to
27540 issue with it, preferring a store to an adjacent memory location to
27541 facilitate store pairing in the store queue.
27543 - If the pendulum is 2, then two loads have already been
27544 issued in this cycle, so we increase the priority of the first store
27545 in the ready list to increase its likelihood of being chosen first
27546 in the next cycle.
27548 - If the pendulum < -2 or > 2, then do nothing.
27550 Note: This code covers the most common scenarios. There exist non-
27551 load/store instructions which make use of the LSU and which
27552 would need to be accounted for to strictly model the behavior
27553 of the machine. Those instructions are currently unaccounted
27554 for to help minimize compile time overhead of this code. */
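/* Worked example of the rules above: after two stores issue in one cycle
   the pendulum sits at -2, so the first load found on the ready list gets
   a priority bump and the pendulum steps to -3, preventing a second load
   from also being boosted. */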
27556 if (rs6000_cpu == PROCESSOR_POWER6 && last_scheduled_insn)
27558 int pos;
27559 int i;
27560 rtx_insn *tmp;
27561 rtx load_mem, str_mem;
27563 if (is_store_insn (last_scheduled_insn, &str_mem))
27564 /* Issuing a store, swing the load_store_pendulum to the left */
27565 load_store_pendulum--;
27566 else if (is_load_insn (last_scheduled_insn, &load_mem))
27567 /* Issuing a load, swing the load_store_pendulum to the right */
27568 load_store_pendulum++;
27569 else
27570 return cached_can_issue_more;
27572 /* If the pendulum is balanced, or there is only one instruction on
27573 the ready list, then all is well, so return. */
27574 if ((load_store_pendulum == 0) || (*pn_ready <= 1))
27575 return cached_can_issue_more;
27577 if (load_store_pendulum == 1)
27579 /* A load has been issued in this cycle. Scan the ready list
27580 for another load to issue with it */
27581 pos = *pn_ready-1;
27583 while (pos >= 0)
27585 if (is_load_insn (ready[pos], &load_mem))
27587 /* Found a load. Move it to the head of the ready list,
27588 and adjust its priority so that it is more likely to
27589 stay there */
27590 tmp = ready[pos];
27591 for (i=pos; i<*pn_ready-1; i++)
27592 ready[i] = ready[i + 1];
27593 ready[*pn_ready-1] = tmp;
27595 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27596 INSN_PRIORITY (tmp)++;
27597 break;
27599 pos--;
27602 else if (load_store_pendulum == -2)
27604 /* Two stores have been issued in this cycle. Increase the
27605 priority of the first load in the ready list to favor it for
27606 issuing in the next cycle. */
27607 pos = *pn_ready-1;
27609 while (pos >= 0)
27611 if (is_load_insn (ready[pos], &load_mem)
27612 && !sel_sched_p ()
27613 && INSN_PRIORITY_KNOWN (ready[pos]))
27615 INSN_PRIORITY (ready[pos])++;
27617 /* Adjust the pendulum to account for the fact that a load
27618 was found and increased in priority. This is to prevent
27619 increasing the priority of multiple loads */
27620 load_store_pendulum--;
27622 break;
27624 pos--;
27627 else if (load_store_pendulum == -1)
27629 /* A store has been issued in this cycle. Scan the ready list for
27630 another store to issue with it, preferring a store to an adjacent
27631 memory location */
27632 int first_store_pos = -1;
27634 pos = *pn_ready-1;
27636 while (pos >= 0)
27638 if (is_store_insn (ready[pos], &str_mem))
27640 rtx str_mem2;
27641 /* Maintain the index of the first store found on the
27642 list */
27643 if (first_store_pos == -1)
27644 first_store_pos = pos;
27646 if (is_store_insn (last_scheduled_insn, &str_mem2)
27647 && adjacent_mem_locations (str_mem, str_mem2))
27649 /* Found an adjacent store. Move it to the head of the
27650 ready list, and adjust its priority so that it is
27651 more likely to stay there */
27652 tmp = ready[pos];
27653 for (i=pos; i<*pn_ready-1; i++)
27654 ready[i] = ready[i + 1];
27655 ready[*pn_ready-1] = tmp;
27657 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27658 INSN_PRIORITY (tmp)++;
27660 first_store_pos = -1;
27662 break;
27665 pos--;
27668 if (first_store_pos >= 0)
27670 /* An adjacent store wasn't found, but a non-adjacent store was,
27671 so move the non-adjacent store to the front of the ready
27672 list, and adjust its priority so that it is more likely to
27673 stay there. */
27674 tmp = ready[first_store_pos];
27675 for (i=first_store_pos; i<*pn_ready-1; i++)
27676 ready[i] = ready[i + 1];
27677 ready[*pn_ready-1] = tmp;
27678 if (!sel_sched_p () && INSN_PRIORITY_KNOWN (tmp))
27679 INSN_PRIORITY (tmp)++;
27682 else if (load_store_pendulum == 2)
27684 /* Two loads have been issued in this cycle. Increase the priority
27685 of the first store in the ready list to favor it for issuing in
27686 the next cycle. */
27687 pos = *pn_ready-1;
27689 while (pos >= 0)
27691 if (is_store_insn (ready[pos], &str_mem)
27692 && !sel_sched_p ()
27693 && INSN_PRIORITY_KNOWN (ready[pos]))
27695 INSN_PRIORITY (ready[pos])++;
27697 /* Adjust the pendulum to account for the fact that a store
27698 was found and increased in priority. This is to prevent
27699 increasing the priority of multiple stores */
27700 load_store_pendulum++;
27702 break;
27704 pos--;
27709 return cached_can_issue_more;
27712 /* Return whether the presence of INSN causes a dispatch group termination
27713 of group WHICH_GROUP.
27715 If WHICH_GROUP == current_group, this function will return true if INSN
27716 causes the termination of the current group (i.e., the dispatch group to
27717 which INSN belongs). This means that INSN will be the last insn in the
27718 group it belongs to.
27720 If WHICH_GROUP == previous_group, this function will return true if INSN
27721 causes the termination of the previous group (i.e., the dispatch group that
27722 precedes the group to which INSN belongs). This means that INSN will be
27723 the first insn in the group it belongs to. */
27725 static bool
27726 insn_terminates_group_p (rtx_insn *insn, enum group_termination which_group)
27728 bool first, last;
27730 if (! insn)
27731 return false;
27733 first = insn_must_be_first_in_group (insn);
27734 last = insn_must_be_last_in_group (insn);
27736 if (first && last)
27737 return true;
27739 if (which_group == current_group)
27740 return last;
27741 else if (which_group == previous_group)
27742 return first;
27744 return false;
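/* Return true if INSN must be the first insn in its dispatch group. */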
27748 static bool
27749 insn_must_be_first_in_group (rtx_insn *insn)
27751 enum attr_type type;
27753 if (!insn
27754 || NOTE_P (insn)
27755 || DEBUG_INSN_P (insn)
27756 || GET_CODE (PATTERN (insn)) == USE
27757 || GET_CODE (PATTERN (insn)) == CLOBBER)
27758 return false;
27760 switch (rs6000_cpu)
27762 case PROCESSOR_POWER5:
27763 if (is_cracked_insn (insn))
27764 return true;
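/* Fall through: POWER5 also applies the POWER4 checks below. */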
27765 case PROCESSOR_POWER4:
27766 if (is_microcoded_insn (insn))
27767 return true;
27769 if (!rs6000_sched_groups)
27770 return false;
27772 type = get_attr_type (insn);
27774 switch (type)
27776 case TYPE_MFCR:
27777 case TYPE_MFCRF:
27778 case TYPE_MTCR:
27779 case TYPE_DELAYED_CR:
27780 case TYPE_CR_LOGICAL:
27781 case TYPE_MTJMPR:
27782 case TYPE_MFJMPR:
27783 case TYPE_DIV:
27784 case TYPE_LOAD_L:
27785 case TYPE_STORE_C:
27786 case TYPE_ISYNC:
27787 case TYPE_SYNC:
27788 return true;
27789 default:
27790 break;
27792 break;
27793 case PROCESSOR_POWER6:
27794 type = get_attr_type (insn);
27796 switch (type)
27798 case TYPE_EXTS:
27799 case TYPE_CNTLZ:
27800 case TYPE_TRAP:
27801 case TYPE_MUL:
27802 case TYPE_INSERT:
27803 case TYPE_FPCOMPARE:
27804 case TYPE_MFCR:
27805 case TYPE_MTCR:
27806 case TYPE_MFJMPR:
27807 case TYPE_MTJMPR:
27808 case TYPE_ISYNC:
27809 case TYPE_SYNC:
27810 case TYPE_LOAD_L:
27811 case TYPE_STORE_C:
27812 return true;
27813 case TYPE_SHIFT:
27814 if (get_attr_dot (insn) == DOT_NO
27815 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
27816 return true;
27817 else
27818 break;
27819 case TYPE_DIV:
27820 if (get_attr_size (insn) == SIZE_32)
27821 return true;
27822 else
27823 break;
27824 case TYPE_LOAD:
27825 case TYPE_STORE:
27826 case TYPE_FPLOAD:
27827 case TYPE_FPSTORE:
27828 if (get_attr_update (insn) == UPDATE_YES)
27829 return true;
27830 else
27831 break;
27832 default:
27833 break;
27835 break;
27836 case PROCESSOR_POWER7:
27837 type = get_attr_type (insn);
27839 switch (type)
27841 case TYPE_CR_LOGICAL:
27842 case TYPE_MFCR:
27843 case TYPE_MFCRF:
27844 case TYPE_MTCR:
27845 case TYPE_DIV:
27846 case TYPE_COMPARE:
27847 case TYPE_ISYNC:
27848 case TYPE_LOAD_L:
27849 case TYPE_STORE_C:
27850 case TYPE_MFJMPR:
27851 case TYPE_MTJMPR:
27852 return true;
27853 case TYPE_MUL:
27854 case TYPE_SHIFT:
27855 case TYPE_EXTS:
27856 if (get_attr_dot (insn) == DOT_YES)
27857 return true;
27858 else
27859 break;
27860 case TYPE_LOAD:
27861 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27862 || get_attr_update (insn) == UPDATE_YES)
27863 return true;
27864 else
27865 break;
27866 case TYPE_STORE:
27867 case TYPE_FPLOAD:
27868 case TYPE_FPSTORE:
27869 if (get_attr_update (insn) == UPDATE_YES)
27870 return true;
27871 else
27872 break;
27873 default:
27874 break;
27876 break;
27877 case PROCESSOR_POWER8:
27878 type = get_attr_type (insn);
27880 switch (type)
27882 case TYPE_CR_LOGICAL:
27883 case TYPE_DELAYED_CR:
27884 case TYPE_MFCR:
27885 case TYPE_MFCRF:
27886 case TYPE_MTCR:
27887 case TYPE_COMPARE:
27888 case TYPE_SYNC:
27889 case TYPE_ISYNC:
27890 case TYPE_LOAD_L:
27891 case TYPE_STORE_C:
27892 case TYPE_VECSTORE:
27893 case TYPE_MFJMPR:
27894 case TYPE_MTJMPR:
27895 return true;
27896 case TYPE_SHIFT:
27897 case TYPE_EXTS:
27898 case TYPE_MUL:
27899 if (get_attr_dot (insn) == DOT_YES)
27900 return true;
27901 else
27902 break;
27903 case TYPE_LOAD:
27904 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27905 || get_attr_update (insn) == UPDATE_YES)
27906 return true;
27907 else
27908 break;
27909 case TYPE_STORE:
27910 if (get_attr_update (insn) == UPDATE_YES
27911 && get_attr_indexed (insn) == INDEXED_YES)
27912 return true;
27913 else
27914 break;
27915 default:
27916 break;
27918 break;
27919 default:
27920 break;
27923 return false;
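/* Return true if INSN must be the last insn in its dispatch group. */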
27926 static bool
27927 insn_must_be_last_in_group (rtx_insn *insn)
27929 enum attr_type type;
27931 if (!insn
27932 || NOTE_P (insn)
27933 || DEBUG_INSN_P (insn)
27934 || GET_CODE (PATTERN (insn)) == USE
27935 || GET_CODE (PATTERN (insn)) == CLOBBER)
27936 return false;
27938 switch (rs6000_cpu) {
27939 case PROCESSOR_POWER4:
27940 case PROCESSOR_POWER5:
27941 if (is_microcoded_insn (insn))
27942 return true;
27944 if (is_branch_slot_insn (insn))
27945 return true;
27947 break;
27948 case PROCESSOR_POWER6:
27949 type = get_attr_type (insn);
27951 switch (type)
27953 case TYPE_EXTS:
27954 case TYPE_CNTLZ:
27955 case TYPE_TRAP:
27956 case TYPE_MUL:
27957 case TYPE_FPCOMPARE:
27958 case TYPE_MFCR:
27959 case TYPE_MTCR:
27960 case TYPE_MFJMPR:
27961 case TYPE_MTJMPR:
27962 case TYPE_ISYNC:
27963 case TYPE_SYNC:
27964 case TYPE_LOAD_L:
27965 case TYPE_STORE_C:
27966 return true;
27967 case TYPE_SHIFT:
27968 if (get_attr_dot (insn) == DOT_NO
27969 || get_attr_var_shift (insn) == VAR_SHIFT_NO)
27970 return true;
27971 else
27972 break;
27973 case TYPE_DIV:
27974 if (get_attr_size (insn) == SIZE_32)
27975 return true;
27976 else
27977 break;
27978 default:
27979 break;
27981 break;
27982 case PROCESSOR_POWER7:
27983 type = get_attr_type (insn);
27985 switch (type)
27987 case TYPE_ISYNC:
27988 case TYPE_SYNC:
27989 case TYPE_LOAD_L:
27990 case TYPE_STORE_C:
27991 return true;
27992 case TYPE_LOAD:
27993 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
27994 && get_attr_update (insn) == UPDATE_YES)
27995 return true;
27996 else
27997 break;
27998 case TYPE_STORE:
27999 if (get_attr_update (insn) == UPDATE_YES
28000 && get_attr_indexed (insn) == INDEXED_YES)
28001 return true;
28002 else
28003 break;
28004 default:
28005 break;
28007 break;
28008 case PROCESSOR_POWER8:
28009 type = get_attr_type (insn);
28011 switch (type)
28013 case TYPE_MFCR:
28014 case TYPE_MTCR:
28015 case TYPE_ISYNC:
28016 case TYPE_SYNC:
28017 case TYPE_LOAD_L:
28018 case TYPE_STORE_C:
28019 return true;
28020 case TYPE_LOAD:
28021 if (get_attr_sign_extend (insn) == SIGN_EXTEND_YES
28022 && get_attr_update (insn) == UPDATE_YES)
28023 return true;
28024 else
28025 break;
28026 case TYPE_STORE:
28027 if (get_attr_update (insn) == UPDATE_YES
28028 && get_attr_indexed (insn) == INDEXED_YES)
28029 return true;
28030 else
28031 break;
28032 default:
28033 break;
28035 break;
28036 default:
28037 break;
28040 return false;
28043 /* Return true if it is recommended to keep NEXT_INSN "far" (in a separate
28044 dispatch group) from the insns in GROUP_INSNS. Return false otherwise. */
28046 static bool
28047 is_costly_group (rtx *group_insns, rtx next_insn)
28049 int i;
28050 int issue_rate = rs6000_issue_rate ();
28052 for (i = 0; i < issue_rate; i++)
28054 sd_iterator_def sd_it;
28055 dep_t dep;
28056 rtx insn = group_insns[i];
28058 if (!insn)
28059 continue;
28061 FOR_EACH_DEP (insn, SD_LIST_RES_FORW, sd_it, dep)
28063 rtx next = DEP_CON (dep);
28065 if (next == next_insn
28066 && rs6000_is_costly_dependence (dep, dep_cost (dep), 0))
28067 return true;
28071 return false;
28074 /* Utility of the function redefine_groups.
28075 Check if it is too costly to schedule NEXT_INSN together with GROUP_INSNS
28076 in the same dispatch group. If so, insert nops before NEXT_INSN, in order
28077 to keep it "far" (in a separate group) from GROUP_INSNS, following
28078 one of the following schemes, depending on the value of the flag
28080 -minsert-sched-nops = X:
28080 (1) X == sched_finish_regroup_exact: insert exactly as many nops as needed
28081 in order to force NEXT_INSN into a separate group.
28082 (2) X < sched_finish_regroup_exact: insert exactly X nops.
28083 GROUP_END, CAN_ISSUE_MORE and GROUP_COUNT record the state after nop
28084 insertion (has a group just ended, how many vacant issue slots remain in the
28085 last group, and how many dispatch groups were encountered so far). */
28087 static int
28088 force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
28089 rtx_insn *next_insn, bool *group_end, int can_issue_more,
28090 int *group_count)
28092 rtx nop;
28093 bool force;
28094 int issue_rate = rs6000_issue_rate ();
28095 bool end = *group_end;
28096 int i;
28098 if (next_insn == NULL_RTX || DEBUG_INSN_P (next_insn))
28099 return can_issue_more;
28101 if (rs6000_sched_insert_nops > sched_finish_regroup_exact)
28102 return can_issue_more;
28104 force = is_costly_group (group_insns, next_insn);
28105 if (!force)
28106 return can_issue_more;
28108 if (sched_verbose > 6)
28109 fprintf (dump, "force: group count = %d, can_issue_more = %d\n",
28110 *group_count, can_issue_more);
28112 if (rs6000_sched_insert_nops == sched_finish_regroup_exact)
28114 if (*group_end)
28115 can_issue_more = 0;
28117 /* Since only a branch can be issued in the last issue_slot, it is
28118 sufficient to insert 'can_issue_more - 1' nops if next_insn is not
28119 a branch. If next_insn is a branch, we insert 'can_issue_more' nops;
28120 in this case the last nop will start a new group and the branch
28121 will be forced to the new group. */
28122 if (can_issue_more && !is_branch_slot_insn (next_insn))
28123 can_issue_more--;
28125 /* Do we have a special group ending nop? */
28126 if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
28127 || rs6000_cpu_attr == CPU_POWER8)
28129 nop = gen_group_ending_nop ();
28130 emit_insn_before (nop, next_insn);
28131 can_issue_more = 0;
28133 else
28134 while (can_issue_more > 0)
28136 nop = gen_nop ();
28137 emit_insn_before (nop, next_insn);
28138 can_issue_more--;
28141 *group_end = true;
28142 return 0;
28145 if (rs6000_sched_insert_nops < sched_finish_regroup_exact)
28147 int n_nops = rs6000_sched_insert_nops;
28149 /* Nops can't be issued from the branch slot, so the effective
28150 issue_rate for nops is 'issue_rate - 1'. */
28151 if (can_issue_more == 0)
28152 can_issue_more = issue_rate;
28153 can_issue_more--;
28154 if (can_issue_more == 0)
28156 can_issue_more = issue_rate - 1;
28157 (*group_count)++;
28158 end = true;
28159 for (i = 0; i < issue_rate; i++)
28161 group_insns[i] = 0;
28165 while (n_nops > 0)
28167 nop = gen_nop ();
28168 emit_insn_before (nop, next_insn);
28169 if (can_issue_more == issue_rate - 1) /* new group begins */
28170 end = false;
28171 can_issue_more--;
28172 if (can_issue_more == 0)
28174 can_issue_more = issue_rate - 1;
28175 (*group_count)++;
28176 end = true;
28177 for (i = 0; i < issue_rate; i++)
28179 group_insns[i] = 0;
28182 n_nops--;
28185 /* Scale back relative to 'issue_rate' (instead of 'issue_rate - 1'). */
28186 can_issue_more++;
28188 /* Is next_insn going to start a new group? */
28189 *group_end
28190 = (end
28191 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28192 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28193 || (can_issue_more < issue_rate &&
28194 insn_terminates_group_p (next_insn, previous_group)));
28195 if (*group_end && end)
28196 (*group_count)--;
28198 if (sched_verbose > 6)
28199 fprintf (dump, "done force: group count = %d, can_issue_more = %d\n",
28200 *group_count, can_issue_more);
28201 return can_issue_more;
28204 return can_issue_more;
28207 /* This function tries to synch the dispatch groups that the compiler "sees"
28208 with the dispatch groups that the processor dispatcher is expected to
28209 form in practice. It tries to achieve this synchronization by forcing the
28210 estimated processor grouping on the compiler (as opposed to the function
28211 'pad_groups' which tries to force the scheduler's grouping on the processor).
28213 The function scans the insn sequence between PREV_HEAD_INSN and TAIL and
28214 examines the (estimated) dispatch groups that will be formed by the processor
28215 dispatcher. It marks these group boundaries to reflect the estimated
28216 processor grouping, overriding the grouping that the scheduler had marked.
28217 Depending on the value of the flag '-minsert-sched-nops' this function can
28218 force certain insns into separate groups or force a certain distance between
28219 them by inserting nops, for example, if there exists a "costly dependence"
28220 between the insns.
28222 The function estimates the group boundaries that the processor will form as
28223 follows: It keeps track of how many vacant issue slots are available after
28224 each insn. A subsequent insn will start a new group if one of the following
28225 4 cases applies:
28226 - no more vacant issue slots remain in the current dispatch group.
28227 - only the last issue slot, which is the branch slot, is vacant, but the next
28228 insn is not a branch.
28229 - only the last 2 or less issue slots, including the branch slot, are vacant,
28230 which means that a cracked insn (which occupies two issue slots) can't be
28231 issued in this group.
28232 - less than 'issue_rate' slots are vacant, and the next insn always needs to
28233 start a new group. */
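/* For example, if only two slots (one of them the branch slot) remain
   vacant and the next insn is cracked, it cannot fit, so by the third
   case above it starts a new group. */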
28235 static int
28236 redefine_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28237 rtx_insn *tail)
28239 rtx_insn *insn, *next_insn;
28240 int issue_rate;
28241 int can_issue_more;
28242 int slot, i;
28243 bool group_end;
28244 int group_count = 0;
28245 rtx *group_insns;
28247 /* Initialize. */
28248 issue_rate = rs6000_issue_rate ();
28249 group_insns = XALLOCAVEC (rtx, issue_rate);
28250 for (i = 0; i < issue_rate; i++)
28252 group_insns[i] = 0;
28254 can_issue_more = issue_rate;
28255 slot = 0;
28256 insn = get_next_active_insn (prev_head_insn, tail);
28257 group_end = false;
28259 while (insn != NULL_RTX)
28261 slot = (issue_rate - can_issue_more);
28262 group_insns[slot] = insn;
28263 can_issue_more =
28264 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28265 if (insn_terminates_group_p (insn, current_group))
28266 can_issue_more = 0;
28268 next_insn = get_next_active_insn (insn, tail);
28269 if (next_insn == NULL_RTX)
28270 return group_count + 1;
28272 /* Is next_insn going to start a new group? */
28273 group_end
28274 = (can_issue_more == 0
28275 || (can_issue_more == 1 && !is_branch_slot_insn (next_insn))
28276 || (can_issue_more <= 2 && is_cracked_insn (next_insn))
28277 || (can_issue_more < issue_rate &&
28278 insn_terminates_group_p (next_insn, previous_group)));
28280 can_issue_more = force_new_group (sched_verbose, dump, group_insns,
28281 next_insn, &group_end, can_issue_more,
28282 &group_count);
28284 if (group_end)
28286 group_count++;
28287 can_issue_more = 0;
28288 for (i = 0; i < issue_rate; i++)
28290 group_insns[i] = 0;
28294 if (GET_MODE (next_insn) == TImode && can_issue_more)
28295 PUT_MODE (next_insn, VOIDmode);
28296 else if (!can_issue_more && GET_MODE (next_insn) != TImode)
28297 PUT_MODE (next_insn, TImode);
28299 insn = next_insn;
28300 if (can_issue_more == 0)
28301 can_issue_more = issue_rate;
28302 } /* while */
28304 return group_count;
28307 /* Scan the insn sequence between PREV_HEAD_INSN and TAIL and examine the
28308 dispatch group boundaries that the scheduler had marked. Pad with nops
28309 any dispatch groups which have vacant issue slots, in order to force the
28310 scheduler's grouping on the processor dispatcher. The function
28311 returns the number of dispatch groups found. */
28313 static int
28314 pad_groups (FILE *dump, int sched_verbose, rtx_insn *prev_head_insn,
28315 rtx_insn *tail)
28317 rtx_insn *insn, *next_insn;
28318 rtx nop;
28319 int issue_rate;
28320 int can_issue_more;
28321 int group_end;
28322 int group_count = 0;
28324 /* Initialize issue_rate. */
28325 issue_rate = rs6000_issue_rate ();
28326 can_issue_more = issue_rate;
28328 insn = get_next_active_insn (prev_head_insn, tail);
28329 next_insn = get_next_active_insn (insn, tail);
28331 while (insn != NULL_RTX)
28333 can_issue_more =
28334 rs6000_variable_issue (dump, sched_verbose, insn, can_issue_more);
28336 group_end = (next_insn == NULL_RTX || GET_MODE (next_insn) == TImode);
28338 if (next_insn == NULL_RTX)
28339 break;
28341 if (group_end)
28343 /* If the scheduler had marked group termination at this location
28344 (between insn and next_insn), and neither insn nor next_insn will
28345 force group termination, pad the group with nops to force group
28346 termination. */
28347 if (can_issue_more
28348 && (rs6000_sched_insert_nops == sched_finish_pad_groups)
28349 && !insn_terminates_group_p (insn, current_group)
28350 && !insn_terminates_group_p (next_insn, previous_group))
28352 if (!is_branch_slot_insn (next_insn))
28353 can_issue_more--;
28355 while (can_issue_more)
28357 nop = gen_nop ();
28358 emit_insn_before (nop, next_insn);
28359 can_issue_more--;
28363 can_issue_more = issue_rate;
28364 group_count++;
28367 insn = next_insn;
28368 next_insn = get_next_active_insn (insn, tail);
28371 return group_count;
28374 /* We're beginning a new block. Initialize data structures as necessary. */
28376 static void
28377 rs6000_sched_init (FILE *dump ATTRIBUTE_UNUSED,
28378 int sched_verbose ATTRIBUTE_UNUSED,
28379 int max_ready ATTRIBUTE_UNUSED)
28381 last_scheduled_insn = NULL_RTX;
28382 load_store_pendulum = 0;
28385 /* The following function is called at the end of scheduling BB.
28386 After reload, it inserts nops to enforce insn group bundling. */
28388 static void
28389 rs6000_sched_finish (FILE *dump, int sched_verbose)
28391 int n_groups;
28393 if (sched_verbose)
28394 fprintf (dump, "=== Finishing schedule.\n");
28396 if (reload_completed && rs6000_sched_groups)
28398 /* Do not run sched_finish hook when selective scheduling is enabled. */
28399 if (sel_sched_p ())
28400 return;
28402 if (rs6000_sched_insert_nops == sched_finish_none)
28403 return;
28405 if (rs6000_sched_insert_nops == sched_finish_pad_groups)
28406 n_groups = pad_groups (dump, sched_verbose,
28407 current_sched_info->prev_head,
28408 current_sched_info->next_tail);
28409 else
28410 n_groups = redefine_groups (dump, sched_verbose,
28411 current_sched_info->prev_head,
28412 current_sched_info->next_tail);
28414 if (sched_verbose >= 6)
28416 fprintf (dump, "ngroups = %d\n", n_groups);
28417 print_rtl (dump, current_sched_info->prev_head);
28418 fprintf (dump, "Done finish_sched\n");
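/* Scheduler state saved and restored by the selective-scheduling
   context hooks below. */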
28423 struct _rs6000_sched_context
28425 short cached_can_issue_more;
28426 rtx last_scheduled_insn;
28427 int load_store_pendulum;
28430 typedef struct _rs6000_sched_context rs6000_sched_context_def;
28431 typedef rs6000_sched_context_def *rs6000_sched_context_t;
28433 /* Allocate storage for a new scheduling context. */
28434 static void *
28435 rs6000_alloc_sched_context (void)
28437 return xmalloc (sizeof (rs6000_sched_context_def));
28440 /* If CLEAN_P is true, initialize _SC with clean data;
28441 otherwise initialize it from the global context. */
28442 static void
28443 rs6000_init_sched_context (void *_sc, bool clean_p)
28445 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28447 if (clean_p)
28449 sc->cached_can_issue_more = 0;
28450 sc->last_scheduled_insn = NULL_RTX;
28451 sc->load_store_pendulum = 0;
28453 else
28455 sc->cached_can_issue_more = cached_can_issue_more;
28456 sc->last_scheduled_insn = last_scheduled_insn;
28457 sc->load_store_pendulum = load_store_pendulum;
28461 /* Sets the global scheduling context to the one pointed to by _SC. */
28462 static void
28463 rs6000_set_sched_context (void *_sc)
28465 rs6000_sched_context_t sc = (rs6000_sched_context_t) _sc;
28467 gcc_assert (sc != NULL);
28469 cached_can_issue_more = sc->cached_can_issue_more;
28470 last_scheduled_insn = sc->last_scheduled_insn;
28471 load_store_pendulum = sc->load_store_pendulum;
28474 /* Free _SC. */
28475 static void
28476 rs6000_free_sched_context (void *_sc)
28478 gcc_assert (_sc != NULL);
28480 free (_sc);
28484 /* Length in units of the trampoline for entering a nested function. */
28486 int
28487 rs6000_trampoline_size (void)
28489 int ret = 0;
28491 switch (DEFAULT_ABI)
28493 default:
28494 gcc_unreachable ();
28496 case ABI_AIX:
28497 ret = (TARGET_32BIT) ? 12 : 24;
28498 break;
28500 case ABI_ELFv2:
28501 gcc_assert (!TARGET_32BIT);
28502 ret = 32;
28503 break;
28505 case ABI_DARWIN:
28506 case ABI_V4:
28507 ret = (TARGET_32BIT) ? 40 : 48;
28508 break;
28511 return ret;
28514 /* Emit RTL insns to initialize the variable parts of a trampoline.
28515 FNADDR is an RTX for the address of the function's pure code.
28516 CXT is an RTX for the static chain value for the function. */
28518 static void
28519 rs6000_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
28521 int regsize = (TARGET_32BIT) ? 4 : 8;
28522 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
28523 rtx ctx_reg = force_reg (Pmode, cxt);
28524 rtx addr = force_reg (Pmode, XEXP (m_tramp, 0));
28526 switch (DEFAULT_ABI)
28528 default:
28529 gcc_unreachable ();
28531 /* Under AIX, just build the 3-word function descriptor. */
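/* Descriptor layout, matching the three stores below: word 0 holds the
   entry-point address, word 1 the TOC pointer, word 2 the static chain. */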
28532 case ABI_AIX:
28534 rtx fnmem, fn_reg, toc_reg;
28536 if (!TARGET_POINTERS_TO_NESTED_FUNCTIONS)
28537 error ("You cannot take the address of a nested function if you use "
28538 "the -mno-pointers-to-nested-functions option.");
28540 fnmem = gen_const_mem (Pmode, force_reg (Pmode, fnaddr));
28541 fn_reg = gen_reg_rtx (Pmode);
28542 toc_reg = gen_reg_rtx (Pmode);
28544 /* Macro to shorten the code expansions below. */
28545 # define MEM_PLUS(MEM, OFFSET) adjust_address (MEM, Pmode, OFFSET)
28547 m_tramp = replace_equiv_address (m_tramp, addr);
28549 emit_move_insn (fn_reg, MEM_PLUS (fnmem, 0));
28550 emit_move_insn (toc_reg, MEM_PLUS (fnmem, regsize));
28551 emit_move_insn (MEM_PLUS (m_tramp, 0), fn_reg);
28552 emit_move_insn (MEM_PLUS (m_tramp, regsize), toc_reg);
28553 emit_move_insn (MEM_PLUS (m_tramp, 2*regsize), ctx_reg);
28555 # undef MEM_PLUS
28557 break;
28559 /* Under V.4/eabi/darwin, __trampoline_setup does the real work. */
28560 case ABI_ELFv2:
28561 case ABI_DARWIN:
28562 case ABI_V4:
28563 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__trampoline_setup"),
28564 LCT_NORMAL, VOIDmode, 4,
28565 addr, Pmode,
28566 GEN_INT (rs6000_trampoline_size ()), SImode,
28567 fnaddr, Pmode,
28568 ctx_reg, Pmode);
28569 break;
28574 /* Returns TRUE iff the target attribute indicated by ATTR_ID takes a plain
28575 identifier as an argument, so the front end shouldn't look it up. */
28577 static bool
28578 rs6000_attribute_takes_identifier_p (const_tree attr_id)
28580 return is_attribute_p ("altivec", attr_id);
28583 /* Handle the "altivec" attribute. The attribute may have
28584 arguments as follows:
28586 __attribute__((altivec(vector__)))
28587 __attribute__((altivec(pixel__))) (always followed by 'unsigned short')
28588 __attribute__((altivec(bool__))) (always followed by 'unsigned')
28590 and may appear more than once (e.g., 'vector bool char') in a
28591 given declaration. */
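/* Illustrative sketch only: user code normally spells these through the
   "vector", "pixel" and "bool" keywords provided by <altivec.h>, which
   are assumed here to expand to attributes of this shape:

     typedef int vsi __attribute__ ((altivec (vector__)));
     typedef unsigned short vp __attribute__ ((altivec (pixel__)));  */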
28593 static tree
28594 rs6000_handle_altivec_attribute (tree *node,
28595 tree name ATTRIBUTE_UNUSED,
28596 tree args,
28597 int flags ATTRIBUTE_UNUSED,
28598 bool *no_add_attrs)
28600 tree type = *node, result = NULL_TREE;
28601 machine_mode mode;
28602 int unsigned_p;
28603 char altivec_type
28604 = ((args && TREE_CODE (args) == TREE_LIST && TREE_VALUE (args)
28605 && TREE_CODE (TREE_VALUE (args)) == IDENTIFIER_NODE)
28606 ? *IDENTIFIER_POINTER (TREE_VALUE (args))
28607 : '?');
28609 while (POINTER_TYPE_P (type)
28610 || TREE_CODE (type) == FUNCTION_TYPE
28611 || TREE_CODE (type) == METHOD_TYPE
28612 || TREE_CODE (type) == ARRAY_TYPE)
28613 type = TREE_TYPE (type);
28615 mode = TYPE_MODE (type);
28617 /* Check for invalid AltiVec type qualifiers. */
28618 if (type == long_double_type_node)
28619 error ("use of %<long double%> in AltiVec types is invalid");
28620 else if (type == boolean_type_node)
28621 error ("use of boolean types in AltiVec types is invalid");
28622 else if (TREE_CODE (type) == COMPLEX_TYPE)
28623 error ("use of %<complex%> in AltiVec types is invalid");
28624 else if (DECIMAL_FLOAT_MODE_P (mode))
28625 error ("use of decimal floating point types in AltiVec types is invalid");
28626 else if (!TARGET_VSX)
28628 if (type == long_unsigned_type_node || type == long_integer_type_node)
28630 if (TARGET_64BIT)
28631 error ("use of %<long%> in AltiVec types is invalid for "
28632 "64-bit code without -mvsx");
28633 else if (rs6000_warn_altivec_long)
28634 warning (0, "use of %<long%> in AltiVec types is deprecated; "
28635 "use %<int%>");
28637 else if (type == long_long_unsigned_type_node
28638 || type == long_long_integer_type_node)
28639 error ("use of %<long long%> in AltiVec types is invalid without "
28640 "-mvsx");
28641 else if (type == double_type_node)
28642 error ("use of %<double%> in AltiVec types is invalid without -mvsx");
28645 switch (altivec_type)
28647 case 'v':
28648 unsigned_p = TYPE_UNSIGNED (type);
28649 switch (mode)
28651 case TImode:
28652 result = (unsigned_p ? unsigned_V1TI_type_node : V1TI_type_node);
28653 break;
28654 case DImode:
28655 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
28656 break;
28657 case SImode:
28658 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
28659 break;
28660 case HImode:
28661 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
28662 break;
28663 case QImode:
28664 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
28665 break;
28666 case SFmode: result = V4SF_type_node; break;
28667 case DFmode: result = V2DF_type_node; break;
28668 /* If the user says 'vector int bool', we may be handed the 'bool'
28669 attribute _before_ the 'vector' attribute, and so select the
28670 proper type in the 'b' case below. */
28671 case V4SImode: case V8HImode: case V16QImode: case V4SFmode:
28672 case V2DImode: case V2DFmode:
28673 result = type;
28674 default: break;
28676 break;
28677 case 'b':
28678 switch (mode)
28680 case DImode: case V2DImode: result = bool_V2DI_type_node; break;
28681 case SImode: case V4SImode: result = bool_V4SI_type_node; break;
28682 case HImode: case V8HImode: result = bool_V8HI_type_node; break;
28683 case QImode: case V16QImode: result = bool_V16QI_type_node;
28684 default: break;
28686 break;
28687 case 'p':
28688 switch (mode)
28690 case V8HImode: result = pixel_V8HI_type_node;
28691 default: break;
28693 default: break;
28696 /* Propagate qualifiers attached to the element type
28697 onto the vector type. */
28698 if (result && result != type && TYPE_QUALS (type))
28699 result = build_qualified_type (result, TYPE_QUALS (type));
28701 *no_add_attrs = true; /* No need to hang on to the attribute. */
28703 if (result)
28704 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
28706 return NULL_TREE;
28709 /* AltiVec defines four built-in scalar types that serve as vector
28710 elements; we must teach the compiler how to mangle them. */
28712 static const char *
28713 rs6000_mangle_type (const_tree type)
28715 type = TYPE_MAIN_VARIANT (type);
28717 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
28718 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
28719 return NULL;
28721 if (type == bool_char_type_node) return "U6__boolc";
28722 if (type == bool_short_type_node) return "U6__bools";
28723 if (type == pixel_type_node) return "u7__pixel";
28724 if (type == bool_int_type_node) return "U6__booli";
28725 if (type == bool_long_type_node) return "U6__booll";
28727 /* Mangle IBM extended float long double as `g' (__float128) on
28728 powerpc*-linux where long-double-64 previously was the default. */
28729 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
28730 && TARGET_ELF
28731 && TARGET_LONG_DOUBLE_128
28732 && !TARGET_IEEEQUAD)
28733 return "g";
28735 /* For all other types, use normal C++ mangling. */
28736 return NULL;
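/* Illustrative consequence of the table above: a parameter whose element
   type is the AltiVec __pixel type is mangled with "u7__pixel", keeping
   it distinct from a plain unsigned short. */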
28739 /* Handle a "longcall" or "shortcall" attribute; arguments as in
28740 struct attribute_spec.handler. */
28742 static tree
28743 rs6000_handle_longcall_attribute (tree *node, tree name,
28744 tree args ATTRIBUTE_UNUSED,
28745 int flags ATTRIBUTE_UNUSED,
28746 bool *no_add_attrs)
28748 if (TREE_CODE (*node) != FUNCTION_TYPE
28749 && TREE_CODE (*node) != FIELD_DECL
28750 && TREE_CODE (*node) != TYPE_DECL)
28752 warning (OPT_Wattributes, "%qE attribute only applies to functions",
28753 name);
28754 *no_add_attrs = true;
28757 return NULL_TREE;
28760 /* Set longcall attributes on all functions declared when
28761 rs6000_default_long_calls is true. */
28762 static void
28763 rs6000_set_default_type_attributes (tree type)
28765 if (rs6000_default_long_calls
28766 && (TREE_CODE (type) == FUNCTION_TYPE
28767 || TREE_CODE (type) == METHOD_TYPE))
28768 TYPE_ATTRIBUTES (type) = tree_cons (get_identifier ("longcall"),
28769 NULL_TREE,
28770 TYPE_ATTRIBUTES (type));
28772 #if TARGET_MACHO
28773 darwin_set_default_type_attributes (type);
28774 #endif
28777 /* Return a reference suitable for calling a function with the
28778 longcall attribute. */
28780 rtx
28781 rs6000_longcall_ref (rtx call_ref)
28783 const char *call_name;
28784 tree node;
28786 if (GET_CODE (call_ref) != SYMBOL_REF)
28787 return call_ref;
28789 /* System V adds '.' to the internal name, so skip any leading '.' characters. */
28790 call_name = XSTR (call_ref, 0);
28791 if (*call_name == '.')
28793 while (*call_name == '.')
28794 call_name++;
28796 node = get_identifier (call_name);
28797 call_ref = gen_rtx_SYMBOL_REF (VOIDmode, IDENTIFIER_POINTER (node));
28800 return force_reg (Pmode, call_ref);
28803 #ifndef TARGET_USE_MS_BITFIELD_LAYOUT
28804 #define TARGET_USE_MS_BITFIELD_LAYOUT 0
28805 #endif
28807 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
28808 struct attribute_spec.handler. */
28809 static tree
28810 rs6000_handle_struct_attribute (tree *node, tree name,
28811 tree args ATTRIBUTE_UNUSED,
28812 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
28814 tree *type = NULL;
28815 if (DECL_P (*node))
28817 if (TREE_CODE (*node) == TYPE_DECL)
28818 type = &TREE_TYPE (*node);
28820 else
28821 type = node;
28823 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
28824 || TREE_CODE (*type) == UNION_TYPE)))
28826 warning (OPT_Wattributes, "%qE attribute ignored", name);
28827 *no_add_attrs = true;
28830 else if ((is_attribute_p ("ms_struct", name)
28831 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
28832 || ((is_attribute_p ("gcc_struct", name)
28833 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
28835 warning (OPT_Wattributes, "%qE incompatible attribute ignored",
28836 name);
28837 *no_add_attrs = true;
28840 return NULL_TREE;
28843 static bool
28844 rs6000_ms_bitfield_layout_p (const_tree record_type)
28846 return (TARGET_USE_MS_BITFIELD_LAYOUT &&
28847 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
28848 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
28851 #ifdef USING_ELFOS_H
28853 /* A get_unnamed_section callback, used for switching to toc_section. */
28855 static void
28856 rs6000_elf_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
28858 if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28859 && TARGET_MINIMAL_TOC
28860 && !TARGET_RELOCATABLE)
28862 if (!toc_initialized)
28864 toc_initialized = 1;
28865 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28866 (*targetm.asm_out.internal_label) (asm_out_file, "LCTOC", 0);
28867 fprintf (asm_out_file, "\t.tc ");
28868 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1[TC],");
28869 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28870 fprintf (asm_out_file, "\n");
28872 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
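/* The anchor is placed 0x8000 past the section start so that the whole
   64KB small TOC is reachable from LCTOC1 with signed 16-bit offsets. */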
28873 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28874 fprintf (asm_out_file, " = .+32768\n");
28876 else
28877 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28879 else if ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
28880 && !TARGET_RELOCATABLE)
28881 fprintf (asm_out_file, "%s\n", TOC_SECTION_ASM_OP);
28882 else
28884 fprintf (asm_out_file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
28885 if (!toc_initialized)
28887 ASM_OUTPUT_INTERNAL_LABEL_PREFIX (asm_out_file, "LCTOC1");
28888 fprintf (asm_out_file, " = .+32768\n");
28889 toc_initialized = 1;
28894 /* Implement TARGET_ASM_INIT_SECTIONS. */
28896 static void
28897 rs6000_elf_asm_init_sections (void)
28899 toc_section
28900 = get_unnamed_section (0, rs6000_elf_output_toc_section_asm_op, NULL);
28902 sdata2_section
28903 = get_unnamed_section (SECTION_WRITE, output_section_asm_op,
28904 SDATA2_SECTION_ASM_OP);
28907 /* Implement TARGET_SELECT_RTX_SECTION. */
28909 static section *
28910 rs6000_elf_select_rtx_section (machine_mode mode, rtx x,
28911 unsigned HOST_WIDE_INT align)
28913 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
28914 return toc_section;
28915 else
28916 return default_elf_select_rtx_section (mode, x, align);
28919 /* For a SYMBOL_REF, set generic flags and then perform some
28920 target-specific processing.
28922 When the AIX ABI is requested on a non-AIX system, replace the
28923 function name with the real name (with a leading .) rather than the
28924 function descriptor name. This saves a lot of overriding code to
28925 read the prefixes. */
28927 static void rs6000_elf_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
28928 static void
28929 rs6000_elf_encode_section_info (tree decl, rtx rtl, int first)
28931 default_encode_section_info (decl, rtl, first);
28933 if (first
28934 && TREE_CODE (decl) == FUNCTION_DECL
28935 && !TARGET_AIX
28936 && DEFAULT_ABI == ABI_AIX)
28938 rtx sym_ref = XEXP (rtl, 0);
28939 size_t len = strlen (XSTR (sym_ref, 0));
28940 char *str = XALLOCAVEC (char, len + 2);
28941 str[0] = '.';
28942 memcpy (str + 1, XSTR (sym_ref, 0), len + 1);
28943 XSTR (sym_ref, 0) = ggc_alloc_string (str, len + 1);
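/* Return true if SECTION is TEMPL itself, or TEMPL followed by '.'. */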
28947 static inline bool
28948 compare_section_name (const char *section, const char *templ)
28950 int len;
28952 len = strlen (templ);
28953 return (strncmp (section, templ, len) == 0
28954 && (section[len] == 0 || section[len] == '.'));
28957 bool
28958 rs6000_elf_in_small_data_p (const_tree decl)
28960 if (rs6000_sdata == SDATA_NONE)
28961 return false;
28963 /* We want to merge strings, so we never consider them small data. */
28964 if (TREE_CODE (decl) == STRING_CST)
28965 return false;
28967 /* Functions are never in the small data area. */
28968 if (TREE_CODE (decl) == FUNCTION_DECL)
28969 return false;
28971 if (TREE_CODE (decl) == VAR_DECL && DECL_SECTION_NAME (decl))
28973 const char *section = DECL_SECTION_NAME (decl);
28974 if (compare_section_name (section, ".sdata")
28975 || compare_section_name (section, ".sdata2")
28976 || compare_section_name (section, ".gnu.linkonce.s")
28977 || compare_section_name (section, ".sbss")
28978 || compare_section_name (section, ".sbss2")
28979 || compare_section_name (section, ".gnu.linkonce.sb")
28980 || strcmp (section, ".PPC.EMB.sdata0") == 0
28981 || strcmp (section, ".PPC.EMB.sbss0") == 0)
28982 return true;
28984 else
28986 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (decl));
28988 if (size > 0
28989 && size <= g_switch_value
28990 /* If it's not public, and we're not going to reference it there,
28991 there's no need to put it in the small data section. */
28992 && (rs6000_sdata != SDATA_DATA || TREE_PUBLIC (decl)))
28993 return true;
28996 return false;
28999 #endif /* USING_ELFOS_H */
29001 /* Implement TARGET_USE_BLOCKS_FOR_CONSTANT_P. */
29003 static bool
29004 rs6000_use_blocks_for_constant_p (machine_mode mode, const_rtx x)
29006 return !ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode);
29009 /* Do not place thread-local symbols refs in the object blocks. */
29011 static bool
29012 rs6000_use_blocks_for_decl_p (const_tree decl)
29014 return !DECL_THREAD_LOCAL_P (decl);
29017 /* Return a REG that occurs in ADDR with coefficient 1.
29018 ADDR can be effectively incremented by incrementing REG.
29020 r0 is special and we must not select it as an address
29021 register by this routine since our caller will try to
29022 increment the returned register via an "la" instruction. */
29024 rtx
29025 find_addr_reg (rtx addr)
29027 while (GET_CODE (addr) == PLUS)
29029 if (GET_CODE (XEXP (addr, 0)) == REG
29030 && REGNO (XEXP (addr, 0)) != 0)
29031 addr = XEXP (addr, 0);
29032 else if (GET_CODE (XEXP (addr, 1)) == REG
29033 && REGNO (XEXP (addr, 1)) != 0)
29034 addr = XEXP (addr, 1);
29035 else if (CONSTANT_P (XEXP (addr, 0)))
29036 addr = XEXP (addr, 1);
29037 else if (CONSTANT_P (XEXP (addr, 1)))
29038 addr = XEXP (addr, 0);
29039 else
29040 gcc_unreachable ();
29042 gcc_assert (GET_CODE (addr) == REG && REGNO (addr) != 0);
29043 return addr;
29046 void
29047 rs6000_fatal_bad_address (rtx op)
29049 fatal_insn ("bad address", op);
29052 #if TARGET_MACHO
29054 typedef struct branch_island_d {
29055 tree function_name;
29056 tree label_name;
29057 int line_number;
29058 } branch_island;
29061 static vec<branch_island, va_gc> *branch_islands;
29063 /* Remember to generate a branch island for far calls to the given
29064 function. */
29066 static void
29067 add_compiler_branch_island (tree label_name, tree function_name,
29068 int line_number)
29070 branch_island bi = {function_name, label_name, line_number};
29071 vec_safe_push (branch_islands, bi);
29074 /* Generate far-jump branch islands for everything recorded in
29075 branch_islands. Invoked immediately after the last instruction of
29076 the epilogue has been emitted; the branch islands must be appended
29077 to, and contiguous with, the function body. Mach-O stubs are
29078 generated in machopic_output_stub(). */
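/* Illustrative sketch of the non-PIC island that the loop below emits
   for a function _foo with island label L42:

     L42: lis r12,hi16(_foo)
          ori r12,r12,lo16(_foo)
          mtctr r12
          bctr  */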
29080 static void
29081 macho_branch_islands (void)
29083 char tmp_buf[512];
29085 while (!vec_safe_is_empty (branch_islands))
29087 branch_island *bi = &branch_islands->last ();
29088 const char *label = IDENTIFIER_POINTER (bi->label_name);
29089 const char *name = IDENTIFIER_POINTER (bi->function_name);
29090 char name_buf[512];
29091 /* Cheap copy of the details from the Darwin ASM_OUTPUT_LABELREF(). */
29092 if (name[0] == '*' || name[0] == '&')
29093 strcpy (name_buf, name+1);
29094 else
29096 name_buf[0] = '_';
29097 strcpy (name_buf+1, name);
29099 strcpy (tmp_buf, "\n");
29100 strcat (tmp_buf, label);
29101 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29102 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29103 dbxout_stabd (N_SLINE, bi->line_number);
29104 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29105 if (flag_pic)
29107 if (TARGET_LINK_STACK)
29109 char name[32];
29110 get_ppc476_thunk_name (name);
29111 strcat (tmp_buf, ":\n\tmflr r0\n\tbl ");
29112 strcat (tmp_buf, name);
29113 strcat (tmp_buf, "\n");
29114 strcat (tmp_buf, label);
29115 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29117 else
29119 strcat (tmp_buf, ":\n\tmflr r0\n\tbcl 20,31,");
29120 strcat (tmp_buf, label);
29121 strcat (tmp_buf, "_pic\n");
29122 strcat (tmp_buf, label);
29123 strcat (tmp_buf, "_pic:\n\tmflr r11\n");
29126 strcat (tmp_buf, "\taddis r11,r11,ha16(");
29127 strcat (tmp_buf, name_buf);
29128 strcat (tmp_buf, " - ");
29129 strcat (tmp_buf, label);
29130 strcat (tmp_buf, "_pic)\n");
29132 strcat (tmp_buf, "\tmtlr r0\n");
29134 strcat (tmp_buf, "\taddi r12,r11,lo16(");
29135 strcat (tmp_buf, name_buf);
29136 strcat (tmp_buf, " - ");
29137 strcat (tmp_buf, label);
29138 strcat (tmp_buf, "_pic)\n");
29140 strcat (tmp_buf, "\tmtctr r12\n\tbctr\n");
29142 else
29144 strcat (tmp_buf, ":\nlis r12,hi16(");
29145 strcat (tmp_buf, name_buf);
29146 strcat (tmp_buf, ")\n\tori r12,r12,lo16(");
29147 strcat (tmp_buf, name_buf);
29148 strcat (tmp_buf, ")\n\tmtctr r12\n\tbctr");
29150 output_asm_insn (tmp_buf, 0);
29151 #if defined (DBX_DEBUGGING_INFO) || defined (XCOFF_DEBUGGING_INFO)
29152 if (write_symbols == DBX_DEBUG || write_symbols == XCOFF_DEBUG)
29153 dbxout_stabd (N_SLINE, bi->line_number);
29154 #endif /* DBX_DEBUGGING_INFO || XCOFF_DEBUGGING_INFO */
29155 branch_islands->pop ();
29159 /* NO_PREVIOUS_DEF checks whether the function name is already in the
29160 branch island list. */
29162 static int
29163 no_previous_def (tree function_name)
29165 branch_island *bi;
29166 unsigned ix;
29168 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29169 if (function_name == bi->function_name)
29170 return 0;
29171 return 1;
29174 /* GET_PREV_LABEL gets the label name from the previous definition of
29175 the function. */
29177 static tree
29178 get_prev_label (tree function_name)
29180 branch_island *bi;
29181 unsigned ix;
29183 FOR_EACH_VEC_SAFE_ELT (branch_islands, ix, bi)
29184 if (function_name == bi->function_name)
29185 return bi->label_name;
29186 return NULL_TREE;
29189 /* INSN is either a function call or a millicode call. It may have an
29190 unconditional jump in its delay slot.
29192 CALL_DEST is the routine we are calling. */
29194 char *
29195 output_call (rtx_insn *insn, rtx *operands, int dest_operand_number,
29196 int cookie_operand_number)
29198 static char buf[256];
29199 if (darwin_emit_branch_islands
29200 && GET_CODE (operands[dest_operand_number]) == SYMBOL_REF
29201 && (INTVAL (operands[cookie_operand_number]) & CALL_LONG))
29203 tree labelname;
29204 tree funname = get_identifier (XSTR (operands[dest_operand_number], 0));
29206 if (no_previous_def (funname))
29208 rtx label_rtx = gen_label_rtx ();
29209 char *label_buf, temp_buf[256];
29210 ASM_GENERATE_INTERNAL_LABEL (temp_buf, "L",
29211 CODE_LABEL_NUMBER (label_rtx));
29212 label_buf = temp_buf[0] == '*' ? temp_buf + 1 : temp_buf;
29213 labelname = get_identifier (label_buf);
29214 add_compiler_branch_island (labelname, funname, insn_line (insn));
29216 else
29217 labelname = get_prev_label (funname);
29219 /* "jbsr foo, L42" is Mach-O for "Link as 'bl foo' if a 'bl'
29220 instruction will reach 'foo', otherwise link as 'bl L42'".
29221 "L42" should be a 'branch island', that will do a far jump to
29222 'foo'. Branch islands are generated in
29223 macho_branch_islands(). */
29224 sprintf (buf, "jbsr %%z%d,%.246s",
29225 dest_operand_number, IDENTIFIER_POINTER (labelname));
29227 else
29228 sprintf (buf, "bl %%z%d", dest_operand_number);
29229 return buf;
29232 /* Generate PIC and indirect symbol stubs. */
29234 void
29235 machopic_output_stub (FILE *file, const char *symb, const char *stub)
29237 unsigned int length;
29238 char *symbol_name, *lazy_ptr_name;
29239 char *local_label_0;
29240 static int label = 0;
29242 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
29243 symb = (*targetm.strip_name_encoding) (symb);
29246 length = strlen (symb);
29247 symbol_name = XALLOCAVEC (char, length + 32);
29248 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
29250 lazy_ptr_name = XALLOCAVEC (char, length + 32);
29251 GEN_LAZY_PTR_NAME_FOR_SYMBOL (lazy_ptr_name, symb, length);
29253 if (flag_pic == 2)
29254 switch_to_section (darwin_sections[machopic_picsymbol_stub1_section]);
29255 else
29256 switch_to_section (darwin_sections[machopic_symbol_stub1_section]);
29258 if (flag_pic == 2)
29260 fprintf (file, "\t.align 5\n");
29262 fprintf (file, "%s:\n", stub);
29263 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29265 label++;
29266 local_label_0 = XALLOCAVEC (char, sizeof ("\"L00000000000$spb\""));
29267 sprintf (local_label_0, "\"L%011d$spb\"", label);
29269 fprintf (file, "\tmflr r0\n");
29270 if (TARGET_LINK_STACK)
29272 char name[32];
29273 get_ppc476_thunk_name (name);
29274 fprintf (file, "\tbl %s\n", name);
29275 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29277 else
29279 fprintf (file, "\tbcl 20,31,%s\n", local_label_0);
29280 fprintf (file, "%s:\n\tmflr r11\n", local_label_0);
29282 fprintf (file, "\taddis r11,r11,ha16(%s-%s)\n",
29283 lazy_ptr_name, local_label_0);
29284 fprintf (file, "\tmtlr r0\n");
29285 fprintf (file, "\t%s r12,lo16(%s-%s)(r11)\n",
29286 (TARGET_64BIT ? "ldu" : "lwzu"),
29287 lazy_ptr_name, local_label_0);
29288 fprintf (file, "\tmtctr r12\n");
29289 fprintf (file, "\tbctr\n");
29291 else
29293 fprintf (file, "\t.align 4\n");
29295 fprintf (file, "%s:\n", stub);
29296 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29298 fprintf (file, "\tlis r11,ha16(%s)\n", lazy_ptr_name);
29299 fprintf (file, "\t%s r12,lo16(%s)(r11)\n",
29300 (TARGET_64BIT ? "ldu" : "lwzu"),
29301 lazy_ptr_name);
29302 fprintf (file, "\tmtctr r12\n");
29303 fprintf (file, "\tbctr\n");
29306 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
29307 fprintf (file, "%s:\n", lazy_ptr_name);
29308 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
29309 fprintf (file, "%sdyld_stub_binding_helper\n",
29310 (TARGET_64BIT ? DOUBLE_INT_ASM_OP : "\t.long\t"));
29313 /* Legitimize PIC addresses. If the address is already
29314 position-independent, we return ORIG. Newly generated
29315 position-independent addresses go into a reg. This is REG if nonzero;
29316 otherwise we allocate register(s) as necessary. */
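/* True iff X is a CONST_INT whose value fits in a signed 16-bit
   immediate field. */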
29318 #define SMALL_INT(X) ((UINTVAL (X) + 0x8000) < 0x10000)
29320 rtx
29321 rs6000_machopic_legitimize_pic_address (rtx orig, machine_mode mode,
29322 rtx reg)
29324 rtx base, offset;
29326 if (reg == NULL && ! reload_in_progress && ! reload_completed)
29327 reg = gen_reg_rtx (Pmode);
29329 if (GET_CODE (orig) == CONST)
29331 rtx reg_temp;
29333 if (GET_CODE (XEXP (orig, 0)) == PLUS
29334 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
29335 return orig;
29337 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
29339 /* Use a different reg for the intermediate value, as
29340 it will be marked UNCHANGING. */
29341 reg_temp = !can_create_pseudo_p () ? reg : gen_reg_rtx (Pmode);
29342 base = rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 0),
29343 Pmode, reg_temp);
29344 offset =
29345 rs6000_machopic_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
29346 Pmode, reg);
29348 if (GET_CODE (offset) == CONST_INT)
29350 if (SMALL_INT (offset))
29351 return plus_constant (Pmode, base, INTVAL (offset));
29352 else if (! reload_in_progress && ! reload_completed)
29353 offset = force_reg (Pmode, offset);
29354 else
29356 rtx mem = force_const_mem (Pmode, orig);
29357 return machopic_legitimize_pic_address (mem, Pmode, reg);
29360 return gen_rtx_PLUS (Pmode, base, offset);
29363 /* Fall back on generic machopic code. */
29364 return machopic_legitimize_pic_address (orig, mode, reg);
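/* For example, given ORIG = (const (plus (symbol_ref "x") (const_int 8))),
   the code above legitimizes the symbol into a register via the recursive
   call and, because 8 satisfies SMALL_INT, returns
   (plus (reg) (const_int 8)).  An offset too large for SMALL_INT is
   forced into a register instead, and during or after reload the whole
   constant is spilled to the constant pool.  */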
29367 /* Output a .machine directive for the Darwin assembler, and call
29368 the generic start_file routine. */
29370 static void
29371 rs6000_darwin_file_start (void)
29373 static const struct
29375 const char *arg;
29376 const char *name;
29377 HOST_WIDE_INT if_set;
29378 } mapping[] = {
29379 { "ppc64", "ppc64", MASK_64BIT },
29380 { "970", "ppc970", MASK_PPC_GPOPT | MASK_MFCRF | MASK_POWERPC64 },
29381 { "power4", "ppc970", 0 },
29382 { "G5", "ppc970", 0 },
29383 { "7450", "ppc7450", 0 },
29384 { "7400", "ppc7400", MASK_ALTIVEC },
29385 { "G4", "ppc7400", 0 },
29386 { "750", "ppc750", 0 },
29387 { "740", "ppc750", 0 },
29388 { "G3", "ppc750", 0 },
29389 { "604e", "ppc604e", 0 },
29390 { "604", "ppc604", 0 },
29391 { "603e", "ppc603", 0 },
29392 { "603", "ppc603", 0 },
29393 { "601", "ppc601", 0 },
29394 { NULL, "ppc", 0 } };
29395 const char *cpu_id = "";
29396 size_t i;
29398 rs6000_file_start ();
29399 darwin_file_start ();
29401 /* Determine the argument to -mcpu=. Default to G3 if not specified. */
29403 if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
29404 cpu_id = rs6000_default_cpu;
29406 if (global_options_set.x_rs6000_cpu_index)
29407 cpu_id = processor_target_table[rs6000_cpu_index].name;
29409 /* Look through the mapping array. Pick the first name that either
29410 matches the argument, has a bit set in IF_SET that is also set
29411 in the target flags, or has a NULL name. */
29413 i = 0;
29414 while (mapping[i].arg != NULL
29415 && strcmp (mapping[i].arg, cpu_id) != 0
29416 && (mapping[i].if_set & rs6000_isa_flags) == 0)
29417 i++;
29419 fprintf (asm_out_file, "\t.machine %s\n", mapping[i].name);
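/* For example, -mcpu=G5 stops the loop at the { "G5", "ppc970", 0 } entry
   and emits "\t.machine ppc970", while a plain 32-bit compile with no
   -mcpu and no AltiVec or 64-bit flags falls through to the terminating
   entry and emits "\t.machine ppc".  */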
29422 #endif /* TARGET_MACHO */
29424 #if TARGET_ELF
29425 static int
29426 rs6000_elf_reloc_rw_mask (void)
29428 if (flag_pic)
29429 return 3;
29430 else if (DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
29431 return 2;
29432 else
29433 return 0;
29436 /* Record an element in the table of global constructors. SYMBOL is
29437 a SYMBOL_REF of the function to be called; PRIORITY is a number
29438 between 0 and MAX_INIT_PRIORITY.
29440 This differs from default_named_section_asm_out_constructor in
29441 that we have special handling for -mrelocatable. */
29443 static void rs6000_elf_asm_out_constructor (rtx, int) ATTRIBUTE_UNUSED;
29444 static void
29445 rs6000_elf_asm_out_constructor (rtx symbol, int priority)
29447 const char *section = ".ctors";
29448 char buf[16];
29450 if (priority != DEFAULT_INIT_PRIORITY)
29452 sprintf (buf, ".ctors.%.5u",
29453 /* Invert the numbering so the linker puts us in the proper
29454 order; constructors are run from right to left, and the
29455 linker sorts in increasing order. */
29456 MAX_INIT_PRIORITY - priority);
29457 section = buf;
29460 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29461 assemble_align (POINTER_SIZE);
29463 if (TARGET_RELOCATABLE)
29465 fputs ("\t.long (", asm_out_file);
29466 output_addr_const (asm_out_file, symbol);
29467 fputs (")@fixup\n", asm_out_file);
29469 else
29470 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
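/* As a sketch: a constructor with priority 65000 lands in section
   ".ctors.00535" (MAX_INIT_PRIORITY being 65535), and with -mrelocatable
   its table entry is written as "\t.long (fn)@fixup" rather than a plain
   pointer so that it can be fixed up at run time.  */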
29473 static void rs6000_elf_asm_out_destructor (rtx, int) ATTRIBUTE_UNUSED;
29474 static void
29475 rs6000_elf_asm_out_destructor (rtx symbol, int priority)
29477 const char *section = ".dtors";
29478 char buf[16];
29480 if (priority != DEFAULT_INIT_PRIORITY)
29482 sprintf (buf, ".dtors.%.5u",
29483 /* Invert the numbering so the linker puts us in the proper
29484 order; constructors are run from right to left, and the
29485 linker sorts in increasing order. */
29486 MAX_INIT_PRIORITY - priority);
29487 section = buf;
29490 switch_to_section (get_section (section, SECTION_WRITE, NULL));
29491 assemble_align (POINTER_SIZE);
29493 if (TARGET_RELOCATABLE)
29495 fputs ("\t.long (", asm_out_file);
29496 output_addr_const (asm_out_file, symbol);
29497 fputs (")@fixup\n", asm_out_file);
29499 else
29500 assemble_integer (symbol, POINTER_SIZE / BITS_PER_UNIT, POINTER_SIZE, 1);
29503 void
29504 rs6000_elf_declare_function_name (FILE *file, const char *name, tree decl)
29506 if (TARGET_64BIT && DEFAULT_ABI != ABI_ELFv2)
29508 fputs ("\t.section\t\".opd\",\"aw\"\n\t.align 3\n", file);
29509 ASM_OUTPUT_LABEL (file, name);
29510 fputs (DOUBLE_INT_ASM_OP, file);
29511 rs6000_output_function_entry (file, name);
29512 fputs (",.TOC.@tocbase,0\n\t.previous\n", file);
29513 if (DOT_SYMBOLS)
29515 fputs ("\t.size\t", file);
29516 assemble_name (file, name);
29517 fputs (",24\n\t.type\t.", file);
29518 assemble_name (file, name);
29519 fputs (",@function\n", file);
29520 if (TREE_PUBLIC (decl) && ! DECL_WEAK (decl))
29522 fputs ("\t.globl\t.", file);
29523 assemble_name (file, name);
29524 putc ('\n', file);
29527 else
29528 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29529 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29530 rs6000_output_function_entry (file, name);
29531 fputs (":\n", file);
29532 return;
29535 if (TARGET_RELOCATABLE
29536 && !TARGET_SECURE_PLT
29537 && (get_pool_size () != 0 || crtl->profile)
29538 && uses_TOC ())
29540 char buf[256];
29542 (*targetm.asm_out.internal_label) (file, "LCL", rs6000_pic_labelno);
29544 ASM_GENERATE_INTERNAL_LABEL (buf, "LCTOC", 1);
29545 fprintf (file, "\t.long ");
29546 assemble_name (file, buf);
29547 putc ('-', file);
29548 ASM_GENERATE_INTERNAL_LABEL (buf, "LCF", rs6000_pic_labelno);
29549 assemble_name (file, buf);
29550 putc ('\n', file);
29553 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
29554 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
29556 if (DEFAULT_ABI == ABI_AIX)
29558 const char *desc_name, *orig_name;
29560 orig_name = (*targetm.strip_name_encoding) (name);
29561 desc_name = orig_name;
29562 while (*desc_name == '.')
29563 desc_name++;
29565 if (TREE_PUBLIC (decl))
29566 fprintf (file, "\t.globl %s\n", desc_name);
29568 fprintf (file, "%s\n", MINIMAL_TOC_SECTION_ASM_OP);
29569 fprintf (file, "%s:\n", desc_name);
29570 fprintf (file, "\t.long %s\n", orig_name);
29571 fputs ("\t.long _GLOBAL_OFFSET_TABLE_\n", file);
29572 fputs ("\t.long 0\n", file);
29573 fprintf (file, "\t.previous\n");
29575 ASM_OUTPUT_LABEL (file, name);
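/* A sketch of the 64-bit ELFv1 output for a public, non-weak function
   "foo" with DOT_SYMBOLS in effect, assuming DOUBLE_INT_ASM_OP is
   "\t.quad\t":

	.section ".opd","aw"
	.align 3
   foo:
	.quad	.foo,.TOC.@tocbase,0
	.previous
	.size	foo,24
	.type	.foo,@function
	.globl	.foo
   .foo:  */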
29578 static void rs6000_elf_file_end (void) ATTRIBUTE_UNUSED;
29579 static void
29580 rs6000_elf_file_end (void)
29582 #ifdef HAVE_AS_GNU_ATTRIBUTE
29583 if (TARGET_32BIT && DEFAULT_ABI == ABI_V4)
29585 if (rs6000_passes_float)
29586 fprintf (asm_out_file, "\t.gnu_attribute 4, %d\n",
29587 ((TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT) ? 1
29588 : (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT) ? 3
29589 : 2));
29590 if (rs6000_passes_vector)
29591 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
29592 (TARGET_ALTIVEC_ABI ? 2
29593 : TARGET_SPE_ABI ? 3
29594 : 1));
29595 if (rs6000_returns_struct)
29596 fprintf (asm_out_file, "\t.gnu_attribute 12, %d\n",
29597 aix_struct_return ? 2 : 1);
29599 #endif
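/* For example, a 32-bit SVR4 object built for hard float with
   double-precision support and passing FP values is tagged
   "\t.gnu_attribute 4, 1"; single-precision-only hardware yields 3 and
   soft float yields 2, so the linker can flag mixed-ABI links.  */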
29600 #if defined (POWERPC_LINUX) || defined (POWERPC_FREEBSD)
29601 if (TARGET_32BIT || DEFAULT_ABI == ABI_ELFv2)
29602 file_end_indicate_exec_stack ();
29603 #endif
29605 #endif
29607 #if TARGET_XCOFF
29608 static void
29609 rs6000_xcoff_asm_output_anchor (rtx symbol)
29611 char buffer[100];
29613 sprintf (buffer, "$ + " HOST_WIDE_INT_PRINT_DEC,
29614 SYMBOL_REF_BLOCK_OFFSET (symbol));
29615 fprintf (asm_out_file, "%s", SET_ASM_OP);
29616 RS6000_OUTPUT_BASENAME (asm_out_file, XSTR (symbol, 0));
29617 fprintf (asm_out_file, ",");
29618 RS6000_OUTPUT_BASENAME (asm_out_file, buffer);
29619 fprintf (asm_out_file, "\n");
29622 static void
29623 rs6000_xcoff_asm_globalize_label (FILE *stream, const char *name)
29625 fputs (GLOBAL_ASM_OP, stream);
29626 RS6000_OUTPUT_BASENAME (stream, name);
29627 putc ('\n', stream);
29630 /* A get_unnamed_section callback, used for read-only sections. DIRECTIVE
29631 points to the section name string. */
29633 static void
29634 rs6000_xcoff_output_readonly_section_asm_op (const void *directive)
29636 fprintf (asm_out_file, "\t.csect %s[RO],%s\n",
29637 *(const char *const *) directive,
29638 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29641 /* Likewise for read-write sections. */
29643 static void
29644 rs6000_xcoff_output_readwrite_section_asm_op (const void *directive)
29646 fprintf (asm_out_file, "\t.csect %s[RW],%s\n",
29647 *(const char *const *) directive,
29648 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29651 static void
29652 rs6000_xcoff_output_tls_section_asm_op (const void *directive)
29654 fprintf (asm_out_file, "\t.csect %s[TL],%s\n",
29655 *(const char *const *) directive,
29656 XCOFF_CSECT_DEFAULT_ALIGNMENT_STR);
29659 /* A get_unnamed_section callback, used for switching to toc_section. */
29661 static void
29662 rs6000_xcoff_output_toc_section_asm_op (const void *data ATTRIBUTE_UNUSED)
29664 if (TARGET_MINIMAL_TOC)
29666 /* toc_section is always selected at least once from
29667 rs6000_xcoff_file_start, so this is guaranteed to
29668 always be defined once and only once in each file. */
29669 if (!toc_initialized)
29671 fputs ("\t.toc\nLCTOC..1:\n", asm_out_file);
29672 fputs ("\t.tc toc_table[TC],toc_table[RW]\n", asm_out_file);
29673 toc_initialized = 1;
29675 fprintf (asm_out_file, "\t.csect toc_table[RW]%s\n",
29676 (TARGET_32BIT ? "" : ",3"));
29678 else
29679 fputs ("\t.toc\n", asm_out_file);
29682 /* Implement TARGET_ASM_INIT_SECTIONS. */
29684 static void
29685 rs6000_xcoff_asm_init_sections (void)
29687 read_only_data_section
29688 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29689 &xcoff_read_only_section_name);
29691 private_data_section
29692 = get_unnamed_section (SECTION_WRITE,
29693 rs6000_xcoff_output_readwrite_section_asm_op,
29694 &xcoff_private_data_section_name);
29696 tls_data_section
29697 = get_unnamed_section (SECTION_TLS,
29698 rs6000_xcoff_output_tls_section_asm_op,
29699 &xcoff_tls_data_section_name);
29701 tls_private_data_section
29702 = get_unnamed_section (SECTION_TLS,
29703 rs6000_xcoff_output_tls_section_asm_op,
29704 &xcoff_private_data_section_name);
29706 read_only_private_data_section
29707 = get_unnamed_section (0, rs6000_xcoff_output_readonly_section_asm_op,
29708 &xcoff_private_data_section_name);
29710 toc_section
29711 = get_unnamed_section (0, rs6000_xcoff_output_toc_section_asm_op, NULL);
29713 readonly_data_section = read_only_data_section;
29714 exception_section = data_section;
29717 static int
29718 rs6000_xcoff_reloc_rw_mask (void)
29720 return 3;
29723 static void
29724 rs6000_xcoff_asm_named_section (const char *name, unsigned int flags,
29725 tree decl ATTRIBUTE_UNUSED)
29727 int smclass;
29728 static const char * const suffix[4] = { "PR", "RO", "RW", "TL" };
29730 if (flags & SECTION_CODE)
29731 smclass = 0;
29732 else if (flags & SECTION_TLS)
29733 smclass = 3;
29734 else if (flags & SECTION_WRITE)
29735 smclass = 2;
29736 else
29737 smclass = 1;
29739 fprintf (asm_out_file, "\t.csect %s%s[%s],%u\n",
29740 (flags & SECTION_CODE) ? "." : "",
29741 name, suffix[smclass], flags & SECTION_ENTSIZE);
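/* For example, on a 32-bit target a named code section "foo" should come
   out as "\t.csect .foo[PR],2" and a writable data section "bar" as
   "\t.csect bar[RW],2"; the final operand is the log2 alignment kept in
   the SECTION_ENTSIZE bits by rs6000_xcoff_section_type_flags below.  */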
29744 #define IN_NAMED_SECTION(DECL) \
29745 ((TREE_CODE (DECL) == FUNCTION_DECL || TREE_CODE (DECL) == VAR_DECL) \
29746 && DECL_SECTION_NAME (DECL) != NULL)
29748 static section *
29749 rs6000_xcoff_select_section (tree decl, int reloc,
29750 unsigned HOST_WIDE_INT align)
29752 /* Place variables with alignment stricter than BIGGEST_ALIGNMENT into
29753 a named section. */
29754 if (align > BIGGEST_ALIGNMENT)
29756 resolve_unique_section (decl, reloc, true);
29757 if (IN_NAMED_SECTION (decl))
29758 return get_named_section (decl, NULL, reloc);
29761 if (decl_readonly_section (decl, reloc))
29763 if (TREE_PUBLIC (decl))
29764 return read_only_data_section;
29765 else
29766 return read_only_private_data_section;
29768 else
29770 #if HAVE_AS_TLS
29771 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
29773 if (TREE_PUBLIC (decl))
29774 return tls_data_section;
29775 else if (bss_initializer_p (decl))
29777 /* Convert to COMMON to emit in BSS. */
29778 DECL_COMMON (decl) = 1;
29779 return tls_comm_section;
29781 else
29782 return tls_private_data_section;
29784 else
29785 #endif
29786 if (TREE_PUBLIC (decl))
29787 return data_section;
29788 else
29789 return private_data_section;
29793 static void
29794 rs6000_xcoff_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
29796 const char *name;
29798 /* Use select_section for private data and uninitialized data with
29799 alignment <= BIGGEST_ALIGNMENT. */
29800 if (!TREE_PUBLIC (decl)
29801 || DECL_COMMON (decl)
29802 || (DECL_INITIAL (decl) == NULL_TREE
29803 && DECL_ALIGN (decl) <= BIGGEST_ALIGNMENT)
29804 || DECL_INITIAL (decl) == error_mark_node
29805 || (flag_zero_initialized_in_bss
29806 && initializer_zerop (DECL_INITIAL (decl))))
29807 return;
29809 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
29810 name = (*targetm.strip_name_encoding) (name);
29811 set_decl_section_name (decl, name);
29814 /* Select section for constant in constant pool.
29816 On RS/6000, all constants are in the private read-only data area.
29817 However, if this is being placed in the TOC it must be output as a
29818 toc entry. */
29820 static section *
29821 rs6000_xcoff_select_rtx_section (machine_mode mode, rtx x,
29822 unsigned HOST_WIDE_INT align ATTRIBUTE_UNUSED)
29824 if (ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (x, mode))
29825 return toc_section;
29826 else
29827 return read_only_private_data_section;
29830 /* Remove any trailing [DS] or the like from the symbol name. */
29832 static const char *
29833 rs6000_xcoff_strip_name_encoding (const char *name)
29835 size_t len;
29836 if (*name == '*')
29837 name++;
29838 len = strlen (name);
29839 if (name[len - 1] == ']')
29840 return ggc_alloc_string (name, len - 4);
29841 else
29842 return name;
29845 /* Section attributes. AIX is always PIC. */
29847 static unsigned int
29848 rs6000_xcoff_section_type_flags (tree decl, const char *name, int reloc)
29850 unsigned int align;
29851 unsigned int flags = default_section_type_flags (decl, name, reloc);
29853 /* Align to at least UNIT size. */
29854 if ((flags & SECTION_CODE) != 0 || !decl || !DECL_P (decl))
29855 align = MIN_UNITS_PER_WORD;
29856 else
29857 /* Increase alignment of large objects if not already stricter. */
29858 align = MAX ((DECL_ALIGN (decl) / BITS_PER_UNIT),
29859 int_size_in_bytes (TREE_TYPE (decl)) > MIN_UNITS_PER_WORD
29860 ? UNITS_PER_FP_WORD : MIN_UNITS_PER_WORD);
29862 return flags | (exact_log2 (align) & SECTION_ENTSIZE);
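/* Worked example: a 32-byte object declared with 16-byte alignment gives
   align = MAX (16, UNITS_PER_FP_WORD) = 16, so exact_log2 (16) = 4 is
   folded into the SECTION_ENTSIZE bits and later emitted as the ",4"
   csect alignment operand.  */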
29865 /* Output at beginning of assembler file.
29867 Initialize the section names for the RS/6000 at this point.
29869 Specify filename, including full path, to assembler.
29871 We want to go into the TOC section so at least one .toc will be emitted.
29872 Also, in order to output proper .bs/.es pairs, we need at least one static
29873 [RW] section emitted.
29875 Finally, declare mcount when profiling to make the assembler happy. */
29877 static void
29878 rs6000_xcoff_file_start (void)
29880 rs6000_gen_section_name (&xcoff_bss_section_name,
29881 main_input_filename, ".bss_");
29882 rs6000_gen_section_name (&xcoff_private_data_section_name,
29883 main_input_filename, ".rw_");
29884 rs6000_gen_section_name (&xcoff_read_only_section_name,
29885 main_input_filename, ".ro_");
29886 rs6000_gen_section_name (&xcoff_tls_data_section_name,
29887 main_input_filename, ".tls_");
29888 rs6000_gen_section_name (&xcoff_tbss_section_name,
29889 main_input_filename, ".tbss_[UL]");
29891 fputs ("\t.file\t", asm_out_file);
29892 output_quoted_string (asm_out_file, main_input_filename);
29893 fputc ('\n', asm_out_file);
29894 if (write_symbols != NO_DEBUG)
29895 switch_to_section (private_data_section);
29896 switch_to_section (text_section);
29897 if (profile_flag)
29898 fprintf (asm_out_file, "\t.extern %s\n", RS6000_MCOUNT);
29899 rs6000_file_start ();
29902 /* Output at end of assembler file.
29903 On the RS/6000, referencing data should automatically pull in text. */
29905 static void
29906 rs6000_xcoff_file_end (void)
29908 switch_to_section (text_section);
29909 fputs ("_section_.text:\n", asm_out_file);
29910 switch_to_section (data_section);
29911 fputs (TARGET_32BIT
29912 ? "\t.long _section_.text\n" : "\t.llong _section_.text\n",
29913 asm_out_file);
29916 struct declare_alias_data
29918 FILE *file;
29919 bool function_descriptor;
29922 /* Declare alias N. A helper function for call_for_symbol_and_aliases. */
29924 static bool
29925 rs6000_declare_alias (struct symtab_node *n, void *d)
29927 struct declare_alias_data *data = (struct declare_alias_data *)d;
29928 /* Main symbol is output specially, because varasm machinery does part of
29929 the job for us - we do not need to declare .globl/lglobs and such. */
29930 if (!n->alias || n->weakref)
29931 return false;
29933 if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (n->decl)))
29934 return false;
29936 /* Prevent assemble_alias from trying to use .set pseudo operation
29937 that does not behave as expected by the middle-end. */
29938 TREE_ASM_WRITTEN (n->decl) = true;
29940 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (n->decl));
29941 char *buffer = (char *) alloca (strlen (name) + 2);
29942 char *p;
29943 int dollar_inside = 0;
29945 strcpy (buffer, name);
29946 p = strchr (buffer, '$');
29947 while (p) {
29948 *p = '_';
29949 dollar_inside++;
29950 p = strchr (p + 1, '$');
29952 if (TREE_PUBLIC (n->decl))
29954 if (!RS6000_WEAK || !DECL_WEAK (n->decl))
29956 if (dollar_inside) {
29957 if (data->function_descriptor)
29958 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
29959 else
29960 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
29962 if (data->function_descriptor)
29963 fputs ("\t.globl .", data->file);
29964 else
29965 fputs ("\t.globl ", data->file);
29966 RS6000_OUTPUT_BASENAME (data->file, buffer);
29967 putc ('\n', data->file);
29969 else if (DECL_WEAK (n->decl) && !data->function_descriptor)
29970 ASM_WEAKEN_DECL (data->file, n->decl, name, NULL);
29972 else
29974 if (dollar_inside)
29976 if (data->function_descriptor)
29977 fprintf(data->file, "\t.rename %s,\"%s\"\n", buffer, name);
29978 else
29979 fprintf(data->file, "\t.rename .%s,\".%s\"\n", buffer, name);
29981 if (data->function_descriptor)
29982 fputs ("\t.lglobl .", data->file);
29983 else
29984 fputs ("\t.lglobl ", data->file);
29985 RS6000_OUTPUT_BASENAME (data->file, buffer);
29986 putc ('\n', data->file);
29988 if (data->function_descriptor)
29989 fputs (".", data->file);
29990 RS6000_OUTPUT_BASENAME (data->file, buffer);
29991 fputs (":\n", data->file);
29992 return false;
29995 /* This macro produces the initial definition of a function name.
29996 On the RS/6000, we need to place an extra '.' in the function name and
29997 output the function descriptor.
29998 Dollar signs are converted to underscores.
30000 The csect for the function will have already been created when
30001 text_section was selected. We do have to go back to that csect, however.
30003 The third and fourth parameters to the .function pseudo-op (16 and 044)
30004 are placeholders which no longer have any use.
30006 Because AIX assembler's .set command has unexpected semantics, we output
30007 all aliases as alternative labels in front of the definition. */
30009 void
30010 rs6000_xcoff_declare_function_name (FILE *file, const char *name, tree decl)
30012 char *buffer = (char *) alloca (strlen (name) + 1);
30013 char *p;
30014 int dollar_inside = 0;
30015 struct declare_alias_data data = {file, false};
30017 strcpy (buffer, name);
30018 p = strchr (buffer, '$');
30019 while (p) {
30020 *p = '_';
30021 dollar_inside++;
30022 p = strchr (p + 1, '$');
30024 if (TREE_PUBLIC (decl))
30026 if (!RS6000_WEAK || !DECL_WEAK (decl))
30028 if (dollar_inside) {
30029 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30030 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30032 fputs ("\t.globl .", file);
30033 RS6000_OUTPUT_BASENAME (file, buffer);
30034 putc ('\n', file);
30037 else
30039 if (dollar_inside) {
30040 fprintf(file, "\t.rename .%s,\".%s\"\n", buffer, name);
30041 fprintf(file, "\t.rename %s,\"%s\"\n", buffer, name);
30043 fputs ("\t.lglobl .", file);
30044 RS6000_OUTPUT_BASENAME (file, buffer);
30045 putc ('\n', file);
30047 fputs ("\t.csect ", file);
30048 RS6000_OUTPUT_BASENAME (file, buffer);
30049 fputs (TARGET_32BIT ? "[DS]\n" : "[DS],3\n", file);
30050 RS6000_OUTPUT_BASENAME (file, buffer);
30051 fputs (":\n", file);
30052 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30053 fputs (TARGET_32BIT ? "\t.long ." : "\t.llong .", file);
30054 RS6000_OUTPUT_BASENAME (file, buffer);
30055 fputs (", TOC[tc0], 0\n", file);
30056 in_section = NULL;
30057 switch_to_section (function_section (decl));
30058 putc ('.', file);
30059 RS6000_OUTPUT_BASENAME (file, buffer);
30060 fputs (":\n", file);
30061 data.function_descriptor = true;
30062 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30063 if (write_symbols != NO_DEBUG && !DECL_IGNORED_P (decl))
30064 xcoffout_declare_function (file, decl, buffer);
30065 return;
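/* A rough sketch of the output for a public, non-weak function "foo" on
   32-bit AIX (the text csect directive shown is representative; it is
   whatever function_section selects):

	.globl .foo
	.csect foo[DS]
   foo:
	.long .foo, TOC[tc0], 0
	.csect .text[PR]
   .foo:

   Aliases of "foo", if any, appear as extra labels at both points via
   rs6000_declare_alias.  */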
30068 /* This macro produces the initial definition of an object (variable) name.
30069 Because AIX assembler's .set command has unexpected semantics, we output
30070 all aliases as alternative labels in front of the definition. */
30072 void
30073 rs6000_xcoff_declare_object_name (FILE *file, const char *name, tree decl)
30075 struct declare_alias_data data = {file, false};
30076 RS6000_OUTPUT_BASENAME (file, name);
30077 fputs (":\n", file);
30078 symtab_node::get (decl)->call_for_symbol_and_aliases (rs6000_declare_alias, &data, true);
30081 #ifdef HAVE_AS_TLS
30082 static void
30083 rs6000_xcoff_encode_section_info (tree decl, rtx rtl, int first)
30085 rtx symbol;
30086 int flags;
30088 default_encode_section_info (decl, rtl, first);
30090 /* Careful not to prod global register variables. */
30091 if (!MEM_P (rtl))
30092 return;
30093 symbol = XEXP (rtl, 0);
30094 if (GET_CODE (symbol) != SYMBOL_REF)
30095 return;
30097 flags = SYMBOL_REF_FLAGS (symbol);
30099 if (TREE_CODE (decl) == VAR_DECL && DECL_THREAD_LOCAL_P (decl))
30100 flags &= ~SYMBOL_FLAG_HAS_BLOCK_INFO;
30102 SYMBOL_REF_FLAGS (symbol) = flags;
30104 #endif /* HAVE_AS_TLS */
30105 #endif /* TARGET_XCOFF */
30107 /* Compute a (partial) cost for rtx X. Return true if the complete
30108 cost has been computed, and false if subexpressions should be
30109 scanned. In either case, *TOTAL contains the cost result. */
30111 static bool
30112 rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
30113 int *total, bool speed)
30115 machine_mode mode = GET_MODE (x);
30117 switch (code)
30119 /* On the RS/6000, if it is valid in the insn, it is free. */
30120 case CONST_INT:
30121 if (((outer_code == SET
30122 || outer_code == PLUS
30123 || outer_code == MINUS)
30124 && (satisfies_constraint_I (x)
30125 || satisfies_constraint_L (x)))
30126 || (outer_code == AND
30127 && (satisfies_constraint_K (x)
30128 || (mode == SImode
30129 ? satisfies_constraint_L (x)
30130 : satisfies_constraint_J (x))
30131 || mask_operand (x, mode)
30132 || (mode == DImode
30133 && mask64_operand (x, DImode))))
30134 || ((outer_code == IOR || outer_code == XOR)
30135 && (satisfies_constraint_K (x)
30136 || (mode == SImode
30137 ? satisfies_constraint_L (x)
30138 : satisfies_constraint_J (x))))
30139 || outer_code == ASHIFT
30140 || outer_code == ASHIFTRT
30141 || outer_code == LSHIFTRT
30142 || outer_code == ROTATE
30143 || outer_code == ROTATERT
30144 || outer_code == ZERO_EXTRACT
30145 || (outer_code == MULT
30146 && satisfies_constraint_I (x))
30147 || ((outer_code == DIV || outer_code == UDIV
30148 || outer_code == MOD || outer_code == UMOD)
30149 && exact_log2 (INTVAL (x)) >= 0)
30150 || (outer_code == COMPARE
30151 && (satisfies_constraint_I (x)
30152 || satisfies_constraint_K (x)))
30153 || ((outer_code == EQ || outer_code == NE)
30154 && (satisfies_constraint_I (x)
30155 || satisfies_constraint_K (x)
30156 || (mode == SImode
30157 ? satisfies_constraint_L (x)
30158 : satisfies_constraint_J (x))))
30159 || (outer_code == GTU
30160 && satisfies_constraint_I (x))
30161 || (outer_code == LTU
30162 && satisfies_constraint_P (x)))
30164 *total = 0;
30165 return true;
30167 else if ((outer_code == PLUS
30168 && reg_or_add_cint_operand (x, VOIDmode))
30169 || (outer_code == MINUS
30170 && reg_or_sub_cint_operand (x, VOIDmode))
30171 || ((outer_code == SET
30172 || outer_code == IOR
30173 || outer_code == XOR)
30174 && (INTVAL (x)
30175 & ~ (unsigned HOST_WIDE_INT) 0xffffffff) == 0))
30177 *total = COSTS_N_INSNS (1);
30178 return true;
30180 /* FALLTHRU */
30182 case CONST_DOUBLE:
30183 case CONST_WIDE_INT:
30184 case CONST:
30185 case HIGH:
30186 case SYMBOL_REF:
30187 case MEM:
30188 /* When optimizing for size, MEM should be slightly more expensive
30189 than generating address, e.g., (plus (reg) (const)).
30190 L1 cache latency is about two instructions. */
30191 *total = !speed ? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (2);
30192 return true;
30194 case LABEL_REF:
30195 *total = 0;
30196 return true;
30198 case PLUS:
30199 case MINUS:
30200 if (FLOAT_MODE_P (mode))
30201 *total = rs6000_cost->fp;
30202 else
30203 *total = COSTS_N_INSNS (1);
30204 return false;
30206 case MULT:
30207 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30208 && satisfies_constraint_I (XEXP (x, 1)))
30210 if (INTVAL (XEXP (x, 1)) >= -256
30211 && INTVAL (XEXP (x, 1)) <= 255)
30212 *total = rs6000_cost->mulsi_const9;
30213 else
30214 *total = rs6000_cost->mulsi_const;
30216 else if (mode == SFmode)
30217 *total = rs6000_cost->fp;
30218 else if (FLOAT_MODE_P (mode))
30219 *total = rs6000_cost->dmul;
30220 else if (mode == DImode)
30221 *total = rs6000_cost->muldi;
30222 else
30223 *total = rs6000_cost->mulsi;
30224 return false;
30226 case FMA:
30227 if (mode == SFmode)
30228 *total = rs6000_cost->fp;
30229 else
30230 *total = rs6000_cost->dmul;
30231 break;
30233 case DIV:
30234 case MOD:
30235 if (FLOAT_MODE_P (mode))
30237 *total = mode == DFmode ? rs6000_cost->ddiv
30238 : rs6000_cost->sdiv;
30239 return false;
30241 /* FALLTHRU */
30243 case UDIV:
30244 case UMOD:
30245 if (GET_CODE (XEXP (x, 1)) == CONST_INT
30246 && exact_log2 (INTVAL (XEXP (x, 1))) >= 0)
30248 if (code == DIV || code == MOD)
30249 /* Shift, addze */
30250 *total = COSTS_N_INSNS (2);
30251 else
30252 /* Shift */
30253 *total = COSTS_N_INSNS (1);
30255 else
30257 if (GET_MODE (XEXP (x, 1)) == DImode)
30258 *total = rs6000_cost->divdi;
30259 else
30260 *total = rs6000_cost->divsi;
30262 /* Add in shift and subtract for MOD. */
30263 if (code == MOD || code == UMOD)
30264 *total += COSTS_N_INSNS (2);
30265 return false;
30267 case CTZ:
30268 case FFS:
30269 *total = COSTS_N_INSNS (4);
30270 return false;
30272 case POPCOUNT:
30273 *total = COSTS_N_INSNS (TARGET_POPCNTD ? 1 : 6);
30274 return false;
30276 case PARITY:
30277 *total = COSTS_N_INSNS (TARGET_CMPB ? 2 : 6);
30278 return false;
30280 case NOT:
30281 if (outer_code == AND || outer_code == IOR || outer_code == XOR)
30283 *total = 0;
30284 return false;
30286 /* FALLTHRU */
30288 case AND:
30289 case CLZ:
30290 case IOR:
30291 case XOR:
30292 case ZERO_EXTRACT:
30293 *total = COSTS_N_INSNS (1);
30294 return false;
30296 case ASHIFT:
30297 case ASHIFTRT:
30298 case LSHIFTRT:
30299 case ROTATE:
30300 case ROTATERT:
30301 /* Handle mul_highpart. */
30302 if (outer_code == TRUNCATE
30303 && GET_CODE (XEXP (x, 0)) == MULT)
30305 if (mode == DImode)
30306 *total = rs6000_cost->muldi;
30307 else
30308 *total = rs6000_cost->mulsi;
30309 return true;
30311 else if (outer_code == AND)
30312 *total = 0;
30313 else
30314 *total = COSTS_N_INSNS (1);
30315 return false;
30317 case SIGN_EXTEND:
30318 case ZERO_EXTEND:
30319 if (GET_CODE (XEXP (x, 0)) == MEM)
30320 *total = 0;
30321 else
30322 *total = COSTS_N_INSNS (1);
30323 return false;
30325 case COMPARE:
30326 case NEG:
30327 case ABS:
30328 if (!FLOAT_MODE_P (mode))
30330 *total = COSTS_N_INSNS (1);
30331 return false;
30333 /* FALLTHRU */
30335 case FLOAT:
30336 case UNSIGNED_FLOAT:
30337 case FIX:
30338 case UNSIGNED_FIX:
30339 case FLOAT_TRUNCATE:
30340 *total = rs6000_cost->fp;
30341 return false;
30343 case FLOAT_EXTEND:
30344 if (mode == DFmode)
30345 *total = 0;
30346 else
30347 *total = rs6000_cost->fp;
30348 return false;
30350 case UNSPEC:
30351 switch (XINT (x, 1))
30353 case UNSPEC_FRSP:
30354 *total = rs6000_cost->fp;
30355 return true;
30357 default:
30358 break;
30360 break;
30362 case CALL:
30363 case IF_THEN_ELSE:
30364 if (!speed)
30366 *total = COSTS_N_INSNS (1);
30367 return true;
30369 else if (FLOAT_MODE_P (mode)
30370 && TARGET_PPC_GFXOPT && TARGET_HARD_FLOAT && TARGET_FPRS)
30372 *total = rs6000_cost->fp;
30373 return false;
30375 break;
30377 case NE:
30378 case EQ:
30379 case GTU:
30380 case LTU:
30381 /* Carry bit requires mode == Pmode.
30382 NEG or PLUS already counted so only add one. */
30383 if (mode == Pmode
30384 && (outer_code == NEG || outer_code == PLUS))
30386 *total = COSTS_N_INSNS (1);
30387 return true;
30389 if (outer_code == SET)
30391 if (XEXP (x, 1) == const0_rtx)
30393 if (TARGET_ISEL && !TARGET_MFCRF)
30394 *total = COSTS_N_INSNS (8);
30395 else
30396 *total = COSTS_N_INSNS (2);
30397 return true;
30399 else if (mode == Pmode)
30401 *total = COSTS_N_INSNS (3);
30402 return false;
30405 /* FALLTHRU */
30407 case GT:
30408 case LT:
30409 case UNORDERED:
30410 if (outer_code == SET && (XEXP (x, 1) == const0_rtx))
30412 if (TARGET_ISEL && !TARGET_MFCRF)
30413 *total = COSTS_N_INSNS (8);
30414 else
30415 *total = COSTS_N_INSNS (2);
30416 return true;
30418 /* CC COMPARE. */
30419 if (outer_code == COMPARE)
30421 *total = 0;
30422 return true;
30424 break;
30426 default:
30427 break;
30430 return false;
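/* Two quick examples of the CONST_INT handling above: in
   (plus (reg) (const_int 100)) the constant satisfies constraint "I"
   (a signed 16-bit immediate) and is costed as free, whereas
   (set (reg:SI) (const_int 0x12345678)) falls into the second branch
   and is costed at COSTS_N_INSNS (1), reflecting the extra instruction
   in the lis/ori sequence needed to materialize it.  */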
30433 /* Debug form of rs6000_rtx_costs that is selected if -mdebug=cost. */
30435 static bool
30436 rs6000_debug_rtx_costs (rtx x, int code, int outer_code, int opno, int *total,
30437 bool speed)
30439 bool ret = rs6000_rtx_costs (x, code, outer_code, opno, total, speed);
30441 fprintf (stderr,
30442 "\nrs6000_rtx_costs, return = %s, code = %s, outer_code = %s, "
30443 "opno = %d, total = %d, speed = %s, x:\n",
30444 ret ? "complete" : "scan inner",
30445 GET_RTX_NAME (code),
30446 GET_RTX_NAME (outer_code),
30447 opno,
30448 *total,
30449 speed ? "true" : "false");
30451 debug_rtx (x);
30453 return ret;
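/* Sample -mdebug=cost output from the above (values illustrative):

   rs6000_rtx_costs, return = complete, code = const_int, outer_code = set,
   opno = 1, total = 4, speed = true, x:
   (const_int 305419896 [0x12345678])  */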
30456 /* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
30458 static int
30459 rs6000_debug_address_cost (rtx x, machine_mode mode,
30460 addr_space_t as, bool speed)
30462 int ret = TARGET_ADDRESS_COST (x, mode, as, speed);
30464 fprintf (stderr, "\nrs6000_address_cost, return = %d, speed = %s, x:\n",
30465 ret, speed ? "true" : "false");
30466 debug_rtx (x);
30468 return ret;
30472 /* A C expression returning the cost of moving data from a register of class
30473 CLASS1 to one of CLASS2. */
30475 static int
30476 rs6000_register_move_cost (machine_mode mode,
30477 reg_class_t from, reg_class_t to)
30479 int ret;
30481 if (TARGET_DEBUG_COST)
30482 dbg_cost_ctrl++;
30484 /* Moves from/to GENERAL_REGS. */
30485 if (reg_classes_intersect_p (to, GENERAL_REGS)
30486 || reg_classes_intersect_p (from, GENERAL_REGS))
30488 reg_class_t rclass = from;
30490 if (! reg_classes_intersect_p (to, GENERAL_REGS))
30491 rclass = to;
30493 if (rclass == FLOAT_REGS || rclass == ALTIVEC_REGS || rclass == VSX_REGS)
30494 ret = (rs6000_memory_move_cost (mode, rclass, false)
30495 + rs6000_memory_move_cost (mode, GENERAL_REGS, false));
30497 /* It's more expensive to move CR_REGS than CR0_REGS because of the
30498 shift. */
30499 else if (rclass == CR_REGS)
30500 ret = 4;
30502 /* For those processors that have slow LR/CTR moves, make them more
30503 expensive than memory in order to bias spills to memory. */
30504 else if ((rs6000_cpu == PROCESSOR_POWER6
30505 || rs6000_cpu == PROCESSOR_POWER7
30506 || rs6000_cpu == PROCESSOR_POWER8)
30507 && reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
30508 ret = 6 * hard_regno_nregs[0][mode];
30510 else
30511 /* A move will cost one instruction per GPR moved. */
30512 ret = 2 * hard_regno_nregs[0][mode];
30515 /* If we have VSX, we can easily move between FPR or Altivec registers. */
30516 else if (VECTOR_MEM_VSX_P (mode)
30517 && reg_classes_intersect_p (to, VSX_REGS)
30518 && reg_classes_intersect_p (from, VSX_REGS))
30519 ret = 2 * hard_regno_nregs[32][mode];
30521 /* Moving between two similar registers is just one instruction. */
30522 else if (reg_classes_intersect_p (to, from))
30523 ret = (mode == TFmode || mode == TDmode) ? 4 : 2;
30525 /* Everything else has to go through GENERAL_REGS. */
30526 else
30527 ret = (rs6000_register_move_cost (mode, GENERAL_REGS, to)
30528 + rs6000_register_move_cost (mode, from, GENERAL_REGS));
30530 if (TARGET_DEBUG_COST)
30532 if (dbg_cost_ctrl == 1)
30533 fprintf (stderr,
30534 "rs6000_register_move_cost:, ret=%d, mode=%s, from=%s, to=%s\n",
30535 ret, GET_MODE_NAME (mode), reg_class_names[from],
30536 reg_class_names[to]);
30537 dbg_cost_ctrl--;
30540 return ret;
30543 /* A C expression returning the cost of moving data of MODE from a register to
30544 or from memory. */
30546 static int
30547 rs6000_memory_move_cost (machine_mode mode, reg_class_t rclass,
30548 bool in ATTRIBUTE_UNUSED)
30550 int ret;
30552 if (TARGET_DEBUG_COST)
30553 dbg_cost_ctrl++;
30555 if (reg_classes_intersect_p (rclass, GENERAL_REGS))
30556 ret = 4 * hard_regno_nregs[0][mode];
30557 else if ((reg_classes_intersect_p (rclass, FLOAT_REGS)
30558 || reg_classes_intersect_p (rclass, VSX_REGS)))
30559 ret = 4 * hard_regno_nregs[32][mode];
30560 else if (reg_classes_intersect_p (rclass, ALTIVEC_REGS))
30561 ret = 4 * hard_regno_nregs[FIRST_ALTIVEC_REGNO][mode];
30562 else
30563 ret = 4 + rs6000_register_move_cost (mode, rclass, GENERAL_REGS);
30565 if (TARGET_DEBUG_COST)
30567 if (dbg_cost_ctrl == 1)
30568 fprintf (stderr,
30569 "rs6000_memory_move_cost: ret=%d, mode=%s, rclass=%s, in=%d\n",
30570 ret, GET_MODE_NAME (mode), reg_class_names[rclass], in);
30571 dbg_cost_ctrl--;
30574 return ret;
30577 /* Returns a code for a target-specific builtin that implements
30578 reciprocal of the function, or NULL_TREE if not available. */
30580 static tree
30581 rs6000_builtin_reciprocal (unsigned int fn, bool md_fn,
30582 bool sqrt ATTRIBUTE_UNUSED)
30584 if (optimize_insn_for_size_p ())
30585 return NULL_TREE;
30587 if (md_fn)
30588 switch (fn)
30590 case VSX_BUILTIN_XVSQRTDP:
30591 if (!RS6000_RECIP_AUTO_RSQRTE_P (V2DFmode))
30592 return NULL_TREE;
30594 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_2DF];
30596 case VSX_BUILTIN_XVSQRTSP:
30597 if (!RS6000_RECIP_AUTO_RSQRTE_P (V4SFmode))
30598 return NULL_TREE;
30600 return rs6000_builtin_decls[VSX_BUILTIN_RSQRT_4SF];
30602 default:
30603 return NULL_TREE;
30606 else
30607 switch (fn)
30609 case BUILT_IN_SQRT:
30610 if (!RS6000_RECIP_AUTO_RSQRTE_P (DFmode))
30611 return NULL_TREE;
30613 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRT];
30615 case BUILT_IN_SQRTF:
30616 if (!RS6000_RECIP_AUTO_RSQRTE_P (SFmode))
30617 return NULL_TREE;
30619 return rs6000_builtin_decls[RS6000_BUILTIN_RSQRTF];
30621 default:
30622 return NULL_TREE;
30626 /* Load up a constant. If the mode is a vector mode, splat the value across
30627 all of the vector elements. */
30629 static rtx
30630 rs6000_load_constant_and_splat (machine_mode mode, REAL_VALUE_TYPE dconst)
30632 rtx reg;
30634 if (mode == SFmode || mode == DFmode)
30636 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, mode);
30637 reg = force_reg (mode, d);
30639 else if (mode == V4SFmode)
30641 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, SFmode);
30642 rtvec v = gen_rtvec (4, d, d, d, d);
30643 reg = gen_reg_rtx (mode);
30644 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30646 else if (mode == V2DFmode)
30648 rtx d = CONST_DOUBLE_FROM_REAL_VALUE (dconst, DFmode);
30649 rtvec v = gen_rtvec (2, d, d);
30650 reg = gen_reg_rtx (mode);
30651 rs6000_expand_vector_init (reg, gen_rtx_PARALLEL (mode, v));
30653 else
30654 gcc_unreachable ();
30656 return reg;
30659 /* Generate an FMA instruction. */
30661 static void
30662 rs6000_emit_madd (rtx target, rtx m1, rtx m2, rtx a)
30664 machine_mode mode = GET_MODE (target);
30665 rtx dst;
30667 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30668 gcc_assert (dst != NULL);
30670 if (dst != target)
30671 emit_move_insn (target, dst);
30674 /* Generate a FMSUB instruction: dst = fma(m1, m2, -a). */
30676 static void
30677 rs6000_emit_msub (rtx target, rtx m1, rtx m2, rtx a)
30679 machine_mode mode = GET_MODE (target);
30680 rtx dst;
30682 /* Altivec does not support fms directly;
30683 generate in terms of fma in that case. */
30684 if (optab_handler (fms_optab, mode) != CODE_FOR_nothing)
30685 dst = expand_ternary_op (mode, fms_optab, m1, m2, a, target, 0);
30686 else
30688 a = expand_unop (mode, neg_optab, a, NULL_RTX, 0);
30689 dst = expand_ternary_op (mode, fma_optab, m1, m2, a, target, 0);
30691 gcc_assert (dst != NULL);
30693 if (dst != target)
30694 emit_move_insn (target, dst);
30697 /* Generate a FNMSUB instruction: dst = -fma(m1, m2, -a). */
30699 static void
30700 rs6000_emit_nmsub (rtx dst, rtx m1, rtx m2, rtx a)
30702 machine_mode mode = GET_MODE (dst);
30703 rtx r;
30705 /* This is a tad more complicated, since the fnma_optab is for
30706 a different expression: fma(-m1, m2, a), which is the same
30707 thing except in the case of signed zeros.
30709 Fortunately we know that if FMA is supported, then FNMSUB is
30710 also supported in the ISA. Just expand it directly. */
30712 gcc_assert (optab_handler (fma_optab, mode) != CODE_FOR_nothing);
30714 r = gen_rtx_NEG (mode, a);
30715 r = gen_rtx_FMA (mode, m1, m2, r);
30716 r = gen_rtx_NEG (mode, r);
30717 emit_insn (gen_rtx_SET (VOIDmode, dst, r));
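/* The signed-zero subtlety, worked through: if m1 * m2 equals a exactly,
   then -fma (m1, m2, -a) computes -(x - x) = -(+0.0) = -0.0 under
   round-to-nearest, while fma (-m1, m2, a) computes -x + x = +0.0.
   Hence fnma_optab cannot be used here without changing the sign of a
   zero result.  */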
30720 /* Newton-Raphson approximation of floating point divide DST = N/D. If NOTE_P,
30721 add a reg_note saying that this was a division. Support both scalar and
30722 vector divide. Assumes no trapping math and finite arguments. */
30724 void
30725 rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
30727 machine_mode mode = GET_MODE (dst);
30728 rtx one, x0, e0, x1, xprev, eprev, xnext, enext, u, v;
30729 int i;
30731 /* Low precision estimates guarantee 5 bits of accuracy. High
30732 precision estimates guarantee 14 bits of accuracy. SFmode
30733 requires 23 bits of accuracy. DFmode requires 52 bits of
30734 accuracy. Each pass at least doubles the accuracy, leading
30735 to the following. */
30736 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30737 if (mode == DFmode || mode == V2DFmode)
30738 passes++;
30740 enum insn_code code = optab_handler (smul_optab, mode);
30741 insn_gen_fn gen_mul = GEN_FCN (code);
30743 gcc_assert (code != CODE_FOR_nothing);
30745 one = rs6000_load_constant_and_splat (mode, dconst1);
30747 /* x0 = 1./d estimate */
30748 x0 = gen_reg_rtx (mode);
30749 emit_insn (gen_rtx_SET (VOIDmode, x0,
30750 gen_rtx_UNSPEC (mode, gen_rtvec (1, d),
30751 UNSPEC_FRES)));
30753 /* Each iteration but the last calculates x_(i+1) = x_i * (2 - d * x_i). */
30754 if (passes > 1) {
30756 /* e0 = 1. - d * x0 */
30757 e0 = gen_reg_rtx (mode);
30758 rs6000_emit_nmsub (e0, d, x0, one);
30760 /* x1 = x0 + e0 * x0 */
30761 x1 = gen_reg_rtx (mode);
30762 rs6000_emit_madd (x1, e0, x0, x0);
30764 for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
30765 ++i, xprev = xnext, eprev = enext) {
30767 /* enext = eprev * eprev */
30768 enext = gen_reg_rtx (mode);
30769 emit_insn (gen_mul (enext, eprev, eprev));
30771 /* xnext = xprev + enext * xprev */
30772 xnext = gen_reg_rtx (mode);
30773 rs6000_emit_madd (xnext, enext, xprev, xprev);
30776 } else
30777 xprev = x0;
30779 /* The last iteration calculates x_(i+1) = n * x_i * (2 - d * x_i). */
30781 /* u = n * xprev */
30782 u = gen_reg_rtx (mode);
30783 emit_insn (gen_mul (u, n, xprev));
30785 /* v = n - (d * u) */
30786 v = gen_reg_rtx (mode);
30787 rs6000_emit_nmsub (v, d, u, n);
30789 /* dst = (v * xprev) + u */
30790 rs6000_emit_madd (dst, v, xprev, u);
30792 if (note_p)
30793 add_reg_note (get_last_insn (), REG_EQUAL, gen_rtx_DIV (mode, n, d));
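/* Checking the pass counts above: a 5-bit estimate refined 3 times gives
   5 -> 10 -> 20 -> 40 bits, covering SFmode's 23; the extra pass for
   DFmode/V2DFmode gives 80 >= 52.  With 14-bit estimates, one pass gives
   28 >= 23 and two passes give 56 >= 52.  */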
30796 /* Newton-Raphson approximation of single/double-precision floating point
30797 rsqrt. Assumes no trapping math and finite arguments. */
30799 void
30800 rs6000_emit_swrsqrt (rtx dst, rtx src)
30802 machine_mode mode = GET_MODE (src);
30803 rtx x0 = gen_reg_rtx (mode);
30804 rtx y = gen_reg_rtx (mode);
30806 /* Low precision estimates guarantee 5 bits of accuracy. High
30807 precision estimates guarantee 14 bits of accuracy. SFmode
30808 requires 23 bits of accuracy. DFmode requires 52 bits of
30809 accuracy. Each pass at least doubles the accuracy, leading
30810 to the following. */
30811 int passes = (TARGET_RECIP_PRECISION) ? 1 : 3;
30812 if (mode == DFmode || mode == V2DFmode)
30813 passes++;
30815 REAL_VALUE_TYPE dconst3_2;
30816 int i;
30817 rtx halfthree;
30818 enum insn_code code = optab_handler (smul_optab, mode);
30819 insn_gen_fn gen_mul = GEN_FCN (code);
30821 gcc_assert (code != CODE_FOR_nothing);
30823 /* Load up the constant 1.5 either as a scalar, or as a vector. */
30824 real_from_integer (&dconst3_2, VOIDmode, 3, SIGNED);
30825 SET_REAL_EXP (&dconst3_2, REAL_EXP (&dconst3_2) - 1);
30827 halfthree = rs6000_load_constant_and_splat (mode, dconst3_2);
30829 /* x0 = rsqrt estimate */
30830 emit_insn (gen_rtx_SET (VOIDmode, x0,
30831 gen_rtx_UNSPEC (mode, gen_rtvec (1, src),
30832 UNSPEC_RSQRT)));
30834 /* y = 0.5 * src = 1.5 * src - src -> fewer constants */
30835 rs6000_emit_msub (y, src, halfthree, src);
30837 for (i = 0; i < passes; i++)
30839 rtx x1 = gen_reg_rtx (mode);
30840 rtx u = gen_reg_rtx (mode);
30841 rtx v = gen_reg_rtx (mode);
30843 /* x1 = x0 * (1.5 - y * (x0 * x0)) */
30844 emit_insn (gen_mul (u, x0, x0));
30845 rs6000_emit_nmsub (v, y, u, halfthree);
30846 emit_insn (gen_mul (x1, x0, v));
30847 x0 = x1;
30850 emit_move_insn (dst, x0);
30851 return;
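/* The update above is standard Newton-Raphson on f(x) = 1/x^2 - src:
   x1 = x0 + x0 * (1 - src * x0^2) / 2 = x0 * (1.5 - (0.5 * src) * x0^2),
   which is exactly x1 = x0 * (halfthree - y * (x0 * x0)) with
   y = 0.5 * src as computed by the rs6000_emit_msub call.  */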
30854 /* Emit popcount intrinsic on TARGET_POPCNTB (Power5) and TARGET_POPCNTD
30855 (Power7) targets. DST is the target, and SRC is the argument operand. */
30857 void
30858 rs6000_emit_popcount (rtx dst, rtx src)
30860 machine_mode mode = GET_MODE (dst);
30861 rtx tmp1, tmp2;
30863 /* Use the PPC ISA 2.06 popcnt{w,d} instruction if we can. */
30864 if (TARGET_POPCNTD)
30866 if (mode == SImode)
30867 emit_insn (gen_popcntdsi2 (dst, src));
30868 else
30869 emit_insn (gen_popcntddi2 (dst, src));
30870 return;
30873 tmp1 = gen_reg_rtx (mode);
30875 if (mode == SImode)
30877 emit_insn (gen_popcntbsi2 (tmp1, src));
30878 tmp2 = expand_mult (SImode, tmp1, GEN_INT (0x01010101),
30879 NULL_RTX, 0);
30880 tmp2 = force_reg (SImode, tmp2);
30881 emit_insn (gen_lshrsi3 (dst, tmp2, GEN_INT (24)));
30883 else
30885 emit_insn (gen_popcntbdi2 (tmp1, src));
30886 tmp2 = expand_mult (DImode, tmp1,
30887 GEN_INT ((HOST_WIDE_INT)
30888 0x01010101 << 32 | 0x01010101),
30889 NULL_RTX, 0);
30890 tmp2 = force_reg (DImode, tmp2);
30891 emit_insn (gen_lshrdi3 (dst, tmp2, GEN_INT (56)));
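/* Worked example of the multiply trick: for src = 0xff, popcntb leaves
   the per-byte count 0x00000008; multiplying by 0x01010101 accumulates
   every byte count into the most significant byte (0x08080808 here), and
   the shift right by 24 extracts the population count 8.  */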
30896 /* Emit parity intrinsic on TARGET_POPCNTB targets. DST is the
30897 target, and SRC is the argument operand. */
30899 void
30900 rs6000_emit_parity (rtx dst, rtx src)
30902 machine_mode mode = GET_MODE (dst);
30903 rtx tmp;
30905 tmp = gen_reg_rtx (mode);
30907 /* Use the PPC ISA 2.05 prtyw/prtyd instruction if we can. */
30908 if (TARGET_CMPB)
30910 if (mode == SImode)
30912 emit_insn (gen_popcntbsi2 (tmp, src));
30913 emit_insn (gen_paritysi2_cmpb (dst, tmp));
30915 else
30917 emit_insn (gen_popcntbdi2 (tmp, src));
30918 emit_insn (gen_paritydi2_cmpb (dst, tmp));
30920 return;
30923 if (mode == SImode)
30925 /* Is mult+shift >= shift+xor+shift+xor? */
30926 if (rs6000_cost->mulsi_const >= COSTS_N_INSNS (3))
30928 rtx tmp1, tmp2, tmp3, tmp4;
30930 tmp1 = gen_reg_rtx (SImode);
30931 emit_insn (gen_popcntbsi2 (tmp1, src));
30933 tmp2 = gen_reg_rtx (SImode);
30934 emit_insn (gen_lshrsi3 (tmp2, tmp1, GEN_INT (16)));
30935 tmp3 = gen_reg_rtx (SImode);
30936 emit_insn (gen_xorsi3 (tmp3, tmp1, tmp2));
30938 tmp4 = gen_reg_rtx (SImode);
30939 emit_insn (gen_lshrsi3 (tmp4, tmp3, GEN_INT (8)));
30940 emit_insn (gen_xorsi3 (tmp, tmp3, tmp4));
30942 else
30943 rs6000_emit_popcount (tmp, src);
30944 emit_insn (gen_andsi3 (dst, tmp, const1_rtx));
30946 else
30948 /* Is mult+shift >= shift+xor+shift+xor+shift+xor? */
30949 if (rs6000_cost->muldi >= COSTS_N_INSNS (5))
30951 rtx tmp1, tmp2, tmp3, tmp4, tmp5, tmp6;
30953 tmp1 = gen_reg_rtx (DImode);
30954 emit_insn (gen_popcntbdi2 (tmp1, src));
30956 tmp2 = gen_reg_rtx (DImode);
30957 emit_insn (gen_lshrdi3 (tmp2, tmp1, GEN_INT (32)));
30958 tmp3 = gen_reg_rtx (DImode);
30959 emit_insn (gen_xordi3 (tmp3, tmp1, tmp2));
30961 tmp4 = gen_reg_rtx (DImode);
30962 emit_insn (gen_lshrdi3 (tmp4, tmp3, GEN_INT (16)));
30963 tmp5 = gen_reg_rtx (DImode);
30964 emit_insn (gen_xordi3 (tmp5, tmp3, tmp4));
30966 tmp6 = gen_reg_rtx (DImode);
30967 emit_insn (gen_lshrdi3 (tmp6, tmp5, GEN_INT (8)));
30968 emit_insn (gen_xordi3 (tmp, tmp5, tmp6));
30970 else
30971 rs6000_emit_popcount (tmp, src);
30972 emit_insn (gen_anddi3 (dst, tmp, const1_rtx));
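/* The xor ladder works because parity distributes over xor: folding the
   popcntb result with itself shifted right by 16 and then by 8 leaves
   the xor of all byte counts in the low byte, whose least significant
   bit (extracted by the final AND with 1) is the parity of the word.  */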
30976 /* Expand an Altivec constant permutation for little endian mode.
30977 There are two issues: First, the two input operands must be
30978 swapped so that together they form a double-wide array in LE
30979 order. Second, the vperm instruction has surprising behavior
30980 in LE mode: it interprets the elements of the source vectors
30981 in BE mode ("left to right") and interprets the elements of
30982 the destination vector in LE mode ("right to left"). To
30983 correct for this, we must subtract each element of the permute
30984 control vector from 31.
30986 For example, suppose we want to concatenate vr10 = {0, 1, 2, 3}
30987 with vr11 = {4, 5, 6, 7} and extract {0, 2, 4, 6} using a vperm.
30988 We place {0,1,2,3,8,9,10,11,16,17,18,19,24,25,26,27} in vr12 to
30989 serve as the permute control vector. Then, in BE mode,
30991 vperm 9,10,11,12
30993 places the desired result in vr9. However, in LE mode the
30994 vector contents will be
30996 vr10 = 00000003 00000002 00000001 00000000
30997 vr11 = 00000007 00000006 00000005 00000004
30999 The result of the vperm using the same permute control vector is
31001 vr9 = 05000000 07000000 01000000 03000000
31003 That is, the leftmost 4 bytes of vr10 are interpreted as the
31004 source for the rightmost 4 bytes of vr9, and so on.
31006 If we change the permute control vector to
31008 vr12 = {31,30,29,28,23,22,21,20,15,14,13,12,7,6,5,4}
31010 and issue
31012 vperm 9,11,10,12
31014 we get the desired
31016 vr9 = 00000006 00000004 00000002 00000000. */
31018 void
31019 altivec_expand_vec_perm_const_le (rtx operands[4])
31021 unsigned int i;
31022 rtx perm[16];
31023 rtx constv, unspec;
31024 rtx target = operands[0];
31025 rtx op0 = operands[1];
31026 rtx op1 = operands[2];
31027 rtx sel = operands[3];
31029 /* Unpack and adjust the constant selector. */
31030 for (i = 0; i < 16; ++i)
31032 rtx e = XVECEXP (sel, 0, i);
31033 unsigned int elt = 31 - (INTVAL (e) & 31);
31034 perm[i] = GEN_INT (elt);
31037 /* Expand to a permute, swapping the inputs and using the
31038 adjusted selector. */
31039 if (!REG_P (op0))
31040 op0 = force_reg (V16QImode, op0);
31041 if (!REG_P (op1))
31042 op1 = force_reg (V16QImode, op1);
31044 constv = gen_rtx_CONST_VECTOR (V16QImode, gen_rtvec_v (16, perm));
31045 constv = force_reg (V16QImode, constv);
31046 unspec = gen_rtx_UNSPEC (V16QImode, gen_rtvec (3, op1, op0, constv),
31047 UNSPEC_VPERM);
31048 if (!REG_P (target))
31050 rtx tmp = gen_reg_rtx (V16QImode);
31051 emit_move_insn (tmp, unspec);
31052 unspec = tmp;
31055 emit_move_insn (target, unspec);
31058 /* Similarly to altivec_expand_vec_perm_const_le, we must adjust the
31059 permute control vector. But here it's not a constant, so we must
31060 generate a vector NAND or NOR to do the adjustment. */
31062 void
31063 altivec_expand_vec_perm_le (rtx operands[4])
31065 rtx notx, iorx, unspec;
31066 rtx target = operands[0];
31067 rtx op0 = operands[1];
31068 rtx op1 = operands[2];
31069 rtx sel = operands[3];
31070 rtx tmp = target;
31071 rtx norreg = gen_reg_rtx (V16QImode);
31072 machine_mode mode = GET_MODE (target);
31074 /* Get everything in regs so the pattern matches. */
31075 if (!REG_P (op0))
31076 op0 = force_reg (mode, op0);
31077 if (!REG_P (op1))
31078 op1 = force_reg (mode, op1);
31079 if (!REG_P (sel))
31080 sel = force_reg (V16QImode, sel);
31081 if (!REG_P (target))
31082 tmp = gen_reg_rtx (mode);
31084 /* Invert the selector with a VNAND if available, else a VNOR.
31085 The VNAND is preferred for future fusion opportunities. */
31086 notx = gen_rtx_NOT (V16QImode, sel);
31087 iorx = (TARGET_P8_VECTOR
31088 ? gen_rtx_IOR (V16QImode, notx, notx)
31089 : gen_rtx_AND (V16QImode, notx, notx));
31090 emit_insn (gen_rtx_SET (VOIDmode, norreg, iorx));
31092 /* Permute with operands reversed and adjusted selector. */
31093 unspec = gen_rtx_UNSPEC (mode, gen_rtvec (3, op1, op0, norreg),
31094 UNSPEC_VPERM);
31096 /* Copy into target, possibly by way of a register. */
31097 if (!REG_P (target))
31099 emit_move_insn (tmp, unspec);
31100 unspec = tmp;
31103 emit_move_insn (target, unspec);
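/* Inverting the selector is sufficient because vperm only examines the
   low five bits of each selector byte: ~e is congruent to 31 - e modulo
   32, e.g. e = 3 gives ~3 = 0xfc, whose low five bits are 28 = 31 - 3. */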
31106 /* Expand an Altivec constant permutation. Return true if we match
31107 an efficient implementation; false to fall back to VPERM. */
31109 bool
31110 altivec_expand_vec_perm_const (rtx operands[4])
31112 struct altivec_perm_insn {
31113 HOST_WIDE_INT mask;
31114 enum insn_code impl;
31115 unsigned char perm[16];
31117 static const struct altivec_perm_insn patterns[] = {
31118 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum_direct,
31119 { 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
31120 { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum_direct,
31121 { 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
31122 { OPTION_MASK_ALTIVEC,
31123 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghb_direct
31124 : CODE_FOR_altivec_vmrglb_direct),
31125 { 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
31126 { OPTION_MASK_ALTIVEC,
31127 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghh_direct
31128 : CODE_FOR_altivec_vmrglh_direct),
31129 { 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
31130 { OPTION_MASK_ALTIVEC,
31131 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrghw_direct
31132 : CODE_FOR_altivec_vmrglw_direct),
31133 { 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
31134 { OPTION_MASK_ALTIVEC,
31135 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglb_direct
31136 : CODE_FOR_altivec_vmrghb_direct),
31137 { 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
31138 { OPTION_MASK_ALTIVEC,
31139 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglh_direct
31140 : CODE_FOR_altivec_vmrghh_direct),
31141 { 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
31142 { OPTION_MASK_ALTIVEC,
31143 (BYTES_BIG_ENDIAN ? CODE_FOR_altivec_vmrglw_direct
31144 : CODE_FOR_altivec_vmrghw_direct),
31145 { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
31146 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
31147 { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
31148 { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
31149 { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
31152 unsigned int i, j, elt, which;
31153 unsigned char perm[16];
31154 rtx target, op0, op1, sel, x;
31155 bool one_vec;
31157 target = operands[0];
31158 op0 = operands[1];
31159 op1 = operands[2];
31160 sel = operands[3];
31162 /* Unpack the constant selector. */
31163 for (i = which = 0; i < 16; ++i)
31165 rtx e = XVECEXP (sel, 0, i);
31166 elt = INTVAL (e) & 31;
31167 which |= (elt < 16 ? 1 : 2);
31168 perm[i] = elt;
31171 /* Simplify the constant selector based on operands. */
31172 switch (which)
31174 default:
31175 gcc_unreachable ();
31177 case 3:
31178 one_vec = false;
31179 if (!rtx_equal_p (op0, op1))
31180 break;
31181 /* FALLTHRU */
31183 case 2:
31184 for (i = 0; i < 16; ++i)
31185 perm[i] &= 15;
31186 op0 = op1;
31187 one_vec = true;
31188 break;
31190 case 1:
31191 op1 = op0;
31192 one_vec = true;
31193 break;
31196 /* Look for splat patterns. */
31197 if (one_vec)
31199 elt = perm[0];
31201 for (i = 0; i < 16; ++i)
31202 if (perm[i] != elt)
31203 break;
31204 if (i == 16)
31206 if (!BYTES_BIG_ENDIAN)
31207 elt = 15 - elt;
31208 emit_insn (gen_altivec_vspltb_direct (target, op0, GEN_INT (elt)));
31209 return true;
31212 if (elt % 2 == 0)
31214 for (i = 0; i < 16; i += 2)
31215 if (perm[i] != elt || perm[i + 1] != elt + 1)
31216 break;
31217 if (i == 16)
31219 int field = BYTES_BIG_ENDIAN ? elt / 2 : 7 - elt / 2;
31220 x = gen_reg_rtx (V8HImode);
31221 emit_insn (gen_altivec_vsplth_direct (x, gen_lowpart (V8HImode, op0),
31222 GEN_INT (field)));
31223 emit_move_insn (target, gen_lowpart (V16QImode, x));
31224 return true;
31228 if (elt % 4 == 0)
31230 for (i = 0; i < 16; i += 4)
31231 if (perm[i] != elt
31232 || perm[i + 1] != elt + 1
31233 || perm[i + 2] != elt + 2
31234 || perm[i + 3] != elt + 3)
31235 break;
31236 if (i == 16)
31238 int field = BYTES_BIG_ENDIAN ? elt / 4 : 3 - elt / 4;
31239 x = gen_reg_rtx (V4SImode);
31240 emit_insn (gen_altivec_vspltw_direct (x, gen_lowpart (V4SImode, op0),
31241 GEN_INT (field)));
31242 emit_move_insn (target, gen_lowpart (V16QImode, x));
31243 return true;
31248 /* Look for merge and pack patterns. */
31249 for (j = 0; j < ARRAY_SIZE (patterns); ++j)
31251 bool swapped;
31253 if ((patterns[j].mask & rs6000_isa_flags) == 0)
31254 continue;
31256 elt = patterns[j].perm[0];
31257 if (perm[0] == elt)
31258 swapped = false;
31259 else if (perm[0] == elt + 16)
31260 swapped = true;
31261 else
31262 continue;
31263 for (i = 1; i < 16; ++i)
31265 elt = patterns[j].perm[i];
31266 if (swapped)
31267 elt = (elt >= 16 ? elt - 16 : elt + 16);
31268 else if (one_vec && elt >= 16)
31269 elt -= 16;
31270 if (perm[i] != elt)
31271 break;
31273 if (i == 16)
31275 enum insn_code icode = patterns[j].impl;
31276 machine_mode omode = insn_data[icode].operand[0].mode;
31277 machine_mode imode = insn_data[icode].operand[1].mode;
31279 /* For little-endian, don't use vpkuwum and vpkuhum if the
31280 underlying vector type is not V4SI and V8HI, respectively.
31281 For example, using vpkuwum with a V8HI picks up the even
31282 halfwords (BE numbering) when the even halfwords (LE
31283 numbering) are what we need. */
31284 if (!BYTES_BIG_ENDIAN
31285 && icode == CODE_FOR_altivec_vpkuwum_direct
31286 && ((GET_CODE (op0) == REG
31287 && GET_MODE (op0) != V4SImode)
31288 || (GET_CODE (op0) == SUBREG
31289 && GET_MODE (XEXP (op0, 0)) != V4SImode)))
31290 continue;
31291 if (!BYTES_BIG_ENDIAN
31292 && icode == CODE_FOR_altivec_vpkuhum_direct
31293 && ((GET_CODE (op0) == REG
31294 && GET_MODE (op0) != V8HImode)
31295 || (GET_CODE (op0) == SUBREG
31296 && GET_MODE (XEXP (op0, 0)) != V8HImode)))
31297 continue;
31299 /* For little-endian, the two input operands must be swapped
31300 (or swapped back) to ensure proper right-to-left numbering
31301 from 0 to 2N-1. */
31302 if (swapped ^ !BYTES_BIG_ENDIAN)
31303 x = op0, op0 = op1, op1 = x;
31304 if (imode != V16QImode)
31306 op0 = gen_lowpart (imode, op0);
31307 op1 = gen_lowpart (imode, op1);
31309 if (omode == V16QImode)
31310 x = target;
31311 else
31312 x = gen_reg_rtx (omode);
31313 emit_insn (GEN_FCN (icode) (x, op0, op1));
31314 if (omode != V16QImode)
31315 emit_move_insn (target, gen_lowpart (V16QImode, x));
31316 return true;
31320 if (!BYTES_BIG_ENDIAN)
31322 altivec_expand_vec_perm_const_le (operands);
31323 return true;
31326 return false;
31329 /* Expand a Paired Single, VSX Permute Doubleword, or SPE constant permutation.
31330 Return true if we match an efficient implementation. */
31332 static bool
31333 rs6000_expand_vec_perm_const_1 (rtx target, rtx op0, rtx op1,
31334 unsigned char perm0, unsigned char perm1)
31336 rtx x;
31338 /* If both selectors come from the same operand, fold to single op. */
31339 if ((perm0 & 2) == (perm1 & 2))
31341 if (perm0 & 2)
31342 op0 = op1;
31343 else
31344 op1 = op0;
31346 /* If both operands are equal, fold to simpler permutation. */
31347 if (rtx_equal_p (op0, op1))
31349 perm0 = perm0 & 1;
31350 perm1 = (perm1 & 1) + 2;
31352 /* If the first selector comes from the second operand, swap. */
31353 else if (perm0 & 2)
31355 if (perm1 & 2)
31356 return false;
31357 perm0 -= 2;
31358 perm1 += 2;
31359 x = op0, op0 = op1, op1 = x;
31361 /* If the second selector does not come from the second operand, fail. */
31362 else if ((perm1 & 2) == 0)
31363 return false;
31365 /* Success! */
31366 if (target != NULL)
31368 machine_mode vmode, dmode;
31369 rtvec v;
31371 vmode = GET_MODE (target);
31372 gcc_assert (GET_MODE_NUNITS (vmode) == 2);
31373 dmode = mode_for_vector (GET_MODE_INNER (vmode), 4);
31374 x = gen_rtx_VEC_CONCAT (dmode, op0, op1);
31375 v = gen_rtvec (2, GEN_INT (perm0), GEN_INT (perm1));
31376 x = gen_rtx_VEC_SELECT (vmode, x, gen_rtx_PARALLEL (VOIDmode, v));
31377 emit_insn (gen_rtx_SET (VOIDmode, target, x));
31379 return true;
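/* Illustrative sketch (not part of the build): each selector here is a
   2-bit value in which bit 1 picks the operand and bit 0 picks the
   doubleword within it.  For example, assuming V2DF operands,

     rs6000_expand_vec_perm_const_1 (target, op0, op1, 1, 2);

   selects doubleword 1 of op0 and doubleword 0 of op1 from the
   VEC_CONCAT built above -- an xxpermdi-style doubleword merge.  */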
31382 bool
31383 rs6000_expand_vec_perm_const (rtx operands[4])
31385 rtx target, op0, op1, sel;
31386 unsigned char perm0, perm1;
31388 target = operands[0];
31389 op0 = operands[1];
31390 op1 = operands[2];
31391 sel = operands[3];
31393 /* Unpack the constant selector. */
31394 perm0 = INTVAL (XVECEXP (sel, 0, 0)) & 3;
31395 perm1 = INTVAL (XVECEXP (sel, 0, 1)) & 3;
31397 return rs6000_expand_vec_perm_const_1 (target, op0, op1, perm0, perm1);
31400 /* Test whether a constant permutation is supported. */
31402 static bool
31403 rs6000_vectorize_vec_perm_const_ok (machine_mode vmode,
31404 const unsigned char *sel)
31406 /* AltiVec (and thus VSX) can handle arbitrary permutations. */
31407 if (TARGET_ALTIVEC)
31408 return true;
31410 /* Check for ps_merge* or evmerge* insns. */
31411 if ((TARGET_PAIRED_FLOAT && vmode == V2SFmode)
31412 || (TARGET_SPE && vmode == V2SImode))
31414 rtx op0 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 1);
31415 rtx op1 = gen_raw_REG (vmode, LAST_VIRTUAL_REGISTER + 2);
31416 return rs6000_expand_vec_perm_const_1 (NULL, op0, op1, sel[0], sel[1]);
31419 return false;
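/* Sketch of the dry run above: for V2SF under TARGET_PAIRED_FLOAT the
   hook calls rs6000_expand_vec_perm_const_1 with a NULL target and two
   scratch virtual registers, so it merely reports whether the
   (sel[0], sel[1]) pair matches a ps_merge*-style form without
   emitting any RTL.  */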
31422 /* A subroutine for rs6000_expand_extract_even & rs6000_expand_interleave. */
31424 static void
31425 rs6000_do_expand_vec_perm (rtx target, rtx op0, rtx op1,
31426 machine_mode vmode, unsigned nelt, rtx perm[])
31428 machine_mode imode;
31429 rtx x;
31431 imode = vmode;
31432 if (GET_MODE_CLASS (vmode) != MODE_VECTOR_INT)
31434 imode = GET_MODE_INNER (vmode);
31435 imode = mode_for_size (GET_MODE_BITSIZE (imode), MODE_INT, 0);
31436 imode = mode_for_vector (imode, nelt);
31439 x = gen_rtx_CONST_VECTOR (imode, gen_rtvec_v (nelt, perm));
31440 x = expand_vec_perm (vmode, op0, op1, x, target);
31441 if (x != target)
31442 emit_move_insn (target, x);
31445 /* Expand an extract even operation. */
31447 void
31448 rs6000_expand_extract_even (rtx target, rtx op0, rtx op1)
31450 machine_mode vmode = GET_MODE (target);
31451 unsigned i, nelt = GET_MODE_NUNITS (vmode);
31452 rtx perm[16];
31454 for (i = 0; i < nelt; i++)
31455 perm[i] = GEN_INT (i * 2);
31457 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
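/* Example (hypothetical operands): with V4SImode OP0 and OP1 the loop
   above builds the selector {0, 2, 4, 6}, i.e. the even elements of
   the 8-element concatenation of OP0 and OP1.  */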
31460 /* Expand a vector interleave operation. */
31462 void
31463 rs6000_expand_interleave (rtx target, rtx op0, rtx op1, bool highp)
31465 machine_mode vmode = GET_MODE (target);
31466 unsigned i, high, nelt = GET_MODE_NUNITS (vmode);
31467 rtx perm[16];
31469 high = (highp ? 0 : nelt / 2);
31470 for (i = 0; i < nelt / 2; i++)
31472 perm[i * 2] = GEN_INT (i + high);
31473 perm[i * 2 + 1] = GEN_INT (i + nelt + high);
31476 rs6000_do_expand_vec_perm (target, op0, op1, vmode, nelt, perm);
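/* Example (hypothetical operands): with V4SImode and HIGHP true, HIGH
   is 0 and the selector becomes {0, 4, 1, 5}, interleaving elements
   0..1 of OP0 with elements 0..1 of OP1; with HIGHP false it is
   {2, 6, 3, 7}.  */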
31479 /* Scale a V2DF vector SRC by two raised to the power SCALE and place the result in TGT. */
31480 void
31481 rs6000_scale_v2df (rtx tgt, rtx src, int scale)
31483 HOST_WIDE_INT hwi_scale (scale);
31484 REAL_VALUE_TYPE r_pow;
31485 rtvec v = rtvec_alloc (2);
31486 rtx elt;
31487 rtx scale_vec = gen_reg_rtx (V2DFmode);
31488 (void)real_powi (&r_pow, DFmode, &dconst2, hwi_scale);
31489 elt = CONST_DOUBLE_FROM_REAL_VALUE (r_pow, DFmode);
31490 RTVEC_ELT (v, 0) = elt;
31491 RTVEC_ELT (v, 1) = elt;
31492 rs6000_expand_vector_init (scale_vec, gen_rtx_PARALLEL (V2DFmode, v));
31493 emit_insn (gen_mulv2df3 (tgt, src, scale_vec));
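/* Usage sketch: rs6000_scale_v2df (tgt, src, 3) computes 2^3 = 8.0,
   splats it into both lanes of a scratch V2DF, and emits one vector
   multiply, scaling each element of SRC by 8.0.  */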
31496 /* Return an RTX representing where to find the function value of a
31497 function returning MODE. */
31498 static rtx
31499 rs6000_complex_function_value (machine_mode mode)
31501 unsigned int regno;
31502 rtx r1, r2;
31503 machine_mode inner = GET_MODE_INNER (mode);
31504 unsigned int inner_bytes = GET_MODE_SIZE (inner);
31506 if (FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31507 regno = FP_ARG_RETURN;
31508 else
31510 regno = GP_ARG_RETURN;
31512 /* 32-bit is OK since it'll go in r3/r4. */
31513 if (TARGET_32BIT && inner_bytes >= 4)
31514 return gen_rtx_REG (mode, regno);
31517 if (inner_bytes >= 8)
31518 return gen_rtx_REG (mode, regno);
31520 r1 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno),
31521 const0_rtx);
31522 r2 = gen_rtx_EXPR_LIST (inner, gen_rtx_REG (inner, regno + 1),
31523 GEN_INT (inner_bytes));
31524 return gen_rtx_PARALLEL (mode, gen_rtvec (2, r1, r2));
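/* Sketch of the split case (hypothetical 64-bit SCmode return under
   hard float): INNER is SFmode and INNER_BYTES is 4, so the result is
   a PARALLEL of two SFmode registers, FP_ARG_RETURN at byte offset 0
   and FP_ARG_RETURN + 1 at byte offset 4.  */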
31527 /* Target hook for TARGET_FUNCTION_VALUE.
31529 On the SPE, both FPs and vectors are returned in r3.
31531 On RS/6000 an integer value is in r3 and a floating-point value is in
31532 fp1, unless -msoft-float. */
31534 static rtx
31535 rs6000_function_value (const_tree valtype,
31536 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
31537 bool outgoing ATTRIBUTE_UNUSED)
31539 machine_mode mode;
31540 unsigned int regno;
31541 machine_mode elt_mode;
31542 int n_elts;
31544 /* Special handling for structs in darwin64. */
31545 if (TARGET_MACHO
31546 && rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
31548 CUMULATIVE_ARGS valcum;
31549 rtx valret;
31551 valcum.words = 0;
31552 valcum.fregno = FP_ARG_MIN_REG;
31553 valcum.vregno = ALTIVEC_ARG_MIN_REG;
31554 /* Do a trial code generation as if this were going to be passed as
31555 an argument; if any part goes in memory, we return NULL. */
31556 valret = rs6000_darwin64_record_arg (&valcum, valtype, true, /* retval= */ true);
31557 if (valret)
31558 return valret;
31559 /* Otherwise fall through to standard ABI rules. */
31562 /* The ELFv2 ABI returns homogeneous VFP aggregates in registers. */
31563 if (rs6000_discover_homogeneous_aggregate (TYPE_MODE (valtype), valtype,
31564 &elt_mode, &n_elts))
31566 int first_reg, n_regs, i;
31567 rtx par;
31569 if (SCALAR_FLOAT_MODE_P (elt_mode))
31571 /* _Decimal128 must use even/odd register pairs. */
31572 first_reg = (elt_mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31573 n_regs = (GET_MODE_SIZE (elt_mode) + 7) >> 3;
31575 else
31577 first_reg = ALTIVEC_ARG_RETURN;
31578 n_regs = 1;
31581 par = gen_rtx_PARALLEL (TYPE_MODE (valtype), rtvec_alloc (n_elts));
31582 for (i = 0; i < n_elts; i++)
31584 rtx r = gen_rtx_REG (elt_mode, first_reg + i * n_regs);
31585 rtx off = GEN_INT (i * GET_MODE_SIZE (elt_mode));
31586 XVECEXP (par, 0, i) = gen_rtx_EXPR_LIST (VOIDmode, r, off);
31589 return par;
31592 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DImode)
31594 /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64. */
31595 return gen_rtx_PARALLEL (DImode,
31596 gen_rtvec (2,
31597 gen_rtx_EXPR_LIST (VOIDmode,
31598 gen_rtx_REG (SImode, GP_ARG_RETURN),
31599 const0_rtx),
31600 gen_rtx_EXPR_LIST (VOIDmode,
31601 gen_rtx_REG (SImode,
31602 GP_ARG_RETURN + 1),
31603 GEN_INT (4))));
31605 if (TARGET_32BIT && TARGET_POWERPC64 && TYPE_MODE (valtype) == DCmode)
31607 return gen_rtx_PARALLEL (DCmode,
31608 gen_rtvec (4,
31609 gen_rtx_EXPR_LIST (VOIDmode,
31610 gen_rtx_REG (SImode, GP_ARG_RETURN),
31611 const0_rtx),
31612 gen_rtx_EXPR_LIST (VOIDmode,
31613 gen_rtx_REG (SImode,
31614 GP_ARG_RETURN + 1),
31615 GEN_INT (4)),
31616 gen_rtx_EXPR_LIST (VOIDmode,
31617 gen_rtx_REG (SImode,
31618 GP_ARG_RETURN + 2),
31619 GEN_INT (8)),
31620 gen_rtx_EXPR_LIST (VOIDmode,
31621 gen_rtx_REG (SImode,
31622 GP_ARG_RETURN + 3),
31623 GEN_INT (12))));
31626 mode = TYPE_MODE (valtype);
31627 if ((INTEGRAL_TYPE_P (valtype) && GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
31628 || POINTER_TYPE_P (valtype))
31629 mode = TARGET_32BIT ? SImode : DImode;
31631 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31632 /* _Decimal128 must use an even/odd register pair. */
31633 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31634 else if (SCALAR_FLOAT_TYPE_P (valtype) && TARGET_HARD_FLOAT && TARGET_FPRS
31635 && ((TARGET_SINGLE_FLOAT && (mode == SFmode)) || TARGET_DOUBLE_FLOAT))
31636 regno = FP_ARG_RETURN;
31637 else if (TREE_CODE (valtype) == COMPLEX_TYPE
31638 && targetm.calls.split_complex_arg)
31639 return rs6000_complex_function_value (mode);
31640 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31641 return register is used in both cases, and we won't see V2DImode/V2DFmode
31642 for pure altivec, combine the two cases. */
31643 else if (TREE_CODE (valtype) == VECTOR_TYPE
31644 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI
31645 && ALTIVEC_OR_VSX_VECTOR_MODE (mode))
31646 regno = ALTIVEC_ARG_RETURN;
31647 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31648 && (mode == DFmode || mode == DCmode
31649 || mode == TFmode || mode == TCmode))
31650 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31651 else
31652 regno = GP_ARG_RETURN;
31654 return gen_rtx_REG (mode, regno);
31657 /* Define how to find the value returned by a library function
31658 assuming the value has mode MODE. */
31659 rtx
31660 rs6000_libcall_value (machine_mode mode)
31662 unsigned int regno;
31664 if (TARGET_32BIT && TARGET_POWERPC64 && mode == DImode)
31666 /* A long long return value needs to be split in the 32-bit ABI with -mpowerpc64. */
31667 return gen_rtx_PARALLEL (DImode,
31668 gen_rtvec (2,
31669 gen_rtx_EXPR_LIST (VOIDmode,
31670 gen_rtx_REG (SImode, GP_ARG_RETURN),
31671 const0_rtx),
31672 gen_rtx_EXPR_LIST (VOIDmode,
31673 gen_rtx_REG (SImode,
31674 GP_ARG_RETURN + 1),
31675 GEN_INT (4))));
31678 if (DECIMAL_FLOAT_MODE_P (mode) && TARGET_HARD_FLOAT && TARGET_FPRS)
31679 /* _Decimal128 must use an even/odd register pair. */
31680 regno = (mode == TDmode) ? FP_ARG_RETURN + 1 : FP_ARG_RETURN;
31681 else if (SCALAR_FLOAT_MODE_P (mode)
31682 && TARGET_HARD_FLOAT && TARGET_FPRS
31683 && ((TARGET_SINGLE_FLOAT && mode == SFmode) || TARGET_DOUBLE_FLOAT))
31684 regno = FP_ARG_RETURN;
31685 /* VSX is a superset of Altivec and adds V2DImode/V2DFmode. Since the same
31686 return register is used in both cases, and we won't see V2DImode/V2DFmode
31687 for pure altivec, combine the two cases. */
31688 else if (ALTIVEC_OR_VSX_VECTOR_MODE (mode)
31689 && TARGET_ALTIVEC && TARGET_ALTIVEC_ABI)
31690 regno = ALTIVEC_ARG_RETURN;
31691 else if (COMPLEX_MODE_P (mode) && targetm.calls.split_complex_arg)
31692 return rs6000_complex_function_value (mode);
31693 else if (TARGET_E500_DOUBLE && TARGET_HARD_FLOAT
31694 && (mode == DFmode || mode == DCmode
31695 || mode == TFmode || mode == TCmode))
31696 return spe_build_register_parallel (mode, GP_ARG_RETURN);
31697 else
31698 regno = GP_ARG_RETURN;
31700 return gen_rtx_REG (mode, regno);
31704 /* Return true if we use LRA instead of the reload pass. */
31705 static bool
31706 rs6000_lra_p (void)
31708 return rs6000_lra_flag;
31711 /* Given FROM and TO register numbers, say whether this elimination is allowed.
31712 Frame pointer elimination is automatically handled.
31714 For the RS/6000, if frame pointer elimination is being done, we would like
31715 to convert ap into fp, not sp.
31717 We need r30 if -mminimal-toc was specified and there are constant-pool
31718 references. */
31720 static bool
31721 rs6000_can_eliminate (const int from, const int to)
31723 return (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM
31724 ? ! frame_pointer_needed
31725 : from == RS6000_PIC_OFFSET_TABLE_REGNUM
31726 ? ! TARGET_MINIMAL_TOC || TARGET_NO_TOC || get_pool_size () == 0
31727 : true);
31730 /* Define the offset between two registers, FROM to be eliminated and its
31731 replacement TO, at the start of a routine. */
31732 HOST_WIDE_INT
31733 rs6000_initial_elimination_offset (int from, int to)
31735 rs6000_stack_t *info = rs6000_stack_info ();
31736 HOST_WIDE_INT offset;
31738 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31739 offset = info->push_p ? 0 : -info->total_size;
31740 else if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31742 offset = info->push_p ? 0 : -info->total_size;
31743 if (FRAME_GROWS_DOWNWARD)
31744 offset += info->fixed_size + info->vars_size + info->parm_size;
31746 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31747 offset = FRAME_GROWS_DOWNWARD
31748 ? info->fixed_size + info->vars_size + info->parm_size
31749 : 0;
31750 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
31751 offset = info->total_size;
31752 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
31753 offset = info->push_p ? info->total_size : 0;
31754 else if (from == RS6000_PIC_OFFSET_TABLE_REGNUM)
31755 offset = 0;
31756 else
31757 gcc_unreachable ();
31759 return offset;
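/* Worked example (hypothetical frame): if the frame has been pushed
   (info->push_p) and info->total_size is 208, eliminating the arg
   pointer to the stack pointer yields 208, arg pointer to hard frame
   pointer also yields 208, and hard frame pointer to stack pointer
   yields 0.  */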
31762 static rtx
31763 rs6000_dwarf_register_span (rtx reg)
31765 rtx parts[8];
31766 int i, words;
31767 unsigned regno = REGNO (reg);
31768 machine_mode mode = GET_MODE (reg);
31770 if (TARGET_SPE
31771 && regno < 32
31772 && (SPE_VECTOR_MODE (GET_MODE (reg))
31773 || (TARGET_E500_DOUBLE && FLOAT_MODE_P (mode)
31774 && mode != SFmode && mode != SDmode && mode != SCmode)))
31776 else
31777 return NULL_RTX;
31779 regno = REGNO (reg);
31781 /* The duality of the SPE register size wreaks all kinds of havoc.
31782 This is a way of distinguishing r0 in 32-bits from r0 in
31783 64-bits. */
31784 words = (GET_MODE_SIZE (mode) + UNITS_PER_FP_WORD - 1) / UNITS_PER_FP_WORD;
31785 gcc_assert (words <= 4);
31786 for (i = 0; i < words; i++, regno++)
31788 if (BYTES_BIG_ENDIAN)
31790 parts[2 * i] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31791 parts[2 * i + 1] = gen_rtx_REG (SImode, regno);
31793 else
31795 parts[2 * i] = gen_rtx_REG (SImode, regno);
31796 parts[2 * i + 1] = gen_rtx_REG (SImode, regno + FIRST_SPE_HIGH_REGNO);
31800 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (words * 2, parts));
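/* Sketch of the span for a big-endian SPE DImode value in r5
   (assuming UNITS_PER_FP_WORD is 8, WORDS is 1): the result is a
   two-entry PARALLEL pairing the SImode high half in r5's SPE
   high-register column with the SImode low half in r5 itself, letting
   the unwinder track both 32-bit pieces.  */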
31803 /* Fill in sizes for SPE register high parts in table used by unwinder. */
31805 static void
31806 rs6000_init_dwarf_reg_sizes_extra (tree address)
31808 if (TARGET_SPE)
31810 int i;
31811 machine_mode mode = TYPE_MODE (char_type_node);
31812 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31813 rtx mem = gen_rtx_MEM (BLKmode, addr);
31814 rtx value = gen_int_mode (4, mode);
31816 for (i = FIRST_SPE_HIGH_REGNO; i < LAST_SPE_HIGH_REGNO+1; i++)
31818 int column = DWARF_REG_TO_UNWIND_COLUMN
31819 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31820 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31822 emit_move_insn (adjust_address (mem, mode, offset), value);
31826 if (TARGET_MACHO && ! TARGET_ALTIVEC)
31828 int i;
31829 machine_mode mode = TYPE_MODE (char_type_node);
31830 rtx addr = expand_expr (address, NULL_RTX, VOIDmode, EXPAND_NORMAL);
31831 rtx mem = gen_rtx_MEM (BLKmode, addr);
31832 rtx value = gen_int_mode (16, mode);
31834 /* On Darwin, libgcc may be built to run on both G3 and G4/5.
31835 The unwinder still needs to know the size of Altivec registers. */
31837 for (i = FIRST_ALTIVEC_REGNO; i < LAST_ALTIVEC_REGNO+1; i++)
31839 int column = DWARF_REG_TO_UNWIND_COLUMN
31840 (DWARF2_FRAME_REG_OUT (DWARF_FRAME_REGNUM (i), true));
31841 HOST_WIDE_INT offset = column * GET_MODE_SIZE (mode);
31843 emit_move_insn (adjust_address (mem, mode, offset), value);
31848 /* Map internal gcc register numbers to debug format register numbers.
31849 FORMAT specifies the type of debug register number to use:
31850 0 -- debug information, except for frame-related sections
31851 1 -- DWARF .debug_frame section
31852 2 -- DWARF .eh_frame section */
31854 unsigned int
31855 rs6000_dbx_register_number (unsigned int regno, unsigned int format)
31857 /* We never use the GCC internal number for SPE high registers.
31858 Those are mapped to the 1200..1231 range for all debug formats. */
31859 if (SPE_HIGH_REGNO_P (regno))
31860 return regno - FIRST_SPE_HIGH_REGNO + 1200;
31862 /* Except for the above, we use the internal number for non-DWARF
31863 debug information, and also for .eh_frame. */
31864 if ((format == 0 && write_symbols != DWARF2_DEBUG) || format == 2)
31865 return regno;
31867 /* On some platforms, we use the standard DWARF register
31868 numbering for .debug_info and .debug_frame. */
31869 #ifdef RS6000_USE_DWARF_NUMBERING
31870 if (regno <= 63)
31871 return regno;
31872 if (regno == LR_REGNO)
31873 return 108;
31874 if (regno == CTR_REGNO)
31875 return 109;
31876 /* Special handling for CR for .debug_frame: rs6000_emit_prologue has
31877 translated any combination of CR2, CR3, CR4 saves to a save of CR2.
31878 The actual code emitted saves the whole of CR, so we map CR2_REGNO
31879 to the DWARF reg for CR. */
31880 if (format == 1 && regno == CR2_REGNO)
31881 return 64;
31882 if (CR_REGNO_P (regno))
31883 return regno - CR0_REGNO + 86;
31884 if (regno == CA_REGNO)
31885 return 101; /* XER */
31886 if (ALTIVEC_REGNO_P (regno))
31887 return regno - FIRST_ALTIVEC_REGNO + 1124;
31888 if (regno == VRSAVE_REGNO)
31889 return 356;
31890 if (regno == VSCR_REGNO)
31891 return 67;
31892 if (regno == SPE_ACC_REGNO)
31893 return 99;
31894 if (regno == SPEFSCR_REGNO)
31895 return 612;
31896 #endif
31897 return regno;
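/* Examples under RS6000_USE_DWARF_NUMBERING (illustrative): LR maps to
   108 and CTR to 109 for any DWARF format; CA reports 101 (the XER
   column); and for .debug_frame (format == 1) a CR2 save reports the
   DWARF number 64 for the whole CR, per the prologue translation noted
   above.  */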
31900 /* target hook eh_return_filter_mode */
31901 static machine_mode
31902 rs6000_eh_return_filter_mode (void)
31904 return TARGET_32BIT ? SImode : word_mode;
31907 /* Target hook for scalar_mode_supported_p. */
31908 static bool
31909 rs6000_scalar_mode_supported_p (machine_mode mode)
31911 if (DECIMAL_FLOAT_MODE_P (mode))
31912 return default_decimal_float_supported_p ();
31913 else
31914 return default_scalar_mode_supported_p (mode);
31917 /* Target hook for vector_mode_supported_p. */
31918 static bool
31919 rs6000_vector_mode_supported_p (machine_mode mode)
31922 if (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (mode))
31923 return true;
31925 if (TARGET_SPE && SPE_VECTOR_MODE (mode))
31926 return true;
31928 else if (VECTOR_MEM_ALTIVEC_OR_VSX_P (mode))
31929 return true;
31931 else
31932 return false;
31935 /* Target hook for invalid_arg_for_unprototyped_fn. */
31936 static const char *
31937 invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
31939 return (!rs6000_darwin64_abi
31940 && typelist == 0
31941 && TREE_CODE (TREE_TYPE (val)) == VECTOR_TYPE
31942 && (funcdecl == NULL_TREE
31943 || (TREE_CODE (funcdecl) == FUNCTION_DECL
31944 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
31945 ? N_("AltiVec argument passed to unprototyped function")
31946 : NULL;
31949 /* For TARGET_SECURE_PLT 32-bit PIC code we can avoid the PIC register
31950 setup by calling the hidden function __stack_chk_fail_local instead
31951 of __stack_chk_fail directly. Otherwise it is better to call
31952 __stack_chk_fail directly. */
31954 static tree ATTRIBUTE_UNUSED
31955 rs6000_stack_protect_fail (void)
31957 return (DEFAULT_ABI == ABI_V4 && TARGET_SECURE_PLT && flag_pic)
31958 ? default_hidden_stack_protect_fail ()
31959 : default_external_stack_protect_fail ();
31962 void
31963 rs6000_final_prescan_insn (rtx_insn *insn, rtx *operand ATTRIBUTE_UNUSED,
31964 int num_operands ATTRIBUTE_UNUSED)
31966 if (rs6000_warn_cell_microcode)
31968 const char *temp;
31969 int insn_code_number = recog_memoized (insn);
31970 location_t location = INSN_LOCATION (insn);
31972 /* Punt on insns we cannot recognize. */
31973 if (insn_code_number < 0)
31974 return;
31976 temp = get_insn_template (insn_code_number, insn);
31978 if (get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS)
31979 warning_at (location, OPT_mwarn_cell_microcode,
31980 "emitting microcode insn %s\t[%s] #%d",
31981 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
31982 else if (get_attr_cell_micro (insn) == CELL_MICRO_CONDITIONAL)
31983 warning_at (location, OPT_mwarn_cell_microcode,
31984 "emitting conditional microcode insn %s\t[%s] #%d",
31985 temp, insn_data[INSN_CODE (insn)].name, INSN_UID (insn));
31989 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
31991 #if TARGET_ELF
31992 static unsigned HOST_WIDE_INT
31993 rs6000_asan_shadow_offset (void)
31995 return (unsigned HOST_WIDE_INT) 1 << (TARGET_64BIT ? 41 : 29);
31997 #endif
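/* Illustrative arithmetic: with the default ASan shadow scale of 3,
   the shadow address is (addr >> 3) + offset, so a 64-bit ELF target
   uses offset 1 << 41 (0x20000000000) and a 32-bit target uses
   1 << 29 (0x20000000).  */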
31999 /* Mask options that we want to support inside attribute((target)) and
32000 #pragma GCC target operations. Note that we do not include things like
32001 64/32-bit, endianness, hard/soft floating point, etc. that would have
32002 different calling sequences. */
32004 struct rs6000_opt_mask {
32005 const char *name; /* option name */
32006 HOST_WIDE_INT mask; /* mask to set */
32007 bool invert; /* invert sense of mask */
32008 bool valid_target; /* option is a target option */
32011 static struct rs6000_opt_mask const rs6000_opt_masks[] =
32013 { "altivec", OPTION_MASK_ALTIVEC, false, true },
32014 { "cmpb", OPTION_MASK_CMPB, false, true },
32015 { "crypto", OPTION_MASK_CRYPTO, false, true },
32016 { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
32017 { "dlmzb", OPTION_MASK_DLMZB, false, true },
32018 { "fprnd", OPTION_MASK_FPRND, false, true },
32019 { "hard-dfp", OPTION_MASK_DFP, false, true },
32020 { "htm", OPTION_MASK_HTM, false, true },
32021 { "isel", OPTION_MASK_ISEL, false, true },
32022 { "mfcrf", OPTION_MASK_MFCRF, false, true },
32023 { "mfpgpr", OPTION_MASK_MFPGPR, false, true },
32024 { "mulhw", OPTION_MASK_MULHW, false, true },
32025 { "multiple", OPTION_MASK_MULTIPLE, false, true },
32026 { "popcntb", OPTION_MASK_POPCNTB, false, true },
32027 { "popcntd", OPTION_MASK_POPCNTD, false, true },
32028 { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
32029 { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
32030 { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
32031 { "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
32032 { "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
32033 { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
32034 { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true },
32035 { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
32036 { "string", OPTION_MASK_STRING, false, true },
32037 { "update", OPTION_MASK_NO_UPDATE, true , true },
32038 { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false },
32039 { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false },
32040 { "vsx", OPTION_MASK_VSX, false, true },
32041 { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
32042 #ifdef OPTION_MASK_64BIT
32043 #if TARGET_AIX_OS
32044 { "aix64", OPTION_MASK_64BIT, false, false },
32045 { "aix32", OPTION_MASK_64BIT, true, false },
32046 #else
32047 { "64", OPTION_MASK_64BIT, false, false },
32048 { "32", OPTION_MASK_64BIT, true, false },
32049 #endif
32050 #endif
32051 #ifdef OPTION_MASK_EABI
32052 { "eabi", OPTION_MASK_EABI, false, false },
32053 #endif
32054 #ifdef OPTION_MASK_LITTLE_ENDIAN
32055 { "little", OPTION_MASK_LITTLE_ENDIAN, false, false },
32056 { "big", OPTION_MASK_LITTLE_ENDIAN, true, false },
32057 #endif
32058 #ifdef OPTION_MASK_RELOCATABLE
32059 { "relocatable", OPTION_MASK_RELOCATABLE, false, false },
32060 #endif
32061 #ifdef OPTION_MASK_STRICT_ALIGN
32062 { "strict-align", OPTION_MASK_STRICT_ALIGN, false, false },
32063 #endif
32064 { "soft-float", OPTION_MASK_SOFT_FLOAT, false, false },
32065 { "string", OPTION_MASK_STRING, false, false },
32068 /* Builtin mask mapping for printing the flags. */
32069 static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
32071 { "altivec", RS6000_BTM_ALTIVEC, false, false },
32072 { "vsx", RS6000_BTM_VSX, false, false },
32073 { "spe", RS6000_BTM_SPE, false, false },
32074 { "paired", RS6000_BTM_PAIRED, false, false },
32075 { "fre", RS6000_BTM_FRE, false, false },
32076 { "fres", RS6000_BTM_FRES, false, false },
32077 { "frsqrte", RS6000_BTM_FRSQRTE, false, false },
32078 { "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
32079 { "popcntd", RS6000_BTM_POPCNTD, false, false },
32080 { "cell", RS6000_BTM_CELL, false, false },
32081 { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
32082 { "crypto", RS6000_BTM_CRYPTO, false, false },
32083 { "htm", RS6000_BTM_HTM, false, false },
32084 { "hard-dfp", RS6000_BTM_DFP, false, false },
32085 { "hard-float", RS6000_BTM_HARD_FLOAT, false, false },
32086 { "long-double-128", RS6000_BTM_LDBL128, false, false },
32089 /* Option variables that we want to support inside attribute((target)) and
32090 #pragma GCC target operations. */
32092 struct rs6000_opt_var {
32093 const char *name; /* option name */
32094 size_t global_offset; /* offset of the option in global_options. */
32095 size_t target_offset; /* offset of the option in target options. */
32098 static struct rs6000_opt_var const rs6000_opt_vars[] =
32100 { "friz",
32101 offsetof (struct gcc_options, x_TARGET_FRIZ),
32102 offsetof (struct cl_target_option, x_TARGET_FRIZ), },
32103 { "avoid-indexed-addresses",
32104 offsetof (struct gcc_options, x_TARGET_AVOID_XFORM),
32105 offsetof (struct cl_target_option, x_TARGET_AVOID_XFORM) },
32106 { "paired",
32107 offsetof (struct gcc_options, x_rs6000_paired_float),
32108 offsetof (struct cl_target_option, x_rs6000_paired_float), },
32109 { "longcall",
32110 offsetof (struct gcc_options, x_rs6000_default_long_calls),
32111 offsetof (struct cl_target_option, x_rs6000_default_long_calls), },
32114 /* Inner function to handle attribute((target("..."))) and #pragma GCC target
32115 parsing. Return true if there were no errors. */
32117 static bool
32118 rs6000_inner_target_options (tree args, bool attr_p)
32120 bool ret = true;
32122 if (args == NULL_TREE)
32125 else if (TREE_CODE (args) == STRING_CST)
32127 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32128 char *q;
32130 while ((q = strtok (p, ",")) != NULL)
32132 bool error_p = false;
32133 bool not_valid_p = false;
32134 const char *cpu_opt = NULL;
32136 p = NULL;
32137 if (strncmp (q, "cpu=", 4) == 0)
32139 int cpu_index = rs6000_cpu_name_lookup (q+4);
32140 if (cpu_index >= 0)
32141 rs6000_cpu_index = cpu_index;
32142 else
32144 error_p = true;
32145 cpu_opt = q+4;
32148 else if (strncmp (q, "tune=", 5) == 0)
32150 int tune_index = rs6000_cpu_name_lookup (q+5);
32151 if (tune_index >= 0)
32152 rs6000_tune_index = tune_index;
32153 else
32155 error_p = true;
32156 cpu_opt = q+5;
32159 else
32161 size_t i;
32162 bool invert = false;
32163 char *r = q;
32165 error_p = true;
32166 if (strncmp (r, "no-", 3) == 0)
32168 invert = true;
32169 r += 3;
32172 for (i = 0; i < ARRAY_SIZE (rs6000_opt_masks); i++)
32173 if (strcmp (r, rs6000_opt_masks[i].name) == 0)
32175 HOST_WIDE_INT mask = rs6000_opt_masks[i].mask;
32177 if (!rs6000_opt_masks[i].valid_target)
32178 not_valid_p = true;
32179 else
32181 error_p = false;
32182 rs6000_isa_flags_explicit |= mask;
32184 /* VSX needs altivec, so -mvsx automagically sets
32185 altivec. */
32186 if (mask == OPTION_MASK_VSX && !invert)
32187 mask |= OPTION_MASK_ALTIVEC;
32189 if (rs6000_opt_masks[i].invert)
32190 invert = !invert;
32192 if (invert)
32193 rs6000_isa_flags &= ~mask;
32194 else
32195 rs6000_isa_flags |= mask;
32197 break;
32200 if (error_p && !not_valid_p)
32202 for (i = 0; i < ARRAY_SIZE (rs6000_opt_vars); i++)
32203 if (strcmp (r, rs6000_opt_vars[i].name) == 0)
32205 size_t j = rs6000_opt_vars[i].global_offset;
32206 *((int *) ((char *)&global_options + j)) = !invert;
32207 error_p = false;
32208 break;
32213 if (error_p)
32215 const char *eprefix, *esuffix;
32217 ret = false;
32218 if (attr_p)
32220 eprefix = "__attribute__((__target__(";
32221 esuffix = ")))";
32223 else
32225 eprefix = "#pragma GCC target ";
32226 esuffix = "";
32229 if (cpu_opt)
32230 error ("invalid cpu \"%s\" for %s\"%s\"%s", cpu_opt, eprefix,
32231 q, esuffix);
32232 else if (not_valid_p)
32233 error ("%s\"%s\"%s is not allowed", eprefix, q, esuffix);
32234 else
32235 error ("%s\"%s\"%s is invalid", eprefix, q, esuffix);
32240 else if (TREE_CODE (args) == TREE_LIST)
32244 tree value = TREE_VALUE (args);
32245 if (value)
32247 bool ret2 = rs6000_inner_target_options (value, attr_p);
32248 if (!ret2)
32249 ret = false;
32251 args = TREE_CHAIN (args);
32253 while (args != NULL_TREE);
32256 else
32257 gcc_unreachable ();
32259 return ret;
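/* Illustrative inputs this parser accepts:

     #pragma GCC target ("cpu=power8,vsx")

   sets rs6000_cpu_index and OPTION_MASK_VSX (which, per the code
   above, drags in OPTION_MASK_ALTIVEC), while

     __attribute__((__target__("no-multiple")))

   clears OPTION_MASK_MULTIPLE via the "no-" prefix handling.  */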
32262 /* Print out the target options as a list for -mdebug=target. */
32264 static void
32265 rs6000_debug_target_options (tree args, const char *prefix)
32267 if (args == NULL_TREE)
32268 fprintf (stderr, "%s<NULL>", prefix);
32270 else if (TREE_CODE (args) == STRING_CST)
32272 char *p = ASTRDUP (TREE_STRING_POINTER (args));
32273 char *q;
32275 while ((q = strtok (p, ",")) != NULL)
32277 p = NULL;
32278 fprintf (stderr, "%s\"%s\"", prefix, q);
32279 prefix = ", ";
32283 else if (TREE_CODE (args) == TREE_LIST)
32287 tree value = TREE_VALUE (args);
32288 if (value)
32290 rs6000_debug_target_options (value, prefix);
32291 prefix = ", ";
32293 args = TREE_CHAIN (args);
32295 while (args != NULL_TREE);
32298 else
32299 gcc_unreachable ();
32301 return;
32305 /* Hook to validate attribute((target("..."))). */
32307 static bool
32308 rs6000_valid_attribute_p (tree fndecl,
32309 tree ARG_UNUSED (name),
32310 tree args,
32311 int flags)
32313 struct cl_target_option cur_target;
32314 bool ret;
32315 tree old_optimize = build_optimization_node (&global_options);
32316 tree new_target, new_optimize;
32317 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32319 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
32321 if (TARGET_DEBUG_TARGET)
32323 tree tname = DECL_NAME (fndecl);
32324 fprintf (stderr, "\n==================== rs6000_valid_attribute_p:\n");
32325 if (tname)
32326 fprintf (stderr, "function: %.*s\n",
32327 (int) IDENTIFIER_LENGTH (tname),
32328 IDENTIFIER_POINTER (tname));
32329 else
32330 fprintf (stderr, "function: unknown\n");
32332 fprintf (stderr, "args:");
32333 rs6000_debug_target_options (args, " ");
32334 fprintf (stderr, "\n");
32336 if (flags)
32337 fprintf (stderr, "flags: 0x%x\n", flags);
32339 fprintf (stderr, "--------------------\n");
32342 old_optimize = build_optimization_node (&global_options);
32343 func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
32345 /* If the function changed the optimization levels as well as setting target
32346 options, start with the optimizations specified. */
32347 if (func_optimize && func_optimize != old_optimize)
32348 cl_optimization_restore (&global_options,
32349 TREE_OPTIMIZATION (func_optimize));
32351 /* The target attributes may also change some optimization flags, so update
32352 the optimization options if necessary. */
32353 cl_target_option_save (&cur_target, &global_options);
32354 rs6000_cpu_index = rs6000_tune_index = -1;
32355 ret = rs6000_inner_target_options (args, true);
32357 /* Set up any additional state. */
32358 if (ret)
32360 ret = rs6000_option_override_internal (false);
32361 new_target = build_target_option_node (&global_options);
32363 else
32364 new_target = NULL;
32366 new_optimize = build_optimization_node (&global_options);
32368 if (!new_target)
32369 ret = false;
32371 else if (fndecl)
32373 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
32375 if (old_optimize != new_optimize)
32376 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
32379 cl_target_option_restore (&global_options, &cur_target);
32381 if (old_optimize != new_optimize)
32382 cl_optimization_restore (&global_options,
32383 TREE_OPTIMIZATION (old_optimize));
32385 return ret;
32389 /* Hook to validate the current #pragma GCC target and set the state, and
32390 update the macros based on what was changed. If ARGS is NULL, then
32391 POP_TARGET is used to reset the options. */
32393 bool
32394 rs6000_pragma_target_parse (tree args, tree pop_target)
32396 tree prev_tree = build_target_option_node (&global_options);
32397 tree cur_tree;
32398 struct cl_target_option *prev_opt, *cur_opt;
32399 HOST_WIDE_INT prev_flags, cur_flags, diff_flags;
32400 HOST_WIDE_INT prev_bumask, cur_bumask, diff_bumask;
32402 if (TARGET_DEBUG_TARGET)
32404 fprintf (stderr, "\n==================== rs6000_pragma_target_parse\n");
32405 fprintf (stderr, "args:");
32406 rs6000_debug_target_options (args, " ");
32407 fprintf (stderr, "\n");
32409 if (pop_target)
32411 fprintf (stderr, "pop_target:\n");
32412 debug_tree (pop_target);
32414 else
32415 fprintf (stderr, "pop_target: <NULL>\n");
32417 fprintf (stderr, "--------------------\n");
32420 if (! args)
32422 cur_tree = ((pop_target)
32423 ? pop_target
32424 : target_option_default_node);
32425 cl_target_option_restore (&global_options,
32426 TREE_TARGET_OPTION (cur_tree));
32428 else
32430 rs6000_cpu_index = rs6000_tune_index = -1;
32431 if (!rs6000_inner_target_options (args, false)
32432 || !rs6000_option_override_internal (false)
32433 || (cur_tree = build_target_option_node (&global_options))
32434 == NULL_TREE)
32436 if (TARGET_DEBUG_BUILTIN || TARGET_DEBUG_TARGET)
32437 fprintf (stderr, "invalid pragma\n");
32439 return false;
32443 target_option_current_node = cur_tree;
32445 /* If we have the preprocessor linked in (i.e. C or C++ languages), possibly
32446 change the macros that are defined. */
32447 if (rs6000_target_modify_macros_ptr)
32449 prev_opt = TREE_TARGET_OPTION (prev_tree);
32450 prev_bumask = prev_opt->x_rs6000_builtin_mask;
32451 prev_flags = prev_opt->x_rs6000_isa_flags;
32453 cur_opt = TREE_TARGET_OPTION (cur_tree);
32454 cur_flags = cur_opt->x_rs6000_isa_flags;
32455 cur_bumask = cur_opt->x_rs6000_builtin_mask;
32457 diff_bumask = (prev_bumask ^ cur_bumask);
32458 diff_flags = (prev_flags ^ cur_flags);
32460 if ((diff_flags != 0) || (diff_bumask != 0))
32462 /* Delete old macros. */
32463 rs6000_target_modify_macros_ptr (false,
32464 prev_flags & diff_flags,
32465 prev_bumask & diff_bumask);
32467 /* Define new macros. */
32468 rs6000_target_modify_macros_ptr (true,
32469 cur_flags & diff_flags,
32470 cur_bumask & diff_bumask);
32474 return true;
32478 /* Remember the last target of rs6000_set_current_function. */
32479 static GTY(()) tree rs6000_previous_fndecl;
32481 /* Establish appropriate back-end context for processing the function
32482 FNDECL. The argument might be NULL to indicate processing at top
32483 level, outside of any function scope. */
32484 static void
32485 rs6000_set_current_function (tree fndecl)
32487 tree old_tree = (rs6000_previous_fndecl
32488 ? DECL_FUNCTION_SPECIFIC_TARGET (rs6000_previous_fndecl)
32489 : NULL_TREE);
32491 tree new_tree = (fndecl
32492 ? DECL_FUNCTION_SPECIFIC_TARGET (fndecl)
32493 : NULL_TREE);
32495 if (TARGET_DEBUG_TARGET)
32497 bool print_final = false;
32498 fprintf (stderr, "\n==================== rs6000_set_current_function");
32500 if (fndecl)
32501 fprintf (stderr, ", fndecl %s (%p)",
32502 (DECL_NAME (fndecl)
32503 ? IDENTIFIER_POINTER (DECL_NAME (fndecl))
32504 : "<unknown>"), (void *)fndecl);
32506 if (rs6000_previous_fndecl)
32507 fprintf (stderr, ", prev_fndecl (%p)", (void *)rs6000_previous_fndecl);
32509 fprintf (stderr, "\n");
32510 if (new_tree)
32512 fprintf (stderr, "\nnew fndecl target specific options:\n");
32513 debug_tree (new_tree);
32514 print_final = true;
32517 if (old_tree)
32519 fprintf (stderr, "\nold fndecl target specific options:\n");
32520 debug_tree (old_tree);
32521 print_final = true;
32524 if (print_final)
32525 fprintf (stderr, "--------------------\n");
32528 /* Only change the context if the function changes. This hook is called
32529 several times in the course of compiling a function, and we don't want to
32530 slow things down too much or call target_reinit when it isn't safe. */
32531 if (fndecl && fndecl != rs6000_previous_fndecl)
32533 rs6000_previous_fndecl = fndecl;
32534 if (old_tree == new_tree)
32537 else if (new_tree)
32539 cl_target_option_restore (&global_options,
32540 TREE_TARGET_OPTION (new_tree));
32541 if (TREE_TARGET_GLOBALS (new_tree))
32542 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32543 else
32544 TREE_TARGET_GLOBALS (new_tree)
32545 = save_target_globals_default_opts ();
32548 else if (old_tree)
32550 new_tree = target_option_current_node;
32551 cl_target_option_restore (&global_options,
32552 TREE_TARGET_OPTION (new_tree));
32553 if (TREE_TARGET_GLOBALS (new_tree))
32554 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
32555 else if (new_tree == target_option_default_node)
32556 restore_target_globals (&default_target_globals);
32557 else
32558 TREE_TARGET_GLOBALS (new_tree)
32559 = save_target_globals_default_opts ();
32565 /* Save the current options */
32567 static void
32568 rs6000_function_specific_save (struct cl_target_option *ptr,
32569 struct gcc_options *opts)
32571 ptr->x_rs6000_isa_flags = opts->x_rs6000_isa_flags;
32572 ptr->x_rs6000_isa_flags_explicit = opts->x_rs6000_isa_flags_explicit;
32575 /* Restore the current options */
32577 static void
32578 rs6000_function_specific_restore (struct gcc_options *opts,
32579 struct cl_target_option *ptr)
32582 opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
32583 opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
32584 (void) rs6000_option_override_internal (false);
32587 /* Print the current options */
32589 static void
32590 rs6000_function_specific_print (FILE *file, int indent,
32591 struct cl_target_option *ptr)
32593 rs6000_print_isa_options (file, indent, "Isa options set",
32594 ptr->x_rs6000_isa_flags);
32596 rs6000_print_isa_options (file, indent, "Isa options explicit",
32597 ptr->x_rs6000_isa_flags_explicit);
32600 /* Helper function to print the current isa or misc options on a line. */
32602 static void
32603 rs6000_print_options_internal (FILE *file,
32604 int indent,
32605 const char *string,
32606 HOST_WIDE_INT flags,
32607 const char *prefix,
32608 const struct rs6000_opt_mask *opts,
32609 size_t num_elements)
32611 size_t i;
32612 size_t start_column = 0;
32613 size_t cur_column;
32614 size_t max_column = 76;
32615 const char *comma = "";
32617 if (indent)
32618 start_column += fprintf (file, "%*s", indent, "");
32620 if (!flags)
32622 fprintf (file, DEBUG_FMT_S, string, "<none>");
32623 return;
32626 start_column += fprintf (file, DEBUG_FMT_WX, string, flags);
32628 /* Print the various mask options. */
32629 cur_column = start_column;
32630 for (i = 0; i < num_elements; i++)
32632 if ((flags & opts[i].mask) != 0)
32634 const char *no_str = opts[i].invert ? "no-" : "";
32635 size_t len = (strlen (comma)
32636 + strlen (prefix)
32637 + strlen (no_str)
32638 + strlen (opts[i].name));
32640 cur_column += len;
32641 if (cur_column > max_column)
32643 fprintf (file, ", \\\n%*s", (int)start_column, "");
32644 cur_column = start_column + len;
32645 comma = "";
32648 fprintf (file, "%s%s%s%s", comma, prefix, no_str,
32649 opts[i].name);
32650 flags &= ~ opts[i].mask;
32651 comma = ", ";
32655 fputs ("\n", file);
32658 /* Helper function to print the current isa options on a line. */
32660 static void
32661 rs6000_print_isa_options (FILE *file, int indent, const char *string,
32662 HOST_WIDE_INT flags)
32664 rs6000_print_options_internal (file, indent, string, flags, "-m",
32665 &rs6000_opt_masks[0],
32666 ARRAY_SIZE (rs6000_opt_masks));
32669 static void
32670 rs6000_print_builtin_options (FILE *file, int indent, const char *string,
32671 HOST_WIDE_INT flags)
32673 rs6000_print_options_internal (file, indent, string, flags, "",
32674 &rs6000_builtin_mask_names[0],
32675 ARRAY_SIZE (rs6000_builtin_mask_names));
32679 /* Hook to determine if one function can safely inline another. */
32681 static bool
32682 rs6000_can_inline_p (tree caller, tree callee)
32684 bool ret = false;
32685 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
32686 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
32688 /* If callee has no option attributes, then it is ok to inline. */
32689 if (!callee_tree)
32690 ret = true;
32692 /* If caller has no option attributes, but callee does then it is not ok to
32693 inline. */
32694 else if (!caller_tree)
32695 ret = false;
32697 else
32699 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
32700 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
32702 /* Callee's options must be a subset of the caller's, i.e. a vsx function
32703 can inline an altivec function but a non-vsx function can't inline a
32704 vsx function. */
32705 if ((caller_opts->x_rs6000_isa_flags & callee_opts->x_rs6000_isa_flags)
32706 == callee_opts->x_rs6000_isa_flags)
32707 ret = true;
32710 if (TARGET_DEBUG_TARGET)
32711 fprintf (stderr, "rs6000_can_inline_p: caller %s, callee %s, %s inline\n",
32712 (DECL_NAME (caller)
32713 ? IDENTIFIER_POINTER (DECL_NAME (caller))
32714 : "<unknown>"),
32715 (DECL_NAME (callee)
32716 ? IDENTIFIER_POINTER (DECL_NAME (callee))
32717 : "<unknown>"),
32718 (ret ? "can" : "cannot"));
32720 return ret;
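/* Example (illustrative): a caller compiled with -mvsx, whose ISA
   flags include both OPTION_MASK_VSX and OPTION_MASK_ALTIVEC, may
   inline a callee marked __attribute__((target("altivec"))) because
   the callee's flags form a subset; inlining in the other direction
   is refused.  */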
32723 /* Allocate a stack temp and fix up the address so it meets the particular
32724 memory requirements (either offsettable or REG+REG addressing). */
32726 rtx
32727 rs6000_allocate_stack_temp (machine_mode mode,
32728 bool offsettable_p,
32729 bool reg_reg_p)
32731 rtx stack = assign_stack_temp (mode, GET_MODE_SIZE (mode));
32732 rtx addr = XEXP (stack, 0);
32733 int strict_p = (reload_in_progress || reload_completed);
32735 if (!legitimate_indirect_address_p (addr, strict_p))
32737 if (offsettable_p
32738 && !rs6000_legitimate_offset_address_p (mode, addr, strict_p, true))
32739 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32741 else if (reg_reg_p && !legitimate_indexed_address_p (addr, strict_p))
32742 stack = replace_equiv_address (stack, copy_addr_to_reg (addr));
32745 return stack;
32748 /* Given a memory reference, if it does not use reg or reg+reg addressing,
32749 convert it to such a form to deal with memory reference instructions
32750 like STFIWX that only accept reg+reg addressing. */
32752 rtx
32753 rs6000_address_for_fpconvert (rtx x)
32755 int strict_p = (reload_in_progress || reload_completed);
32756 rtx addr;
32758 gcc_assert (MEM_P (x));
32759 addr = XEXP (x, 0);
32760 if (! legitimate_indirect_address_p (addr, strict_p)
32761 && ! legitimate_indexed_address_p (addr, strict_p))
32763 if (GET_CODE (addr) == PRE_INC || GET_CODE (addr) == PRE_DEC)
32765 rtx reg = XEXP (addr, 0);
32766 HOST_WIDE_INT size = GET_MODE_SIZE (GET_MODE (x));
32767 rtx size_rtx = GEN_INT ((GET_CODE (addr) == PRE_DEC) ? -size : size);
32768 gcc_assert (REG_P (reg));
32769 emit_insn (gen_add3_insn (reg, reg, size_rtx));
32770 addr = reg;
32772 else if (GET_CODE (addr) == PRE_MODIFY)
32774 rtx reg = XEXP (addr, 0);
32775 rtx expr = XEXP (addr, 1);
32776 gcc_assert (REG_P (reg));
32777 gcc_assert (GET_CODE (expr) == PLUS);
32778 emit_insn (gen_add3_insn (reg, XEXP (expr, 0), XEXP (expr, 1)));
32779 addr = reg;
32782 x = replace_equiv_address (x, copy_addr_to_reg (addr));
32785 return x;
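/* Sketch: for a 4-byte access through (pre_inc r9), the code above
   emits the update "addi r9,r9,4" ahead of the access and rewrites the
   MEM as a plain register-indirect reference, a form STFIWX-style
   instructions accept.  */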
32788 /* Given a memory reference, if it is not in the form for altivec memory
32789 reference instructions (i.e. reg or reg+reg addressing with AND of -16),
32790 convert to the altivec format. */
32792 rtx
32793 rs6000_address_for_altivec (rtx x)
32795 gcc_assert (MEM_P (x));
32796 if (!altivec_indexed_or_indirect_operand (x, GET_MODE (x)))
32798 rtx addr = XEXP (x, 0);
32799 int strict_p = (reload_in_progress || reload_completed);
32801 if (!legitimate_indexed_address_p (addr, strict_p)
32802 && !legitimate_indirect_address_p (addr, strict_p))
32803 addr = copy_to_mode_reg (Pmode, addr);
32805 addr = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
32806 x = change_address (x, GET_MODE (x), addr);
32809 return x;
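/* Sketch: given a MEM whose address is (plus r3 16), which is neither
   indexed nor indirect, the address is forced into a register and then
   wrapped as (and rN -16), the 16-byte-aligned form the lvx/stvx
   patterns expect.  */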
32812 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
32814 On the RS/6000, all integer constants are acceptable, though most won't
32815 be valid for particular insns. Only easy FP constants are acceptable. */
32817 static bool
32818 rs6000_legitimate_constant_p (machine_mode mode, rtx x)
32820 if (TARGET_ELF && tls_referenced_p (x))
32821 return false;
32823 return ((GET_CODE (x) != CONST_DOUBLE && GET_CODE (x) != CONST_VECTOR)
32824 || GET_MODE (x) == VOIDmode
32825 || (TARGET_POWERPC64 && mode == DImode)
32826 || easy_fp_constant (x, mode)
32827 || easy_vector_constant (x, mode));
32832 /* Expand code to perform a call under the AIX or ELFv2 ABI. */
32834 void
32835 rs6000_call_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32837 rtx toc_reg = gen_rtx_REG (Pmode, TOC_REGNUM);
32838 rtx toc_load = NULL_RTX;
32839 rtx toc_restore = NULL_RTX;
32840 rtx func_addr;
32841 rtx abi_reg = NULL_RTX;
32842 rtx call[4];
32843 int n_call;
32844 rtx insn;
32846 /* Handle longcall attributes. */
32847 if (INTVAL (cookie) & CALL_LONG)
32848 func_desc = rs6000_longcall_ref (func_desc);
32850 /* Handle indirect calls. */
32851 if (GET_CODE (func_desc) != SYMBOL_REF
32852 || (DEFAULT_ABI == ABI_AIX && !SYMBOL_REF_FUNCTION_P (func_desc)))
32854 /* Save the TOC into its reserved slot before the call,
32855 and prepare to restore it after the call. */
32856 rtx stack_ptr = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
32857 rtx stack_toc_offset = GEN_INT (RS6000_TOC_SAVE_SLOT);
32858 rtx stack_toc_mem = gen_frame_mem (Pmode,
32859 gen_rtx_PLUS (Pmode, stack_ptr,
32860 stack_toc_offset));
32861 toc_restore = gen_rtx_SET (VOIDmode, toc_reg, stack_toc_mem);
32863 /* Can we optimize saving the TOC in the prologue or
32864 do we need to do it at every call? */
32865 if (TARGET_SAVE_TOC_INDIRECT && !cfun->calls_alloca)
32866 cfun->machine->save_toc_in_prologue = true;
32867 else
32869 MEM_VOLATILE_P (stack_toc_mem) = 1;
32870 emit_move_insn (stack_toc_mem, toc_reg);
32873 if (DEFAULT_ABI == ABI_ELFv2)
32875 /* A function pointer in the ELFv2 ABI is just a plain address, but
32876 the ABI requires it to be loaded into r12 before the call. */
32877 func_addr = gen_rtx_REG (Pmode, 12);
32878 emit_move_insn (func_addr, func_desc);
32879 abi_reg = func_addr;
32881 else
32883 /* A function pointer under AIX is a pointer to a data area whose
32884 first word contains the actual address of the function, whose
32885 second word contains a pointer to its TOC, and whose third word
32886 contains a value to place in the static chain register (r11).
32887 Note that if we load the static chain, our "trampoline" need
32888 not have any executable code. */
32890 /* Load up address of the actual function. */
32891 func_desc = force_reg (Pmode, func_desc);
32892 func_addr = gen_reg_rtx (Pmode);
32893 emit_move_insn (func_addr, gen_rtx_MEM (Pmode, func_desc));
32895 /* Prepare to load the TOC of the called function. Note that the
32896 TOC load must happen immediately before the actual call so
32897 that unwinding the TOC registers works correctly. See the
32898 comment in frob_update_context. */
32899 rtx func_toc_offset = GEN_INT (GET_MODE_SIZE (Pmode));
32900 rtx func_toc_mem = gen_rtx_MEM (Pmode,
32901 gen_rtx_PLUS (Pmode, func_desc,
32902 func_toc_offset));
32903 toc_load = gen_rtx_USE (VOIDmode, func_toc_mem);
32905 /* If we have a static chain, load it up. */
32906 if (TARGET_POINTERS_TO_NESTED_FUNCTIONS)
32908 rtx sc_reg = gen_rtx_REG (Pmode, STATIC_CHAIN_REGNUM);
32909 rtx func_sc_offset = GEN_INT (2 * GET_MODE_SIZE (Pmode));
32910 rtx func_sc_mem = gen_rtx_MEM (Pmode,
32911 gen_rtx_PLUS (Pmode, func_desc,
32912 func_sc_offset));
32913 emit_move_insn (sc_reg, func_sc_mem);
32914 abi_reg = sc_reg;
32918 else
32920 /* Direct calls use the TOC: for local calls, the callee will
32921 assume the TOC register is set; for non-local calls, the
32922 PLT stub needs the TOC register. */
32923 abi_reg = toc_reg;
32924 func_addr = func_desc;
32927 /* Create the call. */
32928 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_addr), flag);
32929 if (value != NULL_RTX)
32930 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
32931 n_call = 1;
32933 if (toc_load)
32934 call[n_call++] = toc_load;
32935 if (toc_restore)
32936 call[n_call++] = toc_restore;
32938 call[n_call++] = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (Pmode, LR_REGNO));
32940 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (n_call, call));
32941 insn = emit_call_insn (insn);
32943 /* Mention all registers defined by the ABI to hold information
32944 as uses in CALL_INSN_FUNCTION_USAGE. */
32945 if (abi_reg)
32946 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), abi_reg);
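/* Descriptor layout assumed by the AIX branch above (a sketch):

     word 0: address of the function's code
     word 1: TOC pointer of the callee's module
     word 2: static chain (environment) value

   An indirect call therefore loads word 0 as the call target, attaches
   a USE of word 1 so the TOC restore can be scheduled correctly, and,
   with TARGET_POINTERS_TO_NESTED_FUNCTIONS, loads word 2 into r11.  */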
32949 /* Expand code to perform a sibling call under the AIX or ELFv2 ABI. */
32951 void
32952 rs6000_sibcall_aix (rtx value, rtx func_desc, rtx flag, rtx cookie)
32954 rtx call[2];
32955 rtx insn;
32957 gcc_assert (INTVAL (cookie) == 0);
32959 /* Create the call. */
32960 call[0] = gen_rtx_CALL (VOIDmode, gen_rtx_MEM (SImode, func_desc), flag);
32961 if (value != NULL_RTX)
32962 call[0] = gen_rtx_SET (VOIDmode, value, call[0]);
32964 call[1] = simple_return_rtx;
32966 insn = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (2, call));
32967 insn = emit_call_insn (insn);
32969 /* Note use of the TOC register. */
32970 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, TOC_REGNUM));
32971 /* We need to also mark a use of the link register since the function we
32972 sibling-call to will use it to return to our caller. */
32973 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, LR_REGNO));
32976 /* Return whether we need to always update the saved TOC pointer when we update
32977 the stack pointer. */
32979 static bool
32980 rs6000_save_toc_in_prologue_p (void)
32982 return (cfun && cfun->machine && cfun->machine->save_toc_in_prologue);
32985 #ifdef HAVE_GAS_HIDDEN
32986 # define USE_HIDDEN_LINKONCE 1
32987 #else
32988 # define USE_HIDDEN_LINKONCE 0
32989 #endif
32991 /* Fills in the label name that should be used for a 476 link stack thunk. */
32993 void
32994 get_ppc476_thunk_name (char name[32])
32996 gcc_assert (TARGET_LINK_STACK);
32998 if (USE_HIDDEN_LINKONCE)
32999 sprintf (name, "__ppc476.get_thunk");
33000 else
33001 ASM_GENERATE_INTERNAL_LABEL (name, "LPPC476_", 0);
33004 /* This function emits the simple thunk routine that is used to preserve
33005 the link stack on the 476 cpu. */
33007 static void rs6000_code_end (void) ATTRIBUTE_UNUSED;
33008 static void
33009 rs6000_code_end (void)
33011 char name[32];
33012 tree decl;
33014 if (!TARGET_LINK_STACK)
33015 return;
33017 get_ppc476_thunk_name (name);
33019 decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL, get_identifier (name),
33020 build_function_type_list (void_type_node, NULL_TREE));
33021 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
33022 NULL_TREE, void_type_node);
33023 TREE_PUBLIC (decl) = 1;
33024 TREE_STATIC (decl) = 1;
33026 #if RS6000_WEAK
33027 if (USE_HIDDEN_LINKONCE)
33029 cgraph_node::create (decl)->set_comdat_group (DECL_ASSEMBLER_NAME (decl));
33030 targetm.asm_out.unique_section (decl, 0);
33031 switch_to_section (get_named_section (decl, NULL, 0));
33032 DECL_WEAK (decl) = 1;
33033 ASM_WEAKEN_DECL (asm_out_file, decl, name, 0);
33034 targetm.asm_out.globalize_label (asm_out_file, name);
33035 targetm.asm_out.assemble_visibility (decl, VISIBILITY_HIDDEN);
33036 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
33038 else
33039 #endif
33041 switch_to_section (text_section);
33042 ASM_OUTPUT_LABEL (asm_out_file, name);
33045 DECL_INITIAL (decl) = make_node (BLOCK);
33046 current_function_decl = decl;
33047 init_function_start (decl);
33048 first_function_block_is_cold = false;
33049 /* Make sure unwind info is emitted for the thunk if needed. */
33050 final_start_function (emit_barrier (), asm_out_file, 1);
33052 fputs ("\tblr\n", asm_out_file);
33054 final_end_function ();
33055 init_insn_lengths ();
33056 free_after_compilation (cfun);
33057 set_cfun (NULL);
33058 current_function_decl = NULL;
33061 /* Add r30 to hard reg set if the prologue sets it up and it is not
33062 pic_offset_table_rtx. */
33064 static void
33065 rs6000_set_up_by_prologue (struct hard_reg_set_container *set)
33067 if (!TARGET_SINGLE_PIC_BASE
33068 && TARGET_TOC
33069 && TARGET_MINIMAL_TOC
33070 && get_pool_size () != 0)
33071 add_to_hard_reg_set (&set->set, Pmode, RS6000_PIC_OFFSET_TABLE_REGNUM);
33075 /* Helper function for rs6000_split_logical to emit a logical instruction after
33076 splitting the operation into single GPR registers.
33078 DEST is the destination register.
33079 OP1 and OP2 are the input source registers.
33080 CODE is the base operation (AND, IOR, XOR, NOT).
33081 MODE is the machine mode.
33082 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33083 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33084 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33086 static void
33087 rs6000_split_logical_inner (rtx dest,
33088 rtx op1,
33089 rtx op2,
33090 enum rtx_code code,
33091 machine_mode mode,
33092 bool complement_final_p,
33093 bool complement_op1_p,
33094 bool complement_op2_p)
33096 rtx bool_rtx;
33098 /* Optimize AND of 0/0xffffffff and IOR/XOR of 0. */
33099 if (op2 && GET_CODE (op2) == CONST_INT
33100 && (mode == SImode || (mode == DImode && TARGET_POWERPC64))
33101 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33103 HOST_WIDE_INT mask = GET_MODE_MASK (mode);
33104 HOST_WIDE_INT value = INTVAL (op2) & mask;
33106 /* Optimize AND of 0 to just set 0. Optimize AND of -1 to be a move. */
33107 if (code == AND)
33109 if (value == 0)
33111 emit_insn (gen_rtx_SET (VOIDmode, dest, const0_rtx));
33112 return;
33115 else if (value == mask)
33117 if (!rtx_equal_p (dest, op1))
33118 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33119 return;
33123 /* Optimize IOR/XOR of 0 to be a simple move. Split large operations
33124 into separate ORI/ORIS or XORI/XORIS instructions. */
33125 else if (code == IOR || code == XOR)
33127 if (value == 0)
33129 if (!rtx_equal_p (dest, op1))
33130 emit_insn (gen_rtx_SET (VOIDmode, dest, op1));
33131 return;
33136 if (code == AND && mode == SImode
33137 && !complement_final_p && !complement_op1_p && !complement_op2_p)
33139 emit_insn (gen_andsi3 (dest, op1, op2));
33140 return;
33143 if (complement_op1_p)
33144 op1 = gen_rtx_NOT (mode, op1);
33146 if (complement_op2_p)
33147 op2 = gen_rtx_NOT (mode, op2);
33149 bool_rtx = ((code == NOT)
33150 ? gen_rtx_NOT (mode, op1)
33151 : gen_rtx_fmt_ee (code, mode, op1, op2));
33153 if (complement_final_p)
33154 bool_rtx = gen_rtx_NOT (mode, bool_rtx);
33156 emit_insn (gen_rtx_SET (VOIDmode, dest, bool_rtx));
33159 /* Split a DImode AND/IOR/XOR with a constant on a 32-bit system. These
33160 operations are split immediately during RTL generation to allow for more
33161 optimizations of the AND/IOR/XOR.
33163 OPERANDS is an array containing the destination and two input operands.
33164 CODE is the base operation (AND, IOR, XOR, NOT).
33166 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33167 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33168 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
33172 static void
33173 rs6000_split_logical_di (rtx operands[3],
33174 enum rtx_code code,
33175 bool complement_final_p,
33176 bool complement_op1_p,
33177 bool complement_op2_p)
33179 const HOST_WIDE_INT lower_32bits = HOST_WIDE_INT_C(0xffffffff);
33180 const HOST_WIDE_INT upper_32bits = ~ lower_32bits;
33181 const HOST_WIDE_INT sign_bit = HOST_WIDE_INT_C(0x80000000);
33182 enum hi_lo { hi = 0, lo = 1 };
33183 rtx op0_hi_lo[2], op1_hi_lo[2], op2_hi_lo[2];
33184 size_t i;
33186 op0_hi_lo[hi] = gen_highpart (SImode, operands[0]);
33187 op1_hi_lo[hi] = gen_highpart (SImode, operands[1]);
33188 op0_hi_lo[lo] = gen_lowpart (SImode, operands[0]);
33189 op1_hi_lo[lo] = gen_lowpart (SImode, operands[1]);
33191 if (code == NOT)
33192 op2_hi_lo[hi] = op2_hi_lo[lo] = NULL_RTX;
33193 else
33195 if (GET_CODE (operands[2]) != CONST_INT)
33197 op2_hi_lo[hi] = gen_highpart_mode (SImode, DImode, operands[2]);
33198 op2_hi_lo[lo] = gen_lowpart (SImode, operands[2]);
33200 else
33202 HOST_WIDE_INT value = INTVAL (operands[2]);
33203 HOST_WIDE_INT value_hi_lo[2];
33205 gcc_assert (!complement_final_p);
33206 gcc_assert (!complement_op1_p);
33207 gcc_assert (!complement_op2_p);
33209 value_hi_lo[hi] = value >> 32;
33210 value_hi_lo[lo] = value & lower_32bits;
33212 for (i = 0; i < 2; i++)
33214 HOST_WIDE_INT sub_value = value_hi_lo[i];
33216 if (sub_value & sign_bit)
33217 sub_value |= upper_32bits;
33219 op2_hi_lo[i] = GEN_INT (sub_value);
33221 /* If this is an AND instruction, check to see if we need to load
33222 the value in a register. */
33223 if (code == AND && sub_value != -1 && sub_value != 0
33224 && !and_operand (op2_hi_lo[i], SImode))
33225 op2_hi_lo[i] = force_reg (SImode, op2_hi_lo[i]);
33230 for (i = 0; i < 2; i++)
33232 /* Split large IOR/XOR operations. */
33233 if ((code == IOR || code == XOR)
33234 && GET_CODE (op2_hi_lo[i]) == CONST_INT
33235 && !complement_final_p
33236 && !complement_op1_p
33237 && !complement_op2_p
33238 && !logical_const_operand (op2_hi_lo[i], SImode))
33240 HOST_WIDE_INT value = INTVAL (op2_hi_lo[i]);
33241 HOST_WIDE_INT hi_16bits = value & HOST_WIDE_INT_C(0xffff0000);
33242 HOST_WIDE_INT lo_16bits = value & HOST_WIDE_INT_C(0x0000ffff);
33243 rtx tmp = gen_reg_rtx (SImode);
33245 /* Make sure the constant is sign extended. */
33246 if ((hi_16bits & sign_bit) != 0)
33247 hi_16bits |= upper_32bits;
33249 rs6000_split_logical_inner (tmp, op1_hi_lo[i], GEN_INT (hi_16bits),
33250 code, SImode, false, false, false);
33252 rs6000_split_logical_inner (op0_hi_lo[i], tmp, GEN_INT (lo_16bits),
33253 code, SImode, false, false, false);
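/* E.g., for illustration: an IOR with 0x12345678 is not a single
   16-bit logical constant, so the two calls above produce
   tmp = op1 | 0x12340000 (an ORIS) and then dest = tmp | 0x5678
   (an ORI); XOR splits the same way into XORIS/XORI. */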
33255 else
33256 rs6000_split_logical_inner (op0_hi_lo[i], op1_hi_lo[i], op2_hi_lo[i],
33257 code, SImode, complement_final_p,
33258 complement_op1_p, complement_op2_p);
33261 return;
33264 /* Split the insns that make up boolean operations operating on multiple GPR
33265 registers. The boolean MD patterns ensure that the inputs either are
33266 exactly the same as the output registers, or there is no overlap.
33268 OPERANDS is an array containing the destination and two input operands.
33269 CODE is the base operation (AND, IOR, XOR, NOT).
33270 If COMPLEMENT_FINAL_P is true, wrap the whole operation with NOT.
33271 If COMPLEMENT_OP1_P is true, wrap operand1 with NOT.
33272 If COMPLEMENT_OP2_P is true, wrap operand2 with NOT. */
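/* For illustration (a sketch): after reload on a 64-bit target, a
   TImode AND occupies two adjacent GPRs, so the loop below emits two
   DImode ANDs, one per register. On a 32-bit target, a DImode
   operation is instead diverted to rs6000_split_logical_di above. */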
33274 void
33275 rs6000_split_logical (rtx operands[3],
33276 enum rtx_code code,
33277 bool complement_final_p,
33278 bool complement_op1_p,
33279 bool complement_op2_p)
33281 machine_mode mode = GET_MODE (operands[0]);
33282 machine_mode sub_mode;
33283 rtx op0, op1, op2;
33284 int sub_size, regno0, regno1, nregs, i;
33286 /* If this is DImode, use the specialized version that can run before
33287 register allocation. */
33288 if (mode == DImode && !TARGET_POWERPC64)
33290 rs6000_split_logical_di (operands, code, complement_final_p,
33291 complement_op1_p, complement_op2_p);
33292 return;
33295 op0 = operands[0];
33296 op1 = operands[1];
33297 op2 = (code == NOT) ? NULL_RTX : operands[2];
33298 sub_mode = (TARGET_POWERPC64) ? DImode : SImode;
33299 sub_size = GET_MODE_SIZE (sub_mode);
33300 regno0 = REGNO (op0);
33301 regno1 = REGNO (op1);
33303 gcc_assert (reload_completed);
33304 gcc_assert (IN_RANGE (regno0, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33305 gcc_assert (IN_RANGE (regno1, FIRST_GPR_REGNO, LAST_GPR_REGNO));
33307 nregs = rs6000_hard_regno_nregs[(int)mode][regno0];
33308 gcc_assert (nregs > 1);
33310 if (op2 && REG_P (op2))
33311 gcc_assert (IN_RANGE (REGNO (op2), FIRST_GPR_REGNO, LAST_GPR_REGNO));
33313 for (i = 0; i < nregs; i++)
33315 int offset = i * sub_size;
33316 rtx sub_op0 = simplify_subreg (sub_mode, op0, mode, offset);
33317 rtx sub_op1 = simplify_subreg (sub_mode, op1, mode, offset);
33318 rtx sub_op2 = ((code == NOT)
33319 ? NULL_RTX
33320 : simplify_subreg (sub_mode, op2, mode, offset));
33322 rs6000_split_logical_inner (sub_op0, sub_op1, sub_op2, code, sub_mode,
33323 complement_final_p, complement_op1_p,
33324 complement_op2_p);
33327 return;
33331 /* Return true if the peephole2 pass can combine an addis instruction
33332 with a load that uses the addis result as part of its address, so the
33333 two can be fused together on a power8. */
33335 bool
33336 fusion_gpr_load_p (rtx addis_reg, /* register set via addis. */
33337 rtx addis_value, /* addis value. */
33338 rtx target, /* target register that is loaded. */
33339 rtx mem) /* bottom part of the memory addr. */
33341 rtx addr;
33342 rtx base_reg;
33344 /* Validate arguments. */
33345 if (!base_reg_operand (addis_reg, GET_MODE (addis_reg)))
33346 return false;
33348 if (!base_reg_operand (target, GET_MODE (target)))
33349 return false;
33351 if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value)))
33352 return false;
33354 /* Allow sign/zero extension. */
33355 if (GET_CODE (mem) == ZERO_EXTEND
33356 || (GET_CODE (mem) == SIGN_EXTEND && TARGET_P8_FUSION_SIGN))
33357 mem = XEXP (mem, 0);
33359 if (!MEM_P (mem))
33360 return false;
33362 if (!fusion_gpr_mem_load (mem, GET_MODE (mem)))
33363 return false;
33365 addr = XEXP (mem, 0); /* either PLUS or LO_SUM. */
33366 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33367 return false;
33369 /* Validate that the register used to load the high value is either the
33370 register being loaded, or a register whose use we can safely replace.
33372 This function is only called from the peephole2 pass and we assume that
33373 there are 2 instructions in the peephole (addis and load), so we want to
33374 check if the target register was not used in the memory address and the
33375 register to hold the addis result is dead after the peephole. */
33376 if (REGNO (addis_reg) != REGNO (target))
33378 if (reg_mentioned_p (target, mem))
33379 return false;
33381 if (!peep2_reg_dead_p (2, addis_reg))
33382 return false;
33384 /* If the target register being loaded is the stack pointer, we must
33385 avoid loading any other value into it, even temporarily. */
33386 if (REG_P (target) && REGNO (target) == STACK_POINTER_REGNUM)
33387 return false;
33390 base_reg = XEXP (addr, 0);
33391 return REGNO (addis_reg) == REGNO (base_reg);
33394 /* During the peephole2 pass, adjust and expand the insns for a load fusion
33395 sequence. We adjust the addis register to use the target register. If the
33396 load sign extends, we rewrite it as a zero-extending load followed by an
33397 explicit sign extension, since the fusion only covers zero-extending
33398 loads.
33400 The operands are:
33401 operands[0] register set with addis (to be replaced with target)
33402 operands[1] value set via addis
33403 operands[2] target register being loaded
33404 operands[3] D-form memory reference using operands[0]. */
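/* A sketch of the rewrite, for illustration only (register numbers
   invented):

     (set (reg:DI 10) (high ...))                        addis
     (set (reg:SI 9) (mem:SI (lo_sum (reg:DI 10) ...)))  load

   becomes a pair targeting the loaded register, with the load wrapped
   in UNSPEC_FUSION_GPR so the two insns stay adjacent for fusion:

     (set (reg:DI 9) (high ...))
     (set (reg:SI 9)
          (unspec:SI [(mem:SI (lo_sum (reg:DI 9) ...))]
                     UNSPEC_FUSION_GPR)) */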
33406 void
33407 expand_fusion_gpr_load (rtx *operands)
33409 rtx addis_value = operands[1];
33410 rtx target = operands[2];
33411 rtx orig_mem = operands[3];
33412 rtx new_addr, new_mem, orig_addr, offset;
33413 enum rtx_code plus_or_lo_sum;
33414 machine_mode target_mode = GET_MODE (target);
33415 machine_mode extend_mode = target_mode;
33416 machine_mode ptr_mode = Pmode;
33417 enum rtx_code extend = UNKNOWN;
33419 if (GET_CODE (orig_mem) == ZERO_EXTEND
33420 || (TARGET_P8_FUSION_SIGN && GET_CODE (orig_mem) == SIGN_EXTEND))
33422 extend = GET_CODE (orig_mem);
33423 orig_mem = XEXP (orig_mem, 0);
33424 target_mode = GET_MODE (orig_mem);
33427 gcc_assert (MEM_P (orig_mem));
33429 orig_addr = XEXP (orig_mem, 0);
33430 plus_or_lo_sum = GET_CODE (orig_addr);
33431 gcc_assert (plus_or_lo_sum == PLUS || plus_or_lo_sum == LO_SUM);
33433 offset = XEXP (orig_addr, 1);
33434 new_addr = gen_rtx_fmt_ee (plus_or_lo_sum, ptr_mode, addis_value, offset);
33435 new_mem = replace_equiv_address_nv (orig_mem, new_addr, false);
33437 if (extend != UNKNOWN)
33438 new_mem = gen_rtx_fmt_e (ZERO_EXTEND, extend_mode, new_mem);
33440 new_mem = gen_rtx_UNSPEC (extend_mode, gen_rtvec (1, new_mem),
33441 UNSPEC_FUSION_GPR);
33442 emit_insn (gen_rtx_SET (VOIDmode, target, new_mem));
33444 if (extend == SIGN_EXTEND)
33446 int sub_off = ((BYTES_BIG_ENDIAN)
33447 ? GET_MODE_SIZE (extend_mode) - GET_MODE_SIZE (target_mode)
33448 : 0);
33449 rtx sign_reg
33450 = simplify_subreg (target_mode, target, extend_mode, sub_off);
33452 emit_insn (gen_rtx_SET (VOIDmode, target,
33453 gen_rtx_SIGN_EXTEND (extend_mode, sign_reg)));
33456 return;
33459 /* Return a string to fuse an addis instruction with a GPR load into the
33460 same register that the addis set. The address that is used
33461 is the logical address that was formed during peephole2:
33462 (lo_sum (high) (low-part))
33464 The code is complicated, so we call output_asm_insn directly, and just
33465 return "". */
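/* For illustration, the output for an SImode TOC-relative load on ELF
   might look like (symbol invented):

     addis 9,2,sym@toc@ha		# gpr load fusion, type int
     lwz 9,sym@toc@l(9)

   where the trailing comment uses ASM_COMMENT_START. */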
33467 const char *
33468 emit_fusion_gpr_load (rtx target, rtx mem)
33470 rtx addis_value;
33471 rtx fuse_ops[10];
33472 rtx addr;
33473 rtx load_offset;
33474 const char *addis_str = NULL;
33475 const char *load_str = NULL;
33476 const char *mode_name = NULL;
33477 char insn_template[80];
33478 machine_mode mode;
33479 const char *comment_str = ASM_COMMENT_START;
33481 if (GET_CODE (mem) == ZERO_EXTEND)
33482 mem = XEXP (mem, 0);
33484 gcc_assert (REG_P (target) && MEM_P (mem));
33486 if (*comment_str == ' ')
33487 comment_str++;
33489 addr = XEXP (mem, 0);
33490 if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM)
33491 gcc_unreachable ();
33493 addis_value = XEXP (addr, 0);
33494 load_offset = XEXP (addr, 1);
33496 /* Now emit the load instruction to the same register. */
33497 mode = GET_MODE (mem);
33498 switch (mode)
33500 case QImode:
33501 mode_name = "char";
33502 load_str = "lbz";
33503 break;
33505 case HImode:
33506 mode_name = "short";
33507 load_str = "lhz";
33508 break;
33510 case SImode:
33511 mode_name = "int";
33512 load_str = "lwz";
33513 break;
33515 case DImode:
33516 gcc_assert (TARGET_POWERPC64);
33517 mode_name = "long";
33518 load_str = "ld";
33519 break;
33521 default:
33522 gcc_unreachable ();
33525 /* Emit the addis instruction. */
33526 fuse_ops[0] = target;
33527 if (satisfies_constraint_L (addis_value))
33529 fuse_ops[1] = addis_value;
33530 addis_str = "lis %0,%v1";
33533 else if (GET_CODE (addis_value) == PLUS)
33535 rtx op0 = XEXP (addis_value, 0);
33536 rtx op1 = XEXP (addis_value, 1);
33538 if (REG_P (op0) && CONST_INT_P (op1)
33539 && satisfies_constraint_L (op1))
33541 fuse_ops[1] = op0;
33542 fuse_ops[2] = op1;
33543 addis_str = "addis %0,%1,%v2";
33547 else if (GET_CODE (addis_value) == HIGH)
33549 rtx value = XEXP (addis_value, 0);
33550 if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL)
33552 fuse_ops[1] = XVECEXP (value, 0, 0); /* symbol ref. */
33553 fuse_ops[2] = XVECEXP (value, 0, 1); /* TOC register. */
33554 if (TARGET_ELF)
33555 addis_str = "addis %0,%2,%1@toc@ha";
33557 else if (TARGET_XCOFF)
33558 addis_str = "addis %0,%1@u(%2)";
33560 else
33561 gcc_unreachable ();
33564 else if (GET_CODE (value) == PLUS)
33566 rtx op0 = XEXP (value, 0);
33567 rtx op1 = XEXP (value, 1);
33569 if (GET_CODE (op0) == UNSPEC
33570 && XINT (op0, 1) == UNSPEC_TOCREL
33571 && CONST_INT_P (op1))
33573 fuse_ops[1] = XVECEXP (op0, 0, 0); /* symbol ref. */
33574 fuse_ops[2] = XVECEXP (op0, 0, 1); /* TOC register. */
33575 fuse_ops[3] = op1;
33576 if (TARGET_ELF)
33577 addis_str = "addis %0,%2,%1+%3@toc@ha";
33579 else if (TARGET_XCOFF)
33580 addis_str = "addis %0,%1+%3@u(%2)";
33582 else
33583 gcc_unreachable ();
33587 else if (satisfies_constraint_L (value))
33589 fuse_ops[1] = value;
33590 addis_str = "lis %0,%v1";
33593 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value))
33595 fuse_ops[1] = value;
33596 addis_str = "lis %0,%1@ha";
33600 if (!addis_str)
33601 fatal_insn ("Could not generate addis value for fusion", addis_value);
33603 sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s", addis_str,
33604 comment_str, mode_name);
33605 output_asm_insn (insn_template, fuse_ops);
33607 /* Emit the D-form load instruction. */
33608 if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset))
33610 sprintf (insn_template, "%s %%0,%%1(%%0)", load_str);
33611 fuse_ops[1] = load_offset;
33612 output_asm_insn (insn_template, fuse_ops);
33615 else if (GET_CODE (load_offset) == UNSPEC
33616 && XINT (load_offset, 1) == UNSPEC_TOCREL)
33618 if (TARGET_ELF)
33619 sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str);
33621 else if (TARGET_XCOFF)
33622 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33624 else
33625 gcc_unreachable ();
33627 fuse_ops[1] = XVECEXP (load_offset, 0, 0);
33628 output_asm_insn (insn_template, fuse_ops);
33631 else if (GET_CODE (load_offset) == PLUS
33632 && GET_CODE (XEXP (load_offset, 0)) == UNSPEC
33633 && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL
33634 && CONST_INT_P (XEXP (load_offset, 1)))
33636 rtx tocrel_unspec = XEXP (load_offset, 0);
33637 if (TARGET_ELF)
33638 sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str);
33640 else if (TARGET_XCOFF)
33641 sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str);
33643 else
33644 gcc_unreachable ();
33646 fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0);
33647 fuse_ops[2] = XEXP (load_offset, 1);
33648 output_asm_insn (insn_template, fuse_ops);
33651 else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset))
33653 sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str);
33655 fuse_ops[1] = load_offset;
33656 output_asm_insn (insn_template, fuse_ops);
33659 else
33660 fatal_insn ("Unable to generate load offset for fusion", load_offset);
33662 return "";
33665 /* Analyze vector computations and remove unnecessary doubleword
33666 swaps (xxswapdi instructions). This pass is performed only
33667 for little-endian VSX code generation.
33669 For this specific case, loads and stores of 4x32 and 2x64 vectors
33670 are inefficient. These are implemented using the lxvd2x and
33671 stxvd2x instructions, which invert the order of doublewords in
33672 a vector register. Thus the code generation inserts an xxswapdi
33673 after each such load, and prior to each such store. (For spill
33674 code after register assignment, an additional xxswapdi is inserted
33675 following each store in order to return a hard register to its
33676 unpermuted value.)
33678 The extra xxswapdi instructions reduce performance. This can be
33679 particularly bad for vectorized code. The purpose of this pass
33680 is to reduce the number of xxswapdi instructions required for
33681 correctness.
33683 The primary insight is that much code that operates on vectors
33684 does not care about the relative order of elements in a register,
33685 so long as the correct memory order is preserved. If we have
33686 a computation where all input values are provided by lxvd2x/xxswapdi
33687 sequences, all outputs are stored using xxswapdi/stxvd2x sequences,
33688 and all intermediate computations are pure SIMD (independent of
33689 element order), then all the xxswapdi's associated with the loads
33690 and stores may be removed.
33692 This pass uses some of the infrastructure and logical ideas from
33693 the "web" pass in web.c. We create maximal webs of computations
33694 fitting the description above using union-find. Each such web is
33695 then optimized by removing its unnecessary xxswapdi instructions.
33697 The pass is placed prior to global optimization so that we can
33698 perform the optimization in the safest and simplest way possible;
33699 that is, by replacing each xxswapdi insn with a register copy insn.
33700 Subsequent forward propagation will remove copies where possible.
33702 There are some operations sensitive to element order for which we
33703 can still allow the operation, provided we modify those operations.
33704 These include CONST_VECTORs, for which we must swap the first and
33705 second halves of the constant vector; and SUBREGs, for which we
33706 must adjust the byte offset to account for the swapped doublewords.
33707 A remaining opportunity would be non-immediate-form splats, for
33708 which we should adjust the selected lane of the input. We should
33709 also make code generation adjustments for sum-across operations,
33710 since this is a common vectorizer reduction.
33712 Because we run prior to the first split, we can see loads and stores
33713 here that match *vsx_le_perm_{load,store}_<mode>. These are vanilla
33714 vector loads and stores that have not yet been split into a permuting
33715 load/store and a swap. (One way this can happen is with a builtin
33716 call to vec_vsx_{ld,st}.) We can handle these as well, but rather
33717 than deleting a swap, we convert the load/store into a permuting
33718 load/store (which effectively removes the swap). */
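/* For illustration, a little-endian V4SI load/compute/store sequence
   such as (hand-written sketch; xxswapdi is an xxpermdi with
   selector 2):

     lxvd2x 0,0,9        load, doublewords reversed
     xxpermdi 34,0,0,2   swap after the load
     ...pure SIMD computation...
     xxpermdi 0,34,34,2  swap before the store
     stxvd2x 0,0,10      store, doublewords reversed again

   needs neither swap when every intermediate use is element-order
   independent; the pass replaces each with a register copy and lets
   forward propagation remove the copies. */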
33720 /* Notes on Permutes
33722 We do not currently handle computations that contain permutes. There
33723 is a general transformation that can be performed correctly, but it
33724 may introduce more expensive code than it replaces. To handle these
33725 would require a cost model to determine when to perform the optimization.
33726 This commentary records how this could be done if desired.
33728 The most general permute is something like this (example for V16QI):
33730 (vec_select:V16QI (vec_concat:V32QI (op1:V16QI) (op2:V16QI))
33731 (parallel [(const_int a0) (const_int a1)
33732 ...
33733 (const_int a14) (const_int a15)]))
33735 where a0,...,a15 are in [0,31] and select the elements from op1 and op2
33736 that appear in the result.
33738 Regardless of mode, we can convert the PARALLEL to a mask of 16
33739 byte-element selectors. Let's call this M, with M[i] representing
33740 the ith byte-element selector value. Then if we swap doublewords
33741 throughout the computation, we can get correct behavior by replacing
33742 M with M' as follows:
33744 { M[i+8]+8 : i < 8, M[i+8] in [0,7] U [16,23]
33745 M'[i] = { M[i+8]-8 : i < 8, M[i+8] in [8,15] U [24,31]
33746 { M[i-8]+8 : i >= 8, M[i-8] in [0,7] U [16,23]
33747 { M[i-8]-8 : i >= 8, M[i-8] in [8,15] U [24,31]
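As a worked instance (illustrative): if M[8] = 3, selecting byte 3
of op1, the first case gives M'[0] = M[8]+8 = 11. After the
doubleword swap, byte 3 of op1 sits at byte position 11, so the
new mask still selects the same data.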
33749 This seems promising at first, since we are just replacing one mask
33750 with another. But certain masks are preferable to others. If M
33751 is a mask that matches a vmrghh pattern, for example, M' certainly
33752 will not. Instead of a single vmrghh, we would generate a load of
33753 M' and a vperm. So we would need to know how many xxswapd's we can
33754 remove as a result of this transformation to determine if it's
33755 profitable; and preferably the logic would need to be aware of all
33756 the special preferable masks.
33758 Another form of permute is an UNSPEC_VPERM, in which the mask is
33759 already in a register. In some cases, this mask may be a constant
33760 that we can discover with ud-chains, in which case the above
33761 transformation is ok. However, the common usage here is for the
33762 mask to be produced by an UNSPEC_LVSL, in which case the mask
33763 cannot be known at compile time. In such a case we would have to
33764 generate several instructions to compute M' as above at run time,
33765 and a cost model is needed again. */
33767 /* This is based on the union-find logic in web.c. web_entry_base is
33768 defined in df.h. */
33769 class swap_web_entry : public web_entry_base
33771 public:
33772 /* Pointer to the insn. */
33773 rtx_insn *insn;
33774 /* Set if insn contains a mention of a vector register. All other
33775 fields are undefined if this field is unset. */
33776 unsigned int is_relevant : 1;
33777 /* Set if insn is a load. */
33778 unsigned int is_load : 1;
33779 /* Set if insn is a store. */
33780 unsigned int is_store : 1;
33781 /* Set if insn is a doubleword swap. This can either be a register swap
33782 or a permuting load or store (test is_load and is_store for this). */
33783 unsigned int is_swap : 1;
33784 /* Set if the insn has a live-in use of a parameter register. */
33785 unsigned int is_live_in : 1;
33786 /* Set if the insn has a live-out def of a return register. */
33787 unsigned int is_live_out : 1;
33788 /* Set if the insn contains a subreg reference of a vector register. */
33789 unsigned int contains_subreg : 1;
33790 /* Set if the insn contains a 128-bit integer operand. */
33791 unsigned int is_128_int : 1;
33792 /* Set if this is a call-insn. */
33793 unsigned int is_call : 1;
33794 /* Set if this insn does not perform a vector operation for which
33795 element order matters, or if we know how to fix it up if it does.
33796 Undefined if is_swap is set. */
33797 unsigned int is_swappable : 1;
33798 /* A nonzero value indicates what kind of special handling for this
33799 insn is required if doublewords are swapped. Undefined if
33800 is_swappable is not set. */
33801 unsigned int special_handling : 3;
33802 /* Set if the web represented by this entry cannot be optimized. */
33803 unsigned int web_not_optimizable : 1;
33804 /* Set if this insn should be deleted. */
33805 unsigned int will_delete : 1;
33808 enum special_handling_values {
33809 SH_NONE = 0,
33810 SH_CONST_VECTOR,
33811 SH_SUBREG,
33812 SH_NOSWAP_LD,
33813 SH_NOSWAP_ST,
33814 SH_EXTRACT,
33815 SH_SPLAT
33818 /* Union INSN with all insns containing definitions that reach USE.
33819 Detect whether USE is live-in to the current function. */
33820 static void
33821 union_defs (swap_web_entry *insn_entry, rtx insn, df_ref use)
33823 struct df_link *link = DF_REF_CHAIN (use);
33825 if (!link)
33826 insn_entry[INSN_UID (insn)].is_live_in = 1;
33828 while (link)
33830 if (DF_REF_IS_ARTIFICIAL (link->ref))
33831 insn_entry[INSN_UID (insn)].is_live_in = 1;
33833 if (DF_REF_INSN_INFO (link->ref))
33835 rtx def_insn = DF_REF_INSN (link->ref);
33836 (void)unionfind_union (insn_entry + INSN_UID (insn),
33837 insn_entry + INSN_UID (def_insn));
33840 link = link->next;
33844 /* Union INSN with all insns containing uses reached from DEF.
33845 Detect whether DEF is live-out from the current function. */
33846 static void
33847 union_uses (swap_web_entry *insn_entry, rtx insn, df_ref def)
33849 struct df_link *link = DF_REF_CHAIN (def);
33851 if (!link)
33852 insn_entry[INSN_UID (insn)].is_live_out = 1;
33854 while (link)
33856 /* This could be an eh use or some other artificial use;
33857 we treat these all the same (killing the optimization). */
33858 if (DF_REF_IS_ARTIFICIAL (link->ref))
33859 insn_entry[INSN_UID (insn)].is_live_out = 1;
33861 if (DF_REF_INSN_INFO (link->ref))
33863 rtx use_insn = DF_REF_INSN (link->ref);
33864 (void)unionfind_union (insn_entry + INSN_UID (insn),
33865 insn_entry + INSN_UID (use_insn));
33868 link = link->next;
33872 /* Return 1 iff INSN is a load insn, including permuting loads that
33873 represent an lxvd2x instruction; else return 0. */
33874 static unsigned int
33875 insn_is_load_p (rtx insn)
33877 rtx body = PATTERN (insn);
33879 if (GET_CODE (body) == SET)
33881 if (GET_CODE (SET_SRC (body)) == MEM)
33882 return 1;
33884 if (GET_CODE (SET_SRC (body)) == VEC_SELECT
33885 && GET_CODE (XEXP (SET_SRC (body), 0)) == MEM)
33886 return 1;
33888 return 0;
33891 if (GET_CODE (body) != PARALLEL)
33892 return 0;
33894 rtx set = XVECEXP (body, 0, 0);
33896 if (GET_CODE (set) == SET && GET_CODE (SET_SRC (set)) == MEM)
33897 return 1;
33899 return 0;
33902 /* Return 1 iff INSN is a store insn, including permuting stores that
33903 represent an stxvd2x instruction; else return 0. */
33904 static unsigned int
33905 insn_is_store_p (rtx insn)
33907 rtx body = PATTERN (insn);
33908 if (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == MEM)
33909 return 1;
33910 if (GET_CODE (body) != PARALLEL)
33911 return 0;
33912 rtx set = XVECEXP (body, 0, 0);
33913 if (GET_CODE (set) == SET && GET_CODE (SET_DEST (set)) == MEM)
33914 return 1;
33915 return 0;
33918 /* Return 1 iff INSN swaps doublewords. This may be a reg-reg swap,
33919 a permuting load, or a permuting store. */
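/* For example (illustrative), a V4SI doubleword swap has the form

     (set (reg:V4SI x)
          (vec_select:V4SI (reg:V4SI y)
                           (parallel [(const_int 2) (const_int 3)
                                      (const_int 0) (const_int 1)])))

   which the checks below accept for selector lengths 2, 4, 8 and 16. */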
33920 static unsigned int
33921 insn_is_swap_p (rtx insn)
33923 rtx body = PATTERN (insn);
33924 if (GET_CODE (body) != SET)
33925 return 0;
33926 rtx rhs = SET_SRC (body);
33927 if (GET_CODE (rhs) != VEC_SELECT)
33928 return 0;
33929 rtx parallel = XEXP (rhs, 1);
33930 if (GET_CODE (parallel) != PARALLEL)
33931 return 0;
33932 unsigned int len = XVECLEN (parallel, 0);
33933 if (len != 2 && len != 4 && len != 8 && len != 16)
33934 return 0;
33935 for (unsigned int i = 0; i < len / 2; ++i)
33937 rtx op = XVECEXP (parallel, 0, i);
33938 if (GET_CODE (op) != CONST_INT || INTVAL (op) != len / 2 + i)
33939 return 0;
33941 for (unsigned int i = len / 2; i < len; ++i)
33943 rtx op = XVECEXP (parallel, 0, i);
33944 if (GET_CODE (op) != CONST_INT || INTVAL (op) != i - len / 2)
33945 return 0;
33947 return 1;
33950 /* Return 1 iff OP is an operand that will not be affected by having
33951 vector doublewords swapped in memory. */
33952 static unsigned int
33953 rtx_is_swappable_p (rtx op, unsigned int *special)
33955 enum rtx_code code = GET_CODE (op);
33956 int i, j;
33957 rtx parallel;
33959 switch (code)
33961 case LABEL_REF:
33962 case SYMBOL_REF:
33963 case CLOBBER:
33964 case REG:
33965 return 1;
33967 case VEC_CONCAT:
33968 case ASM_INPUT:
33969 case ASM_OPERANDS:
33970 return 0;
33972 case CONST_VECTOR:
33974 *special = SH_CONST_VECTOR;
33975 return 1;
33978 case VEC_DUPLICATE:
33979 /* Opportunity: If XEXP (op, 0) has the same mode as the result,
33980 and XEXP (op, 1) is a PARALLEL with a single QImode const int,
33981 it represents a vector splat for which we can do special
33982 handling. */
33983 if (GET_CODE (XEXP (op, 0)) == CONST_INT)
33984 return 1;
33985 else if (GET_CODE (XEXP (op, 0)) == REG
33986 && GET_MODE_INNER (GET_MODE (op)) == GET_MODE (XEXP (op, 0)))
33987 /* This catches V2DF and V2DI splat, at a minimum. */
33988 return 1;
33989 else if (GET_CODE (XEXP (op, 0)) == VEC_SELECT)
33990 /* If the duplicated item is from a select, defer to the select
33991 processing to see if we can change the lane for the splat. */
33992 return rtx_is_swappable_p (XEXP (op, 0), special);
33993 else
33994 return 0;
33996 case VEC_SELECT:
33997 /* A vec_extract operation is ok if we change the lane. */
33998 if (GET_CODE (XEXP (op, 0)) == REG
33999 && GET_MODE_INNER (GET_MODE (XEXP (op, 0))) == GET_MODE (op)
34000 && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
34001 && XVECLEN (parallel, 0) == 1
34002 && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT)
34004 *special = SH_EXTRACT;
34005 return 1;
34007 else
34008 return 0;
34010 case UNSPEC:
34012 /* Various operations are unsafe for this optimization, at least
34013 without significant additional work. Permutes are obviously
34014 problematic, as both the permute control vector and the ordering
34015 of the target values are invalidated by doubleword swapping.
34016 Vector pack and unpack modify the number of vector lanes.
34017 Merge-high/low will not operate correctly on swapped operands.
34018 Vector shifts across element boundaries are clearly uncool,
34019 as are vector select and concatenate operations. Vector
34020 sum-across instructions define one operand with a specific
34021 order-dependent element, so additional fixup code would be
34022 needed to make those work. Vector set and non-immediate-form
34023 vector splat are element-order sensitive. A few of these
34024 cases might be workable with special handling if required. */
34025 int val = XINT (op, 1);
34026 switch (val)
34028 default:
34029 break;
34030 case UNSPEC_VMRGH_DIRECT:
34031 case UNSPEC_VMRGL_DIRECT:
34032 case UNSPEC_VPACK_SIGN_SIGN_SAT:
34033 case UNSPEC_VPACK_SIGN_UNS_SAT:
34034 case UNSPEC_VPACK_UNS_UNS_MOD:
34035 case UNSPEC_VPACK_UNS_UNS_MOD_DIRECT:
34036 case UNSPEC_VPACK_UNS_UNS_SAT:
34037 case UNSPEC_VPERM:
34038 case UNSPEC_VPERM_UNS:
34039 case UNSPEC_VPERMHI:
34040 case UNSPEC_VPERMSI:
34041 case UNSPEC_VPKPX:
34042 case UNSPEC_VSLDOI:
34043 case UNSPEC_VSLO:
34044 case UNSPEC_VSRO:
34045 case UNSPEC_VSUM2SWS:
34046 case UNSPEC_VSUM4S:
34047 case UNSPEC_VSUM4UBS:
34048 case UNSPEC_VSUMSWS:
34049 case UNSPEC_VSUMSWS_DIRECT:
34050 case UNSPEC_VSX_CONCAT:
34051 case UNSPEC_VSX_SET:
34052 case UNSPEC_VSX_SLDWI:
34053 case UNSPEC_VUNPACK_HI_SIGN:
34054 case UNSPEC_VUNPACK_HI_SIGN_DIRECT:
34055 case UNSPEC_VUNPACK_LO_SIGN:
34056 case UNSPEC_VUNPACK_LO_SIGN_DIRECT:
34057 case UNSPEC_VUPKHPX:
34058 case UNSPEC_VUPKHS_V4SF:
34059 case UNSPEC_VUPKHU_V4SF:
34060 case UNSPEC_VUPKLPX:
34061 case UNSPEC_VUPKLS_V4SF:
34062 case UNSPEC_VUPKLU_V4SF:
34063 /* The following could be handled as an idiom with XXSPLTW.
34064 These place a scalar in BE element zero, but the XXSPLTW
34065 will currently expect it in BE element 2 in a swapped
34066 region. When one of these feeds an XXSPLTW with no other
34067 defs/uses either way, we can avoid the lane change for
34068 XXSPLTW and things will be correct. TBD. */
34069 case UNSPEC_VSX_CVDPSPN:
34070 case UNSPEC_VSX_CVSPDP:
34071 case UNSPEC_VSX_CVSPDPN:
34072 return 0;
34073 case UNSPEC_VSPLT_DIRECT:
34074 *special = SH_SPLAT;
34075 return 1;
34079 default:
34080 break;
34083 const char *fmt = GET_RTX_FORMAT (code);
34084 int ok = 1;
34086 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34087 if (fmt[i] == 'e' || fmt[i] == 'u')
34089 unsigned int special_op = SH_NONE;
34090 ok &= rtx_is_swappable_p (XEXP (op, i), &special_op);
34091 /* Ensure we never have two kinds of special handling
34092 for the same insn. */
34093 if (*special != SH_NONE && special_op != SH_NONE
34094 && *special != special_op)
34095 return 0;
34096 *special = special_op;
34098 else if (fmt[i] == 'E')
34099 for (j = 0; j < XVECLEN (op, i); ++j)
34101 unsigned int special_op = SH_NONE;
34102 ok &= rtx_is_swappable_p (XVECEXP (op, i, j), &special_op);
34103 /* Ensure we never have two kinds of special handling
34104 for the same insn. */
34105 if (*special != SH_NONE && special_op != SH_NONE
34106 && *special != special_op)
34107 return 0;
34108 *special = special_op;
34111 return ok;
34114 /* Return 1 iff INSN will not be affected by having vector
34115 doublewords swapped in memory (in which case *SPECIAL is
34116 unchanged), or can be modified to be correct if vector
34117 doublewords are swapped in memory (in which case *SPECIAL is
34118 changed to a value indicating how). */
34119 static unsigned int
34120 insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
34121 unsigned int *special)
34123 /* Calls are always bad. */
34124 if (GET_CODE (insn) == CALL_INSN)
34125 return 0;
34127 /* Loads and stores seen here are not permuting, but we can still
34128 fix them up by converting them to permuting ones. Exceptions:
34129 UNSPEC_LVE, UNSPEC_LVX, and UNSPEC_STVX, which have a PARALLEL
34130 body instead of a SET; and UNSPEC_STVE, which has an UNSPEC
34131 for the SET source. */
34132 rtx body = PATTERN (insn);
34133 int i = INSN_UID (insn);
34135 if (insn_entry[i].is_load)
34137 if (GET_CODE (body) == SET)
34139 *special = SH_NOSWAP_LD;
34140 return 1;
34142 else
34143 return 0;
34146 if (insn_entry[i].is_store)
34148 if (GET_CODE (body) == SET && GET_CODE (SET_SRC (body)) != UNSPEC)
34150 *special = SH_NOSWAP_ST;
34151 return 1;
34153 else
34154 return 0;
34157 /* Otherwise check the operands for vector lane violations. */
34158 return rtx_is_swappable_p (body, special);
34161 enum chain_purpose { FOR_LOADS, FOR_STORES };
34163 /* Return true if the UD or DU chain headed by LINK is non-empty,
34164 and every entry on the chain references an insn that is a
34165 register swap. Furthermore, if PURPOSE is FOR_LOADS, each such
34166 register swap must have only permuting loads as reaching defs.
34167 If PURPOSE is FOR_STORES, each such register swap must have only
34168 register swaps or permuting stores as reached uses. */
34169 static bool
34170 chain_contains_only_swaps (swap_web_entry *insn_entry, struct df_link *link,
34171 enum chain_purpose purpose)
34173 if (!link)
34174 return false;
34176 for (; link; link = link->next)
34178 if (!VECTOR_MODE_P (GET_MODE (DF_REF_REG (link->ref))))
34179 continue;
34181 if (DF_REF_IS_ARTIFICIAL (link->ref))
34182 return false;
34184 rtx reached_insn = DF_REF_INSN (link->ref);
34185 unsigned uid = INSN_UID (reached_insn);
34186 struct df_insn_info *insn_info = DF_INSN_INFO_GET (reached_insn);
34188 if (!insn_entry[uid].is_swap || insn_entry[uid].is_load
34189 || insn_entry[uid].is_store)
34190 return false;
34192 if (purpose == FOR_LOADS)
34194 df_ref use;
34195 FOR_EACH_INSN_INFO_USE (use, insn_info)
34197 struct df_link *swap_link = DF_REF_CHAIN (use);
34199 while (swap_link)
34201 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34202 return false;
34204 rtx swap_def_insn = DF_REF_INSN (swap_link->ref);
34205 unsigned uid2 = INSN_UID (swap_def_insn);
34207 /* Only permuting loads are allowed. */
34208 if (!insn_entry[uid2].is_swap || !insn_entry[uid2].is_load)
34209 return false;
34211 swap_link = swap_link->next;
34215 else if (purpose == FOR_STORES)
34217 df_ref def;
34218 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34220 struct df_link *swap_link = DF_REF_CHAIN (def);
34222 while (swap_link)
34224 if (DF_REF_IS_ARTIFICIAL (swap_link->ref))
34225 return false;
34227 rtx swap_use_insn = DF_REF_INSN (swap_link->ref);
34228 unsigned uid2 = INSN_UID (swap_use_insn);
34230 /* Permuting stores or register swaps are allowed. */
34231 if (!insn_entry[uid2].is_swap || insn_entry[uid2].is_load)
34232 return false;
34234 swap_link = swap_link->next;
34240 return true;
34243 /* Mark the xxswapdi instructions associated with permuting loads and
34244 stores for removal. Note that we only flag them for deletion here,
34245 as there is a possibility of a swap being reached from multiple
34246 loads, etc. */
34247 static void
34248 mark_swaps_for_removal (swap_web_entry *insn_entry, unsigned int i)
34250 rtx insn = insn_entry[i].insn;
34251 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34253 if (insn_entry[i].is_load)
34255 df_ref def;
34256 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34258 struct df_link *link = DF_REF_CHAIN (def);
34260 /* We know by now that these are swaps, so we can delete
34261 them confidently. */
34262 while (link)
34264 rtx use_insn = DF_REF_INSN (link->ref);
34265 insn_entry[INSN_UID (use_insn)].will_delete = 1;
34266 link = link->next;
34270 else if (insn_entry[i].is_store)
34272 df_ref use;
34273 FOR_EACH_INSN_INFO_USE (use, insn_info)
34275 /* Ignore uses for addressability. */
34276 machine_mode mode = GET_MODE (DF_REF_REG (use));
34277 if (!VECTOR_MODE_P (mode))
34278 continue;
34280 struct df_link *link = DF_REF_CHAIN (use);
34282 /* We know by now that these are swaps, so we can delete
34283 them confidently. */
34284 while (link)
34286 rtx def_insn = DF_REF_INSN (link->ref);
34287 insn_entry[INSN_UID (def_insn)].will_delete = 1;
34288 link = link->next;
34294 /* OP is either a CONST_VECTOR or an expression containing one.
34295 Swap the first half of the vector with the second in the first
34296 case. Recurse to find it in the second. */
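/* E.g., for illustration, a V4SI constant {0, 1, 2, 3} becomes
   {2, 3, 0, 1}, matching the element order the value will have in a
   register once the accompanying swap is removed. */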
34297 static void
34298 swap_const_vector_halves (rtx op)
34300 int i;
34301 enum rtx_code code = GET_CODE (op);
34302 if (GET_CODE (op) == CONST_VECTOR)
34304 int half_units = GET_MODE_NUNITS (GET_MODE (op)) / 2;
34305 for (i = 0; i < half_units; ++i)
34307 rtx temp = CONST_VECTOR_ELT (op, i);
34308 CONST_VECTOR_ELT (op, i) = CONST_VECTOR_ELT (op, i + half_units);
34309 CONST_VECTOR_ELT (op, i + half_units) = temp;
34312 else
34314 int j;
34315 const char *fmt = GET_RTX_FORMAT (code);
34316 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34317 if (fmt[i] == 'e' || fmt[i] == 'u')
34318 swap_const_vector_halves (XEXP (op, i));
34319 else if (fmt[i] == 'E')
34320 for (j = 0; j < XVECLEN (op, i); ++j)
34321 swap_const_vector_halves (XVECEXP (op, i, j));
34325 /* Find all subregs of a vector expression that perform a narrowing,
34326 and adjust the subreg index to account for doubleword swapping. */
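/* For example (a sketch): in (subreg:DI (reg:V2DI v) 0), the selected
   doubleword moves once the halves are swapped, so SUBREG_BYTE 0
   becomes 8 and SUBREG_BYTE 8 becomes 0, as done below. */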
34327 static void
34328 adjust_subreg_index (rtx op)
34330 enum rtx_code code = GET_CODE (op);
34331 if (code == SUBREG
34332 && (GET_MODE_SIZE (GET_MODE (op))
34333 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0)))))
34335 unsigned int index = SUBREG_BYTE (op);
34336 if (index < 8)
34337 index += 8;
34338 else
34339 index -= 8;
34340 SUBREG_BYTE (op) = index;
34343 const char *fmt = GET_RTX_FORMAT (code);
34344 int i,j;
34345 for (i = 0; i < GET_RTX_LENGTH (code); ++i)
34346 if (fmt[i] == 'e' || fmt[i] == 'u')
34347 adjust_subreg_index (XEXP (op, i));
34348 else if (fmt[i] == 'E')
34349 for (j = 0; j < XVECLEN (op, i); ++j)
34350 adjust_subreg_index (XVECEXP (op, i, j));
34353 /* Convert the non-permuting load INSN to a permuting one. */
34354 static void
34355 permute_load (rtx_insn *insn)
34357 rtx body = PATTERN (insn);
34358 rtx mem_op = SET_SRC (body);
34359 rtx tgt_reg = SET_DEST (body);
34360 machine_mode mode = GET_MODE (tgt_reg);
34361 int n_elts = GET_MODE_NUNITS (mode);
34362 int half_elts = n_elts / 2;
34363 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34364 int i, j;
34365 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34366 XVECEXP (par, 0, i) = GEN_INT (j);
34367 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34368 XVECEXP (par, 0, i) = GEN_INT (j);
34369 rtx sel = gen_rtx_VEC_SELECT (mode, mem_op, par);
34370 SET_SRC (body) = sel;
34371 INSN_CODE (insn) = -1; /* Force re-recognition. */
34372 df_insn_rescan (insn);
34374 if (dump_file)
34375 fprintf (dump_file, "Replacing load %d with permuted load\n",
34376 INSN_UID (insn));
34379 /* Convert the non-permuting store INSN to a permuting one. */
34380 static void
34381 permute_store (rtx_insn *insn)
34383 rtx body = PATTERN (insn);
34384 rtx src_reg = SET_SRC (body);
34385 machine_mode mode = GET_MODE (src_reg);
34386 int n_elts = GET_MODE_NUNITS (mode);
34387 int half_elts = n_elts / 2;
34388 rtx par = gen_rtx_PARALLEL (mode, rtvec_alloc (n_elts));
34389 int i, j;
34390 for (i = 0, j = half_elts; i < half_elts; ++i, ++j)
34391 XVECEXP (par, 0, i) = GEN_INT (j);
34392 for (i = half_elts, j = 0; j < half_elts; ++i, ++j)
34393 XVECEXP (par, 0, i) = GEN_INT (j);
34394 rtx sel = gen_rtx_VEC_SELECT (mode, src_reg, par);
34395 SET_SRC (body) = sel;
34396 INSN_CODE (insn) = -1; /* Force re-recognition. */
34397 df_insn_rescan (insn);
34399 if (dump_file)
34400 fprintf (dump_file, "Replacing store %d with permuted store\n",
34401 INSN_UID (insn));
34404 /* Given INSN, whose pattern contains a vector extract operation, adjust
34405 the index of the extracted lane to account for the doubleword swap. */
34406 static void
34407 adjust_extract (rtx_insn *insn)
34409 rtx src = SET_SRC (PATTERN (insn));
34410 /* The vec_select may be wrapped in a vec_duplicate for a splat, so
34411 account for that. */
34412 rtx sel = GET_CODE (src) == VEC_DUPLICATE ? XEXP (src, 0) : src;
34413 rtx par = XEXP (sel, 1);
34414 int half_elts = GET_MODE_NUNITS (GET_MODE (XEXP (sel, 0))) >> 1;
34415 int lane = INTVAL (XVECEXP (par, 0, 0));
34416 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34417 XVECEXP (par, 0, 0) = GEN_INT (lane);
34418 INSN_CODE (insn) = -1; /* Force re-recognition. */
34419 df_insn_rescan (insn);
34421 if (dump_file)
34422 fprintf (dump_file, "Changing lane for extract %d\n", INSN_UID (insn));
34425 /* Given INSN, whose pattern contains a vector direct-splat operation,
34426 adjust the index of the source lane to account for the doubleword swap. */
34427 static void
34428 adjust_splat (rtx_insn *insn)
34430 rtx body = PATTERN (insn);
34431 rtx unspec = XEXP (body, 1);
34432 int half_elts = GET_MODE_NUNITS (GET_MODE (unspec)) >> 1;
34433 int lane = INTVAL (XVECEXP (unspec, 0, 1));
34434 lane = lane >= half_elts ? lane - half_elts : lane + half_elts;
34435 XVECEXP (unspec, 0, 1) = GEN_INT (lane);
34436 INSN_CODE (insn) = -1; /* Force re-recognition. */
34437 df_insn_rescan (insn);
34439 if (dump_file)
34440 fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
34443 /* The insn described by INSN_ENTRY[I] can be swapped, but only
34444 with special handling. Take care of that here. */
34445 static void
34446 handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
34448 rtx_insn *insn = insn_entry[i].insn;
34449 rtx body = PATTERN (insn);
34451 switch (insn_entry[i].special_handling)
34453 default:
34454 gcc_unreachable ();
34455 case SH_CONST_VECTOR:
34457 /* A CONST_VECTOR will only show up somewhere in the RHS of a SET. */
34458 gcc_assert (GET_CODE (body) == SET);
34459 rtx rhs = SET_SRC (body);
34460 swap_const_vector_halves (rhs);
34461 if (dump_file)
34462 fprintf (dump_file, "Swapping constant halves in insn %d\n", i);
34463 break;
34465 case SH_SUBREG:
34466 /* A subreg of the same size is already safe. For subregs that
34467 select a smaller portion of a reg, adjust the index for
34468 swapped doublewords. */
34469 adjust_subreg_index (body);
34470 if (dump_file)
34471 fprintf (dump_file, "Adjusting subreg in insn %d\n", i);
34472 break;
34473 case SH_NOSWAP_LD:
34474 /* Convert a non-permuting load to a permuting one. */
34475 permute_load (insn);
34476 break;
34477 case SH_NOSWAP_ST:
34478 /* Convert a non-permuting store to a permuting one. */
34479 permute_store (insn);
34480 break;
34481 case SH_EXTRACT:
34482 /* Change the lane on an extract operation. */
34483 adjust_extract (insn);
34484 break;
34485 case SH_SPLAT:
34486 /* Change the lane on a direct-splat operation. */
34487 adjust_splat (insn);
34488 break;
34492 /* Find the insn from the Ith table entry, which is known to be a
34493 register swap Y = SWAP(X). Replace it with a copy Y = X. */
34494 static void
34495 replace_swap_with_copy (swap_web_entry *insn_entry, unsigned i)
34497 rtx_insn *insn = insn_entry[i].insn;
34498 rtx body = PATTERN (insn);
34499 rtx src_reg = XEXP (SET_SRC (body), 0);
34500 rtx copy = gen_rtx_SET (VOIDmode, SET_DEST (body), src_reg);
34501 rtx_insn *new_insn = emit_insn_before (copy, insn);
34502 set_block_for_insn (new_insn, BLOCK_FOR_INSN (insn));
34503 df_insn_rescan (new_insn);
34505 if (dump_file)
34507 unsigned int new_uid = INSN_UID (new_insn);
34508 fprintf (dump_file, "Replacing swap %d with copy %d\n", i, new_uid);
34511 df_insn_delete (insn);
34512 remove_insn (insn);
34513 insn->set_deleted ();
34516 /* Dump the swap table to DUMP_FILE. */
34517 static void
34518 dump_swap_insn_table (swap_web_entry *insn_entry)
34520 int e = get_max_uid ();
34521 fprintf (dump_file, "\nRelevant insns with their flag settings\n\n");
34523 for (int i = 0; i < e; ++i)
34524 if (insn_entry[i].is_relevant)
34526 swap_web_entry *pred_entry = (swap_web_entry *)insn_entry[i].pred ();
34527 fprintf (dump_file, "%6d %6d ", i,
34528 pred_entry && pred_entry->insn
34529 ? INSN_UID (pred_entry->insn) : 0);
34530 if (insn_entry[i].is_load)
34531 fputs ("load ", dump_file);
34532 if (insn_entry[i].is_store)
34533 fputs ("store ", dump_file);
34534 if (insn_entry[i].is_swap)
34535 fputs ("swap ", dump_file);
34536 if (insn_entry[i].is_live_in)
34537 fputs ("live-in ", dump_file);
34538 if (insn_entry[i].is_live_out)
34539 fputs ("live-out ", dump_file);
34540 if (insn_entry[i].contains_subreg)
34541 fputs ("subreg ", dump_file);
34542 if (insn_entry[i].is_128_int)
34543 fputs ("int128 ", dump_file);
34544 if (insn_entry[i].is_call)
34545 fputs ("call ", dump_file);
34546 if (insn_entry[i].is_swappable)
34548 fputs ("swappable ", dump_file);
34549 if (insn_entry[i].special_handling == SH_CONST_VECTOR)
34550 fputs ("special:constvec ", dump_file);
34551 else if (insn_entry[i].special_handling == SH_SUBREG)
34552 fputs ("special:subreg ", dump_file);
34553 else if (insn_entry[i].special_handling == SH_NOSWAP_LD)
34554 fputs ("special:load ", dump_file);
34555 else if (insn_entry[i].special_handling == SH_NOSWAP_ST)
34556 fputs ("special:store ", dump_file);
34557 else if (insn_entry[i].special_handling == SH_EXTRACT)
34558 fputs ("special:extract ", dump_file);
34559 else if (insn_entry[i].special_handling == SH_SPLAT)
34560 fputs ("special:splat ", dump_file);
34562 if (insn_entry[i].web_not_optimizable)
34563 fputs ("unoptimizable ", dump_file);
34564 if (insn_entry[i].will_delete)
34565 fputs ("delete ", dump_file);
34566 fputs ("\n", dump_file);
34568 fputs ("\n", dump_file);
34571 /* Main entry point for this pass. */
34572 unsigned int
34573 rs6000_analyze_swaps (function *fun)
34575 swap_web_entry *insn_entry;
34576 basic_block bb;
34577 rtx_insn *insn;
34579 /* Dataflow analysis for use-def chains. */
34580 df_set_flags (DF_RD_PRUNE_DEAD_DEFS);
34581 df_chain_add_problem (DF_DU_CHAIN | DF_UD_CHAIN);
34582 df_analyze ();
34583 df_set_flags (DF_DEFER_INSN_RESCAN);
34585 /* Allocate structure to represent webs of insns. */
34586 insn_entry = XCNEWVEC (swap_web_entry, get_max_uid ());
34588 /* Walk the insns to gather basic data. */
34589 FOR_ALL_BB_FN (bb, fun)
34590 FOR_BB_INSNS (bb, insn)
34592 unsigned int uid = INSN_UID (insn);
34593 if (NONDEBUG_INSN_P (insn))
34595 insn_entry[uid].insn = insn;
34597 if (GET_CODE (insn) == CALL_INSN)
34598 insn_entry[uid].is_call = 1;
34600 /* Walk the uses and defs to see if we mention vector regs.
34601 Record any constraints on optimization of such mentions. */
34602 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34603 df_ref mention;
34604 FOR_EACH_INSN_INFO_USE (mention, insn_info)
34606 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34607 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34609 /* If a use gets its value from a call insn, it will be
34610 a hard register and will look like (reg:V4SI 3 3).
34611 The df analysis creates two mentions for GPR3 and GPR4,
34612 both DImode. We must recognize this and treat it as a
34613 vector mention to ensure the call is unioned with this
34614 use. */
34615 if (mode == DImode && DF_REF_INSN_INFO (mention))
34617 rtx feeder = DF_REF_INSN (mention);
34618 /* FIXME: It is pretty hard to get from the df mention
34619 to the mode of the use in the insn. We arbitrarily
34620 pick a vector mode here, even though the use might
34621 be a real DImode. We can be too conservative
34622 (create a web larger than necessary) because of
34623 this, so consider eventually fixing this. */
34624 if (GET_CODE (feeder) == CALL_INSN)
34625 mode = V4SImode;
34628 if (VECTOR_MODE_P (mode))
34630 insn_entry[uid].is_relevant = 1;
34631 if (mode == TImode || mode == V1TImode)
34632 insn_entry[uid].is_128_int = 1;
34633 if (DF_REF_INSN_INFO (mention))
34634 insn_entry[uid].contains_subreg
34635 = !rtx_equal_p (DF_REF_REG (mention),
34636 DF_REF_REAL_REG (mention));
34637 union_defs (insn_entry, insn, mention);
34640 FOR_EACH_INSN_INFO_DEF (mention, insn_info)
34642 /* We use DF_REF_REAL_REG here to get inside any subregs. */
34643 machine_mode mode = GET_MODE (DF_REF_REAL_REG (mention));
34645 /* If we're loading up a hard vector register for a call,
34646 it looks like (set (reg:V4SI 9 9) (...)). The df
34647 analysis creates two mentions for GPR9 and GPR10, both
34648 DImode. So relying on the mode from the mentions
34649 isn't sufficient to ensure we union the call into the
34650 web with the parameter setup code. */
34651 if (mode == DImode && GET_CODE (PATTERN (insn)) == SET
34652 && VECTOR_MODE_P (GET_MODE (SET_DEST (PATTERN (insn)))))
34653 mode = GET_MODE (SET_DEST (PATTERN (insn)));
34655 if (VECTOR_MODE_P (mode))
34657 insn_entry[uid].is_relevant = 1;
34658 if (mode == TImode || mode == V1TImode)
34659 insn_entry[uid].is_128_int = 1;
34660 if (DF_REF_INSN_INFO (mention))
34661 insn_entry[uid].contains_subreg
34662 = !rtx_equal_p (DF_REF_REG (mention),
34663 DF_REF_REAL_REG (mention));
34664 /* REG_FUNCTION_VALUE_P is not valid for subregs. */
34665 else if (REG_FUNCTION_VALUE_P (DF_REF_REG (mention)))
34666 insn_entry[uid].is_live_out = 1;
34667 union_uses (insn_entry, insn, mention);
34671 if (insn_entry[uid].is_relevant)
34673 /* Determine if this is a load or store. */
34674 insn_entry[uid].is_load = insn_is_load_p (insn);
34675 insn_entry[uid].is_store = insn_is_store_p (insn);
34677 /* Determine if this is a doubleword swap. If not,
34678 determine whether it can legally be swapped. */
34679 if (insn_is_swap_p (insn))
34680 insn_entry[uid].is_swap = 1;
34681 else
34683 unsigned int special = SH_NONE;
34684 insn_entry[uid].is_swappable
34685 = insn_is_swappable_p (insn_entry, insn, &special);
34686 if (special != SH_NONE && insn_entry[uid].contains_subreg)
34687 insn_entry[uid].is_swappable = 0;
34688 else if (special != SH_NONE)
34689 insn_entry[uid].special_handling = special;
34690 else if (insn_entry[uid].contains_subreg)
34691 insn_entry[uid].special_handling = SH_SUBREG;
34697 if (dump_file)
34699 fprintf (dump_file, "\nSwap insn entry table when first built\n");
34700 dump_swap_insn_table (insn_entry);
34703 /* Record unoptimizable webs. */
34704 unsigned e = get_max_uid (), i;
34705 for (i = 0; i < e; ++i)
34707 if (!insn_entry[i].is_relevant)
34708 continue;
34710 swap_web_entry *root
34711 = (swap_web_entry*)(&insn_entry[i])->unionfind_root ();
34713 if (insn_entry[i].is_live_in || insn_entry[i].is_live_out
34714 || (insn_entry[i].contains_subreg
34715 && insn_entry[i].special_handling != SH_SUBREG)
34716 || insn_entry[i].is_128_int || insn_entry[i].is_call
34717 || !(insn_entry[i].is_swappable || insn_entry[i].is_swap))
34718 root->web_not_optimizable = 1;
34720 /* If we have loads or stores that aren't permuting then the
34721 optimization isn't appropriate. */
34722 else if ((insn_entry[i].is_load || insn_entry[i].is_store)
34723 && !insn_entry[i].is_swap && !insn_entry[i].is_swappable)
34724 root->web_not_optimizable = 1;
34726 /* If we have permuting loads or stores that are not accompanied
34727 by a register swap, the optimization isn't appropriate. */
34728 else if (insn_entry[i].is_load && insn_entry[i].is_swap)
34730 rtx insn = insn_entry[i].insn;
34731 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34732 df_ref def;
34734 FOR_EACH_INSN_INFO_DEF (def, insn_info)
34736 struct df_link *link = DF_REF_CHAIN (def);
34738 if (!chain_contains_only_swaps (insn_entry, link, FOR_LOADS))
34740 root->web_not_optimizable = 1;
34741 break;
34745 else if (insn_entry[i].is_store && insn_entry[i].is_swap)
34747 rtx insn = insn_entry[i].insn;
34748 struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn);
34749 df_ref use;
34751 FOR_EACH_INSN_INFO_USE (use, insn_info)
34753 struct df_link *link = DF_REF_CHAIN (use);
34755 if (!chain_contains_only_swaps (insn_entry, link, FOR_STORES))
34757 root->web_not_optimizable = 1;
34758 break;
34764 if (dump_file)
34766 fprintf (dump_file, "\nSwap insn entry table after web analysis\n");
34767 dump_swap_insn_table (insn_entry);
34770 /* For each load and store in an optimizable web (which implies
34771 the loads and stores are permuting), find the associated
34772 register swaps and mark them for removal. Due to various
34773 optimizations we may mark the same swap more than once. Also
34774 perform special handling for swappable insns that require it. */
34775 for (i = 0; i < e; ++i)
34776 if ((insn_entry[i].is_load || insn_entry[i].is_store)
34777 && insn_entry[i].is_swap)
34779 swap_web_entry* root_entry
34780 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34781 if (!root_entry->web_not_optimizable)
34782 mark_swaps_for_removal (insn_entry, i);
34784 else if (insn_entry[i].is_swappable && insn_entry[i].special_handling)
34786 swap_web_entry* root_entry
34787 = (swap_web_entry*)((&insn_entry[i])->unionfind_root ());
34788 if (!root_entry->web_not_optimizable)
34789 handle_special_swappables (insn_entry, i);
34792 /* Now delete the swaps marked for removal. */
34793 for (i = 0; i < e; ++i)
34794 if (insn_entry[i].will_delete)
34795 replace_swap_with_copy (insn_entry, i);
34797 /* Clean up. */
34798 free (insn_entry);
34799 return 0;
34802 const pass_data pass_data_analyze_swaps =
34804 RTL_PASS, /* type */
34805 "swaps", /* name */
34806 OPTGROUP_NONE, /* optinfo_flags */
34807 TV_NONE, /* tv_id */
34808 0, /* properties_required */
34809 0, /* properties_provided */
34810 0, /* properties_destroyed */
34811 0, /* todo_flags_start */
34812 TODO_df_finish, /* todo_flags_finish */
34815 class pass_analyze_swaps : public rtl_opt_pass
34817 public:
34818 pass_analyze_swaps(gcc::context *ctxt)
34819 : rtl_opt_pass(pass_data_analyze_swaps, ctxt)
34822 /* opt_pass methods: */
34823 virtual bool gate (function *)
34825 return (optimize > 0 && !BYTES_BIG_ENDIAN && TARGET_VSX
34826 && rs6000_optimize_swaps);
34829 virtual unsigned int execute (function *fun)
34831 return rs6000_analyze_swaps (fun);
34834 }; // class pass_analyze_swaps
34836 rtl_opt_pass *
34837 make_pass_analyze_swaps (gcc::context *ctxt)
34839 return new pass_analyze_swaps (ctxt);
34842 #ifdef RS6000_GLIBC_ATOMIC_FENV
34843 /* Function declarations for rs6000_atomic_assign_expand_fenv. */
34844 static tree atomic_hold_decl, atomic_clear_decl, atomic_update_decl;
34845 #endif
34847 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
34849 static void
34850 rs6000_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
34852 if (!TARGET_HARD_FLOAT || !TARGET_FPRS)
34854 #ifdef RS6000_GLIBC_ATOMIC_FENV
34855 if (atomic_hold_decl == NULL_TREE)
34857 atomic_hold_decl
34858 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
34859 get_identifier ("__atomic_feholdexcept"),
34860 build_function_type_list (void_type_node,
34861 double_ptr_type_node,
34862 NULL_TREE));
34863 TREE_PUBLIC (atomic_hold_decl) = 1;
34864 DECL_EXTERNAL (atomic_hold_decl) = 1;
34867 if (atomic_clear_decl == NULL_TREE)
34869 atomic_clear_decl
34870 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
34871 get_identifier ("__atomic_feclearexcept"),
34872 build_function_type_list (void_type_node,
34873 NULL_TREE));
34874 TREE_PUBLIC (atomic_clear_decl) = 1;
34875 DECL_EXTERNAL (atomic_clear_decl) = 1;
34878 tree const_double = build_qualified_type (double_type_node,
34879 TYPE_QUAL_CONST);
34880 tree const_double_ptr = build_pointer_type (const_double);
34881 if (atomic_update_decl == NULL_TREE)
34883 atomic_update_decl
34884 = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
34885 get_identifier ("__atomic_feupdateenv"),
34886 build_function_type_list (void_type_node,
34887 const_double_ptr,
34888 NULL_TREE));
34889 TREE_PUBLIC (atomic_update_decl) = 1;
34890 DECL_EXTERNAL (atomic_update_decl) = 1;
34893 tree fenv_var = create_tmp_var (double_type_node, NULL);
34894 mark_addressable (fenv_var);
34895 tree fenv_addr = build1 (ADDR_EXPR, double_ptr_type_node, fenv_var);
34897 *hold = build_call_expr (atomic_hold_decl, 1, fenv_addr);
34898 *clear = build_call_expr (atomic_clear_decl, 0);
34899 *update = build_call_expr (atomic_update_decl, 1,
34900 fold_convert (const_double_ptr, fenv_addr));
34901 #endif
34902 return;
34905 tree mffs = rs6000_builtin_decls[RS6000_BUILTIN_MFFS];
34906 tree mtfsf = rs6000_builtin_decls[RS6000_BUILTIN_MTFSF];
34907 tree call_mffs = build_call_expr (mffs, 0);
34909 /* Generates the equivalent of feholdexcept (&fenv_var)
34911 fenv_var = __builtin_mffs ();
34912 double fenv_hold;
34913 *(uint64_t*)&fenv_hold = *(uint64_t*)&fenv_var & 0xffffffff00000007LL;
34914 __builtin_mtfsf (0xff, fenv_hold); */
34916 /* Mask to clear everything except for the rounding modes and non-IEEE
34917 arithmetic flag. */
34918 const unsigned HOST_WIDE_INT hold_exception_mask =
34919 HOST_WIDE_INT_C (0xffffffff00000007);
34921 tree fenv_var = create_tmp_var (double_type_node, NULL);
34923 tree hold_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_var, call_mffs);
34925 tree fenv_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_var);
34926 tree fenv_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
34927 build_int_cst (uint64_type_node,
34928 hold_exception_mask));
34930 tree fenv_hold_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
34931 fenv_llu_and);
34933 tree hold_mtfsf = build_call_expr (mtfsf, 2,
34934 build_int_cst (unsigned_type_node, 0xff),
34935 fenv_hold_mtfsf);
34937 *hold = build2 (COMPOUND_EXPR, void_type_node, hold_mffs, hold_mtfsf);
34939 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT):
34941 double fenv_clear = __builtin_mffs ();
34942 *(uint64_t*)&fenv_clear &= 0xffffffff00000000LL;
34943 __builtin_mtfsf (0xff, fenv_clear); */
34945 /* Mask to clear everything in the lower 32 bits of the environment
34946 image, including the exception flags and enable bits. */
34947 const unsigned HOST_WIDE_INT clear_exception_mask =
34948 HOST_WIDE_INT_C (0xffffffff00000000);
34950 tree fenv_clear = create_tmp_var (double_type_node, NULL);
34952 tree clear_mffs = build2 (MODIFY_EXPR, void_type_node, fenv_clear, call_mffs);
34954 tree fenv_clean_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, fenv_clear);
34955 tree fenv_clear_llu_and = build2 (BIT_AND_EXPR, uint64_type_node,
34956 fenv_clean_llu,
34957 build_int_cst (uint64_type_node,
34958 clear_exception_mask));
34960 tree fenv_clear_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
34961 fenv_clear_llu_and);
34963 tree clear_mtfsf = build_call_expr (mtfsf, 2,
34964 build_int_cst (unsigned_type_node, 0xff),
34965 fenv_clear_mtfsf);
34967 *clear = build2 (COMPOUND_EXPR, void_type_node, clear_mffs, clear_mtfsf);
34969 /* Generates the equivalent of feupdateenv (&fenv_var)
34971 double old_fenv = __builtin_mffs ();
34972 double fenv_update;
34973 *(uint64_t*)&fenv_update = (*(uint64_t*)&old_fenv & 0xffffffff1fffff00LL) |
34974 (*(uint64_t*)&fenv_var & 0x1ff80fff);
34975 __builtin_mtfsf (0xff, fenv_update); */
34977 const unsigned HOST_WIDE_INT update_exception_mask =
34978 HOST_WIDE_INT_C (0xffffffff1fffff00);
34979 const unsigned HOST_WIDE_INT new_exception_mask =
34980 HOST_WIDE_INT_C (0x1ff80fff);
34982 tree old_fenv = create_tmp_var (double_type_node, NULL);
34983 tree update_mffs = build2 (MODIFY_EXPR, void_type_node, old_fenv, call_mffs);
34985 tree old_llu = build1 (VIEW_CONVERT_EXPR, uint64_type_node, old_fenv);
34986 tree old_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, old_llu,
34987 build_int_cst (uint64_type_node,
34988 update_exception_mask));
34990 tree new_llu_and = build2 (BIT_AND_EXPR, uint64_type_node, fenv_llu,
34991 build_int_cst (uint64_type_node,
34992 new_exception_mask));
34994 tree new_llu_mask = build2 (BIT_IOR_EXPR, uint64_type_node,
34995 old_llu_and, new_llu_and);
34997 tree fenv_update_mtfsf = build1 (VIEW_CONVERT_EXPR, double_type_node,
34998 new_llu_mask);
35000 tree update_mtfsf = build_call_expr (mtfsf, 2,
35001 build_int_cst (unsigned_type_node, 0xff),
35002 fenv_update_mtfsf);
35004 *update = build2 (COMPOUND_EXPR, void_type_node, update_mffs, update_mtfsf);
35008 struct gcc_target targetm = TARGET_INITIALIZER;
35010 #include "gt-rs6000.h"